summaryrefslogtreecommitdiff
path: root/thirdparty/icu4c/common
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common')
-rw-r--r--thirdparty/icu4c/common/bytestriebuilder.cpp26
-rw-r--r--thirdparty/icu4c/common/charstr.cpp34
-rw-r--r--thirdparty/icu4c/common/charstr.h3
-rw-r--r--thirdparty/icu4c/common/cmemory.h55
-rw-r--r--thirdparty/icu4c/common/dictbe.cpp32
-rw-r--r--thirdparty/icu4c/common/edits.cpp1
-rw-r--r--thirdparty/icu4c/common/filteredbrk.cpp93
-rw-r--r--thirdparty/icu4c/common/hash.h19
-rw-r--r--thirdparty/icu4c/common/localematcher.cpp12
-rw-r--r--thirdparty/icu4c/common/localeprioritylist.cpp11
-rw-r--r--thirdparty/icu4c/common/locdispnames.cpp2
-rw-r--r--thirdparty/icu4c/common/locid.cpp244
-rw-r--r--thirdparty/icu4c/common/loclikelysubtags.cpp3
-rw-r--r--thirdparty/icu4c/common/norm2allmodes.h82
-rw-r--r--thirdparty/icu4c/common/normalizer2impl.cpp152
-rw-r--r--thirdparty/icu4c/common/normalizer2impl.h11
-rw-r--r--thirdparty/icu4c/common/pluralmap.h2
-rw-r--r--thirdparty/icu4c/common/putil.cpp4
-rw-r--r--thirdparty/icu4c/common/putilimp.h2
-rw-r--r--thirdparty/icu4c/common/rbbi.cpp2
-rw-r--r--thirdparty/icu4c/common/rbbi_cache.cpp2
-rw-r--r--thirdparty/icu4c/common/rbbiscan.cpp6
-rw-r--r--thirdparty/icu4c/common/rbbitblb.cpp2
-rw-r--r--thirdparty/icu4c/common/resource.h6
-rw-r--r--thirdparty/icu4c/common/restrace.cpp3
-rw-r--r--thirdparty/icu4c/common/servnotf.h4
-rw-r--r--thirdparty/icu4c/common/ubrk.cpp12
-rw-r--r--thirdparty/icu4c/common/ucase.cpp2
-rw-r--r--thirdparty/icu4c/common/uchar.cpp38
-rw-r--r--thirdparty/icu4c/common/ucnv2022.cpp6
-rw-r--r--thirdparty/icu4c/common/ucnv_bld.cpp2
-rw-r--r--thirdparty/icu4c/common/ucnv_err.cpp2
-rw-r--r--thirdparty/icu4c/common/ucnv_lmb.cpp4
-rw-r--r--thirdparty/icu4c/common/ucnv_u7.cpp2
-rw-r--r--thirdparty/icu4c/common/ucnvisci.cpp6
-rw-r--r--thirdparty/icu4c/common/ucurr.cpp4
-rw-r--r--thirdparty/icu4c/common/uhash.cpp81
-rw-r--r--thirdparty/icu4c/common/uhash.h95
-rw-r--r--thirdparty/icu4c/common/uloc.cpp42
-rw-r--r--thirdparty/icu4c/common/uloc_keytype.cpp2
-rw-r--r--thirdparty/icu4c/common/uloc_tag.cpp41
-rw-r--r--thirdparty/icu4c/common/ulocimp.h3
-rw-r--r--thirdparty/icu4c/common/unicode/bytestream.h6
-rw-r--r--thirdparty/icu4c/common/unicode/bytestrie.h3
-rw-r--r--thirdparty/icu4c/common/unicode/bytestriebuilder.h5
-rw-r--r--thirdparty/icu4c/common/unicode/docmain.h8
-rw-r--r--thirdparty/icu4c/common/unicode/icuplug.h7
-rw-r--r--thirdparty/icu4c/common/unicode/localematcher.h16
-rw-r--r--thirdparty/icu4c/common/unicode/locid.h4
-rw-r--r--thirdparty/icu4c/common/unicode/normalizer2.h28
-rw-r--r--thirdparty/icu4c/common/unicode/platform.h2
-rw-r--r--thirdparty/icu4c/common/unicode/stringpiece.h26
-rw-r--r--thirdparty/icu4c/common/unicode/ubrk.h21
-rw-r--r--thirdparty/icu4c/common/unicode/ucnv.h4
-rw-r--r--thirdparty/icu4c/common/unicode/ucnvsel.h4
-rw-r--r--thirdparty/icu4c/common/unicode/unifilt.h4
-rw-r--r--thirdparty/icu4c/common/unicode/uniset.h66
-rw-r--r--thirdparty/icu4c/common/unicode/unistr.h8
-rw-r--r--thirdparty/icu4c/common/unicode/urename.h16
-rw-r--r--thirdparty/icu4c/common/unicode/uset.h95
-rw-r--r--thirdparty/icu4c/common/unicode/ushape.h2
-rw-r--r--thirdparty/icu4c/common/unicode/utrace.h17
-rw-r--r--thirdparty/icu4c/common/unicode/uvernum.h12
-rw-r--r--thirdparty/icu4c/common/uniset.cpp128
-rw-r--r--thirdparty/icu4c/common/uniset_props.cpp116
-rw-r--r--thirdparty/icu4c/common/unisetspan.cpp25
-rw-r--r--thirdparty/icu4c/common/uprops.h52
-rw-r--r--thirdparty/icu4c/common/uresbund.cpp23
-rw-r--r--thirdparty/icu4c/common/uresdata.cpp8
-rw-r--r--thirdparty/icu4c/common/uresimp.h4
-rw-r--r--thirdparty/icu4c/common/uset.cpp35
-rw-r--r--thirdparty/icu4c/common/usprep.cpp2
-rw-r--r--thirdparty/icu4c/common/ustr_wcs.cpp4
-rw-r--r--thirdparty/icu4c/common/utext.cpp6
-rw-r--r--thirdparty/icu4c/common/util.h6
-rw-r--r--thirdparty/icu4c/common/utracimp.h2
-rw-r--r--thirdparty/icu4c/common/uvector.cpp4
-rw-r--r--thirdparty/icu4c/common/wintz.cpp24
78 files changed, 1385 insertions, 563 deletions
diff --git a/thirdparty/icu4c/common/bytestriebuilder.cpp b/thirdparty/icu4c/common/bytestriebuilder.cpp
index ec1ab7d8f5..28256f272a 100644
--- a/thirdparty/icu4c/common/bytestriebuilder.cpp
+++ b/thirdparty/icu4c/common/bytestriebuilder.cpp
@@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
return write(i);
+ } else {
+ char intBytes[5];
+ return write(intBytes, internalEncodeDelta(i, intBytes));
}
- char intBytes[5];
- int32_t length;
+}
+
+int32_t
+BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
+ U_ASSERT(i>=0);
+ if(i<=BytesTrie::kMaxOneByteDelta) {
+ intBytes[0]=(char)i;
+ return 1;
+ }
+ int32_t length=1;
if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
- length=1;
} else {
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
- length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
- length=3;
} else {
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
- length=4;
+ length=2;
}
- intBytes[1]=(char)(i>>16);
+ intBytes[length++]=(char)(i>>16);
}
- intBytes[1]=(char)(i>>8);
+ intBytes[length++]=(char)(i>>8);
}
intBytes[length++]=(char)i;
- return write(intBytes, length);
+ return length;
}
U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/charstr.cpp b/thirdparty/icu4c/common/charstr.cpp
index 318a185b3f..c35622882c 100644
--- a/thirdparty/icu4c/common/charstr.cpp
+++ b/thirdparty/icu4c/common/charstr.cpp
@@ -14,6 +14,8 @@
* created by: Markus W. Scherer
*/
+#include <cstdlib>
+
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "charstr.h"
@@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
return *this;
}
+CharString &CharString::appendNumber(int32_t number, UErrorCode &status) { // Appends number as decimal ASCII text; returns *this.
+ if (number < 0) { // the sign is emitted first, the digits follow below
+ this->append('-', status);
+ if (U_FAILURE(status)) {
+ return *this;
+ }
+ }
+
+ if (number == 0) { // the digit loop below would emit nothing for 0
+ this->append('0', status);
+ return *this;
+ }
+
+ int32_t numLen = 0; // count of digits appended by the loop below
+ while (number != 0) {
+ int32_t residue = number % 10; // in -9..0 when number is negative (% truncates toward zero)
+ number /= 10;
+ this->append(std::abs(residue) + '0', status); // abs() is applied per digit; number itself is never negated
+ numLen++;
+ if (U_FAILURE(status)) {
+ return *this;
+ }
+ }
+
+ int32_t start = this->length() - numLen, end = this->length() - 1; // bounds of the digits just appended
+ while(start < end) { // digits were appended least-significant first, so reverse them in place
+ std::swap(this->data()[start++], this->data()[end--]);
+ }
+
+ return *this;
+}
+
char *CharString::getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
diff --git a/thirdparty/icu4c/common/charstr.h b/thirdparty/icu4c/common/charstr.h
index 6619faac61..175acd1c0a 100644
--- a/thirdparty/icu4c/common/charstr.h
+++ b/thirdparty/icu4c/common/charstr.h
@@ -127,6 +127,9 @@ public:
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+
+ CharString &appendNumber(int32_t number, UErrorCode &status);
+
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
diff --git a/thirdparty/icu4c/common/cmemory.h b/thirdparty/icu4c/common/cmemory.h
index a9d9424b4e..f03b7dcce6 100644
--- a/thirdparty/icu4c/common/cmemory.h
+++ b/thirdparty/icu4c/common/cmemory.h
@@ -31,14 +31,63 @@
#include <stddef.h>
#include <string.h>
#include "unicode/localpointer.h"
+#include "uassert.h"
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
#endif
-
-#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
-#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
+// uprv_memcpy and uprv_memmove
+#if defined(__clang__)
+#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+ /* Suppress warnings about addresses that will never be NULL */ \
+ _Pragma("clang diagnostic push") \
+ _Pragma("clang diagnostic ignored \"-Waddress\"") \
+ U_ASSERT(dst != NULL); \
+ U_ASSERT(src != NULL); \
+ _Pragma("clang diagnostic pop") \
+ U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+ /* Suppress warnings about addresses that will never be NULL */ \
+ _Pragma("clang diagnostic push") \
+ _Pragma("clang diagnostic ignored \"-Waddress\"") \
+ U_ASSERT(dst != NULL); \
+ U_ASSERT(src != NULL); \
+ _Pragma("clang diagnostic pop") \
+ U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#elif defined(__GNUC__)
+#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+ /* Suppress warnings about addresses that will never be NULL */ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Waddress\"") \
+ U_ASSERT(dst != NULL); \
+ U_ASSERT(src != NULL); \
+ _Pragma("GCC diagnostic pop") \
+ U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+ /* Suppress warnings about addresses that will never be NULL */ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Waddress\"") \
+ U_ASSERT(dst != NULL); \
+ U_ASSERT(src != NULL); \
+ _Pragma("GCC diagnostic pop") \
+ U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#else
+#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+ U_ASSERT(dst != NULL); \
+ U_ASSERT(src != NULL); \
+ U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
+ U_ASSERT(dst != NULL); \
+ U_ASSERT(src != NULL); \
+ U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
+} UPRV_BLOCK_MACRO_END
+#endif
/**
* \def UPRV_LENGTHOF
diff --git a/thirdparty/icu4c/common/dictbe.cpp b/thirdparty/icu4c/common/dictbe.cpp
index b42cdf03fa..44285755f3 100644
--- a/thirdparty/icu4c/common/dictbe.cpp
+++ b/thirdparty/icu4c/common/dictbe.cpp
@@ -265,13 +265,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
- int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound%THAI_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound%THAI_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@@ -503,13 +499,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
- int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound%LAO_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound%LAO_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@@ -699,13 +691,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
- int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@@ -908,13 +896,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
- int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
- if (wordsMatched < 2) {
- // Followed by another dictionary word; mark first word as a good candidate
- words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
- wordsMatched = 2;
- }
+ // Followed by another dictionary word; mark first word as a good candidate
+ words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
diff --git a/thirdparty/icu4c/common/edits.cpp b/thirdparty/icu4c/common/edits.cpp
index 95f0c19a72..92ca36fb5d 100644
--- a/thirdparty/icu4c/common/edits.cpp
+++ b/thirdparty/icu4c/common/edits.cpp
@@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
}
Edits &Edits::operator=(const Edits &other) {
+ if (this == &other) { return *this; } // self-assignment: no-op
length = other.length;
delta = other.delta;
numChanges = other.numChanges;
diff --git a/thirdparty/icu4c/common/filteredbrk.cpp b/thirdparty/icu4c/common/filteredbrk.cpp
index c07128cbce..25080f9d33 100644
--- a/thirdparty/icu4c/common/filteredbrk.cpp
+++ b/thirdparty/icu4c/common/filteredbrk.cpp
@@ -20,6 +20,7 @@
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
#include "uvector.h"
#include "cmemory.h"
+#include "umutex.h"
U_NAMESPACE_BEGIN
@@ -139,13 +140,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
public:
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
- SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
- SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
- virtual ~SimpleFilteredSentenceBreakData();
+ SimpleFilteredSentenceBreakData *incr() {
+ umtx_atomic_inc(&refcount);
+ return this;
+ }
+ SimpleFilteredSentenceBreakData *decr() {
+ if(umtx_atomic_dec(&refcount) <= 0) {
+ delete this;
+ }
+ return 0;
+ }
+ virtual ~SimpleFilteredSentenceBreakData();
+
+ bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
+ bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
- LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
- LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
- int32_t refcount;
+ const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
+ const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
+
+private:
+ // These tries own their data arrays.
+ // They are shared and must therefore not be modified.
+ LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
+ LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
+ u_atomic_int32_t refcount;
};
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
@@ -244,7 +262,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
fDelegate(adopt)
{
- // all set..
+ if (fData == nullptr) {
+ delete forwards;
+ delete backwards;
+ if (U_SUCCESS(status)) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
}
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
@@ -261,59 +285,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t bestValue = -1;
// loops while 'n' points to an exception.
utext_setNativeIndex(fText.getAlias(), n); // from n..
- fData->fBackwardsTrie->reset();
- UChar32 uch;
//if(debug2) u_printf(" n@ %d\n", n);
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
- if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
+ if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
// TODO only do this the 1st time?
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
} else {
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
- uch = utext_next32(fText.getAlias());
+ utext_next32(fText.getAlias());
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
}
- UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
-
- while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
- USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
- if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
- bestPosn = utext_getNativeIndex(fText.getAlias());
- bestValue = fData->fBackwardsTrie->getValue();
- }
- //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
+ {
+ // Do not modify the shared trie!
+ UCharsTrie iter(fData->getBackwardsTrie());
+ UChar32 uch;
+ while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
+ UStringTrieResult r = iter.nextForCodePoint(uch);
+ if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
+ bestPosn = utext_getNativeIndex(fText.getAlias());
+ bestValue = iter.getValue();
+ }
+ if(!USTRINGTRIE_HAS_NEXT(r)) {
+ break;
+ }
+ //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
+ }
}
- if(USTRINGTRIE_MATCHES(r)) { // exact match?
- //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
- bestValue = fData->fBackwardsTrie->getValue();
- bestPosn = utext_getNativeIndex(fText.getAlias());
- //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
- }
+ //if(bestValue >= 0) {
+ //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
+ //}
if(bestPosn>=0) {
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
- //int32_t bestValue = fBackwardsTrie->getValue();
+ //int32_t bestValue = iter.getValue();
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
if(bestValue == kMATCH) { // exact match!
//if(debug2) u_printf(" exact backward match\n");
return kExceptionHere; // See if the next is another exception.
} else if(bestValue == kPARTIAL
- && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
+ && fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
//if(debug2) u_printf(" partial backward match\n");
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
// to see if it matches something going forward.
- fData->fForwardsPartialTrie->reset();
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
+ // Do not modify the shared trie!
+ UCharsTrie iter(fData->getForwardsPartialTrie());
+ UChar32 uch;
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
- USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
+ USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
}
if(USTRINGTRIE_MATCHES(rfwd)) {
@@ -339,7 +366,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
if(n == UBRK_DONE || // at end or
- fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+ !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@@ -369,7 +396,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
if(n == 0 || n == UBRK_DONE || // at end or
- fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
+ !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@@ -420,7 +447,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
- if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
+ if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
UErrorCode status = U_ZERO_ERROR;
resetState(status);
diff --git a/thirdparty/icu4c/common/hash.h b/thirdparty/icu4c/common/hash.h
index f02cb7087a..b927ddb3c3 100644
--- a/thirdparty/icu4c/common/hash.h
+++ b/thirdparty/icu4c/common/hash.h
@@ -85,16 +85,22 @@ public:
inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
+ inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);
+
inline void* get(const UnicodeString& key) const;
inline int32_t geti(const UnicodeString& key) const;
+ inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;
+
inline void* remove(const UnicodeString& key);
inline int32_t removei(const UnicodeString& key);
inline void removeAll(void);
+ inline UBool containsKey(const UnicodeString& key) const;
+
inline const UHashElement* find(const UnicodeString& key) const;
/**
@@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
return uhash_puti(hash, new UnicodeString(key), value, &status);
}
+inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
+ UErrorCode& status) {
+ return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status); // passes a heap-allocated copy of key, as puti() does
+}
+
inline void* Hashtable::get(const UnicodeString& key) const {
return uhash_get(hash, &key);
}
@@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
return uhash_geti(hash, &key);
}
+inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
+ return uhash_getiAndFound(hash, &key, &found); // found is handed over by address as an out-parameter
+}
+
inline void* Hashtable::remove(const UnicodeString& key) {
return uhash_remove(hash, &key);
}
@@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
return uhash_removei(hash, &key);
}
+inline UBool Hashtable::containsKey(const UnicodeString& key) const {
+ return uhash_containsKey(hash, &key); // thin wrapper: forwards the key's address to uhash_containsKey
+}
+
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
return uhash_find(hash, &key);
}
diff --git a/thirdparty/icu4c/common/localematcher.cpp b/thirdparty/icu4c/common/localematcher.cpp
index 5795cbf87e..132aee290e 100644
--- a/thirdparty/icu4c/common/localematcher.cpp
+++ b/thirdparty/icu4c/common/localematcher.cpp
@@ -345,9 +345,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return suppLength; }
- int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
- if (index == 0) {
- uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
+ if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
+ uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
if (U_SUCCESS(errorCode)) {
supportedLSRs[suppLength] = &lsr;
supportedIndexes[suppLength++] = i;
@@ -685,12 +684,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
int32_t bestSupportedLsrIndex = -1;
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
// Quick check for exact maximized LSR.
- // Returns suppIndex+1 where 0 means not found.
if (supportedLsrToIndex != nullptr) {
desiredLSR.setHashCode();
- int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
- if (index != 0) {
- int32_t suppIndex = index - 1;
+ UBool found = false;
+ int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
+ if (found) {
if (remainingIter != nullptr) {
remainingIter->rememberCurrent(desiredIndex, errorCode);
}
diff --git a/thirdparty/icu4c/common/localeprioritylist.cpp b/thirdparty/icu4c/common/localeprioritylist.cpp
index 8916b121be..4455eedb75 100644
--- a/thirdparty/icu4c/common/localeprioritylist.cpp
+++ b/thirdparty/icu4c/common/localeprioritylist.cpp
@@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
if (U_FAILURE(errorCode)) { return false; }
}
LocalPointer<Locale> clone;
- int32_t index = uhash_geti(map, &locale);
- if (index != 0) {
+ UBool found = false;
+ int32_t index = uhash_getiAndFound(map, &locale, &found);
+ if (found) {
// Duplicate: Remove the old item and append it anew.
- LocaleAndWeight &lw = list->array[index - 1];
+ LocaleAndWeight &lw = list->array[index];
clone.adoptInstead(lw.locale);
lw.locale = nullptr;
lw.weight = 0;
++numRemoved;
}
if (weight <= 0) { // do not add q=0
- if (index != 0) {
+ if (found) {
// Not strictly necessary but cleaner.
uhash_removei(map, &locale);
}
@@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
return false;
}
}
- uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
+ uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
if (U_FAILURE(errorCode)) { return false; }
LocaleAndWeight &lw = list->array[listLength];
lw.locale = clone.orphan();
diff --git a/thirdparty/icu4c/common/locdispnames.cpp b/thirdparty/icu4c/common/locdispnames.cpp
index 47c0667417..96af3f9aa8 100644
--- a/thirdparty/icu4c/common/locdispnames.cpp
+++ b/thirdparty/icu4c/common/locdispnames.cpp
@@ -698,7 +698,7 @@ uloc_getDisplayName(const char *locale,
} /* end switch */
if (len>0) {
- /* we addeed a component, so add separator and write it if there's room. */
+ /* we added a component, so add separator and write it if there's room. */
if(len+sepLen<=cap) {
const UChar * plimit = p + len;
for (; p < plimit; p++) {
diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp
index 874e4a7055..0d506293a9 100644
--- a/thirdparty/icu4c/common/locid.cpp
+++ b/thirdparty/icu4c/common/locid.cpp
@@ -254,7 +254,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
Locale::~Locale()
{
- if (baseName != fullName) {
+ if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@@ -466,7 +466,7 @@ Locale& Locale::operator=(const Locale& other) {
}
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
- if (baseName != fullName) uprv_free(baseName);
+ if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
if (fullName != fullNameBuffer) uprv_free(fullName);
if (other.fullName == other.fullNameBuffer) {
@@ -524,7 +524,7 @@ static const char* const KNOWN_CANONICALIZED[] = {
"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
- "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
+ "nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
@@ -627,6 +627,17 @@ private:
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
+
+ // Read the subdivisionAlias data from alias to
+ // strings+types+replacementIndexes
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement subdivision.
+ void readSubdivisionAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
};
/**
@@ -647,6 +658,7 @@ public:
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
+ const CharStringMap& subdivisionMap() const { return subdivision; }
static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
@@ -658,11 +670,13 @@ private:
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
+ CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
+ subdivision(std::move(subdivisionMap)),
strings(strings) {
}
@@ -676,6 +690,7 @@ private:
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
+ CharStringMap subdivision;
CharString* strings;
friend class AliasDataBuilder;
@@ -867,6 +882,34 @@ AliasDataBuilder::readVariantAlias(
}
/**
+ * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement subdivisions.
+ */
+void
+AliasDataBuilder::readSubdivisionAlias(
+ UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status)
+{
+ return readAlias( // all reading is delegated; only the two callbacks below are specific to subdivisions
+ alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+ [](const char* type) {
+ U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8); // debug builds: type must be 3..8 chars long
+ },
+#else
+ [](const char*) {}, // non-debug builds: no check on type
+#endif
+ [](const UnicodeString&) { }, // UnicodeString callback does nothing in every build (presumably the replacement check; confirm against readAlias)
+ status);
+}
+
+/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains alias of language, country, script and variants.
*
@@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+ LocalUResourceBundlePointer subdivisionAlias(
+ ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
- variantLength = 0;
+ variantLength = 0, subdivisionLength = 0;
// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
@@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
variantReplacementIndexes,
variantLength, status);
+ // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
+ // and strings
+ LocalMemory<const char*> subdivisionTypes;
+ LocalMemory<int32_t> subdivisionReplacementIndexes;
+ readSubdivisionAlias(subdivisionAlias.getAlias(),
+ &strings,
+ subdivisionTypes,
+ subdivisionReplacementIndexes,
+ subdivisionLength, status);
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
status);
}
+ // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
+ CharStringMap subdivisionMap(2, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
+ subdivisionMap.put(subdivisionTypes[i],
+ strings.get(subdivisionReplacementIndexes[i]),
+ status);
+ }
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
+ std::move(subdivisionMap),
strings.orphanCharStrings());
if (data == nullptr) {
@@ -1105,6 +1169,14 @@ private:
// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);
+
+ // Replace by using subdivisionAlias.
+ bool replaceSubdivision(StringPiece subdivision,
+ CharString& output, UErrorCode& status);
+
+ // Replace transformed extensions.
+ bool replaceTransformedExtensions(
+ CharString& transformedExtensions, CharString& output, UErrorCode& status);
};
CharString&
@@ -1294,7 +1366,6 @@ AliasReplacer::replaceLanguage(
}
}
if (replacedExtensions != nullptr) {
- // TODO(ICU-21292)
// DO NOTHING
// UTS35 does not specifiy what should we do if we have extensions in the
// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
@@ -1435,6 +1506,106 @@ AliasReplacer::replaceVariant(UErrorCode& status)
return false;
}
+// Looks up `subdivision` in the alias data's subdivision map and, when a
+// mapping exists, appends the canonical subdivision code to `output`.
+//
+// Only the first space-delimited token of the replacement is used, and only
+// when that token is 2 to 8 characters long. A 2-character token has "zzzz"
+// appended to it.
+//
+// Returns true whenever a mapping was found (even if nothing was appended
+// because of the length check), false if there is no mapping or `status`
+// already indicates a failure.
+// NOTE(review): the lookup key is subdivision.data() without a length, so the
+// piece is presumably expected to be NUL-terminated -- confirm against callers.
+bool
+AliasReplacer::replaceSubdivision(
+    StringPiece subdivision, CharString& output, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    const char* mapped = data->subdivisionMap().get(subdivision.data());
+    if (mapped == nullptr) {
+        // No alias registered for this subdivision.
+        return false;
+    }
+    // The replacement may hold several space-separated values; use the first.
+    const char* spacePos = uprv_strchr(mapped, ' ');
+    size_t firstLen = (spacePos == nullptr) ?
+        uprv_strlen(mapped) : (spacePos - mapped);
+    if (firstLen >= 2 && firstLen <= 8) {
+        output.append(mapped, (int32_t)firstLen, status);
+        if (firstLen == 2) {
+            // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
+            output.append("zzzz", 4, status);
+        }
+    }
+    return true;
+}
+
+// Canonicalizes the value of a "t" (transformed content) extension.
+//
+// The tlang part (everything before the first tkey), if present, is parsed as
+// a language tag, canonicalized, converted back to a language tag in `output`
+// and passed through T_CString_toLowerCase(). The tfields ("tkey-tvalue"
+// pairs) that follow are sorted by tkey and appended, each tvalue being
+// replaced by the result of ulocimp_toBcpType() when that is non-null.
+//
+// NOTE: `transformedExtensions` is modified in place -- separators inside its
+// buffer are overwritten with NUL so that each tkey and tvalue becomes its
+// own NUL-terminated string.
+//
+// Returns true on success, false if `status` is (or becomes) a failure.
+// A tkey that is not followed by a tvalue sets U_ILLEGAL_ARGUMENT_ERROR.
+bool
+AliasReplacer::replaceTransformedExtensions(
+    CharString& transformedExtensions, CharString& output, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    int32_t len = transformedExtensions.length();
+    const char* str = transformedExtensions.data();
+    const char* tkey = ultag_getTKeyStart(str);
+    // Length of the tlang prefix: 0 when the value starts with a tkey, the
+    // whole string when there is no tkey at all, otherwise everything before
+    // the separator that precedes the first tkey.
+    int32_t tlangLen = (tkey == str) ? 0 :
+        ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
+    CharStringByteSink sink(&output);
+    if (tlangLen > 0) {
+        Locale tlang = LocaleBuilder()
+            .setLanguageTag(StringPiece(str, tlangLen))
+            .build(status);
+        tlang.canonicalize(status);
+        tlang.toLanguageTag(sink, status);
+        if (U_FAILURE(status)) {
+            return false;
+        }
+        T_CString_toLowerCase(output.data());
+    }
+    if (tkey != nullptr) {
+        // We need to sort the tfields by tkey
+        UVector tfields(status);
+        if (U_FAILURE(status)) {
+            return false;
+        }
+        do {
+            const char* tvalue = uprv_strchr(tkey, '-');
+            if (tvalue == nullptr) {
+                // A tkey without a tvalue is malformed. Bail out right away:
+                // a null tvalue must not be handed to ultag_getTKeyStart().
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return false;
+            }
+            const char* nextTKey = ultag_getTKeyStart(tvalue);
+            if (nextTKey != nullptr) {
+                *((char*)(nextTKey-1)) = '\0';  // NULL terminate tvalue
+            }
+            tfields.insertElementAt((void*)tkey, tfields.size(), status);
+            if (U_FAILURE(status)) {
+                return false;
+            }
+            tkey = nextTKey;
+        } while (tkey != nullptr);
+        tfields.sort([](UElement e1, UElement e2) -> int8_t {
+            // uprv_strcmp returns int, and on some platforms, such as
+            // arm64-v8a, it may return positive values > 127, which would
+            // become negative when cast to int8_t. Normalize to -1/0/1.
+            int res = uprv_strcmp(
+                (const char*)e1.pointer, (const char*)e2.pointer);
+            return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
+        }, status);
+        for (int32_t i = 0; i < tfields.size(); i++) {
+            if (output.length() > 0) {
+                output.append('-', status);
+            }
+            const char* tfield = (const char*) tfields.elementAt(i);
+            const char* tvalue = uprv_strchr(tfield, '-');
+            // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
+            // The collection loop above only stores fields that contain a '-'.
+            U_ASSERT(tvalue != nullptr);
+            *((char*)tvalue++) = '\0';  // NULL terminate tkey
+            output.append(tfield, status).append('-', status);
+            const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
+            output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
+        }
+    }
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    return true;
+}
+
CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
@@ -1453,8 +1624,12 @@ AliasReplacer::outputToString(
out.append(SEP_CHAR, status);
}
variants.sort([](UElement e1, UElement e2) -> int8_t {
- return uprv_strcmp(
+ // uprv_strcmp returns int, and on some platforms, such as arm64-v8a,
+ // it may return positive values > 127, which would become negative
+ // when cast to int8_t.
+ int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
+ return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
int32_t variantsStart = out.length();
for (int32_t i = 0; i < variants.size(); i++) {
@@ -1497,7 +1672,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
region = nullptr;
}
const char* variantsStr = locale.getVariant();
- const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
@@ -1516,8 +1690,12 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
// Sort the variants
variants.sort([](UElement e1, UElement e2) -> int8_t {
- return uprv_strcmp(
+ // uprv_strcmp returns int, and on some platforms, such as arm64-v8a,
+ // it may return positive values > 127, which would become negative
+ // when cast to int8_t.
+ int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
+ return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
// A changed count to assert when loop too many times.
@@ -1561,11 +1739,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
if (U_FAILURE(status)) { return false; }
// Nothing changed and we know the order of the vaiants are not change
// because we have no variant or only one.
- if (changed == 0 && variants.size() <= 1) {
+ const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+ if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
if (extensionsStr != nullptr) {
+ changed = 0;
+ Locale temp(locale);
+ LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
+ if (U_SUCCESS(status) && !iter.isNull()) {
+ const char* key;
+ while ((key = iter->next(nullptr, status)) != nullptr) {
+ if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
+ uprv_strcmp("t", key) == 0) {
+ CharString value;
+ CharStringByteSink valueSink(&value);
+ locale.getKeywordValue(key, valueSink, status);
+ if (U_FAILURE(status)) {
+ status = U_ZERO_ERROR;
+ continue;
+ }
+ CharString replacement;
+ if (uprv_strlen(key) == 2) {
+ if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
+ changed++;
+ temp.setKeywordValue(key, replacement.data(), status);
+ }
+ } else {
+ U_ASSERT(uprv_strcmp(key, "t") == 0);
+ if (replaceTransformedExtensions(value, replacement, status)) {
+ changed++;
+ temp.setKeywordValue(key, replacement.data(), status);
+ }
+ }
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ }
+ }
+ }
+ if (changed != 0) {
+ extensionsStr = locale_getKeywordsStart(temp.getName());
+ }
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
@@ -1573,8 +1792,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
}
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
- U_ASSERT(changed == 0);
- U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
@@ -1636,7 +1853,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
{
fIsBogus = FALSE;
/* Free our current storage */
- if (baseName != fullName) {
+ if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@@ -1672,6 +1889,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
+ U_ASSERT(baseName == nullptr);
/*Go to heap for the fullName if necessary*/
fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
if(fullName == 0) {
@@ -1825,7 +2043,7 @@ Locale::hashCode() const
void
Locale::setToBogus() {
/* Free our current storage */
- if(baseName != fullName) {
+ if((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
diff --git a/thirdparty/icu4c/common/loclikelysubtags.cpp b/thirdparty/icu4c/common/loclikelysubtags.cpp
index a031bfa587..aa592e6ea8 100644
--- a/thirdparty/icu4c/common/loclikelysubtags.cpp
+++ b/thirdparty/icu4c/common/loclikelysubtags.cpp
@@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() {
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
- // Private use language tag x-subtag-subtag...
+ // Private use language tag x-subtag-subtag... which CLDR changes to
+ // und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
diff --git a/thirdparty/icu4c/common/norm2allmodes.h b/thirdparty/icu4c/common/norm2allmodes.h
index e8bd52c6ae..584835da57 100644
--- a/thirdparty/icu4c/common/norm2allmodes.h
+++ b/thirdparty/icu4c/common/norm2allmodes.h
@@ -38,7 +38,7 @@ public:
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
- UErrorCode &errorCode) const {
+ UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
dest.setToBogus();
return dest;
@@ -64,13 +64,13 @@ public:
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
- UErrorCode &errorCode) const {
+ UErrorCode &errorCode) const U_OVERRIDE {
return normalizeSecondAndAppend(first, second, true, errorCode);
}
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
- UErrorCode &errorCode) const {
+ UErrorCode &errorCode) const U_OVERRIDE {
return normalizeSecondAndAppend(first, second, false, errorCode);
}
UnicodeString &
@@ -107,7 +107,7 @@ public:
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
virtual UBool
- getDecomposition(UChar32 c, UnicodeString &decomposition) const {
+ getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
UChar buffer[4];
int32_t length;
const UChar *d=impl.getDecomposition(c, buffer, length);
@@ -122,7 +122,7 @@ public:
return true;
}
virtual UBool
- getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
+ getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
UChar buffer[30];
int32_t length;
const UChar *d=impl.getRawDecomposition(c, buffer, length);
@@ -137,18 +137,18 @@ public:
return true;
}
virtual UChar32
- composePair(UChar32 a, UChar32 b) const {
+ composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
return impl.composePair(a, b);
}
virtual uint8_t
- getCombiningClass(UChar32 c) const {
+ getCombiningClass(UChar32 c) const U_OVERRIDE {
return impl.getCC(impl.getNorm16(c));
}
// quick checks
virtual UBool
- isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
+ isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return false;
}
@@ -161,11 +161,11 @@ public:
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
}
virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
+ quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
}
virtual int32_t
- spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
+ spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return 0;
}
@@ -194,27 +194,57 @@ public:
private:
virtual void
normalize(const UChar *src, const UChar *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.decompose(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
+
+    // Decomposes the UTF-8 text in `src` via impl.decomposeUTF8(), writing
+    // the result to `sink` and flushing it afterwards.
+    // Does nothing if errorCode already indicates a failure.
+    // `edits`, when non-null, is reset first unless U_EDITS_NO_RESET is set
+    // in `options`.
+    void
+    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
+        if (U_FAILURE(errorCode)) {
+            return;
+        }
+        const bool keepEdits = (options & U_EDITS_NO_RESET) != 0;
+        if (edits != nullptr && !keepEdits) {
+            edits->reset();
+        }
+        const uint8_t *start = reinterpret_cast<const uint8_t *>(src.data());
+        const uint8_t *end = start + src.length();
+        impl.decomposeUTF8(options, start, end, &sink, edits, errorCode);
+        sink.Flush();
+    }
+    // Returns true if impl.decomposeUTF8(), called without a sink, gets all
+    // the way to the end of `sp`; false otherwise, or if errorCode already
+    // indicates a failure.
+    virtual UBool
+    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
+        if (U_FAILURE(errorCode)) {
+            return false;
+        }
+        const uint8_t *begin = reinterpret_cast<const uint8_t *>(sp.data());
+        const uint8_t *end = begin + sp.length();
+        // No sink and no edits: decomposeUTF8() only checks the text.
+        const uint8_t *stop =
+            impl.decomposeUTF8(0, begin, end, nullptr, nullptr, errorCode);
+        return end == stop;
+    }
+
virtual const UChar *
- spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
+ spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
return impl.decompose(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
+ virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
}
- virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
- virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
- virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
+ virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
+ return impl.hasDecompBoundaryBefore(c);
+ }
+ virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
+ return impl.hasDecompBoundaryAfter(c);
+ }
+ virtual UBool isInert(UChar32 c) const U_OVERRIDE {
+ return impl.isDecompInert(c);
+ }
};
class ComposeNormalizer2 : public Normalizer2WithImpl {
@@ -321,24 +351,30 @@ public:
private:
virtual void
normalize(const UChar *src, const UChar *limit,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.makeFCD(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
- ReorderingBuffer &buffer, UErrorCode &errorCode) const {
+ ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
- spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
+ spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
return impl.makeFCD(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
- virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
- virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
- virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
+ virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
+ return impl.hasFCDBoundaryBefore(c);
+ }
+ virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
+ return impl.hasFCDBoundaryAfter(c);
+ }
+ virtual UBool isInert(UChar32 c) const U_OVERRIDE {
+ return impl.isFCDInert(c);
+ }
};
struct Norm2AllModes : public UMemory {
diff --git a/thirdparty/icu4c/common/normalizer2impl.cpp b/thirdparty/icu4c/common/normalizer2impl.cpp
index cbf6b4d980..c0ad5c69f3 100644
--- a/thirdparty/icu4c/common/normalizer2impl.cpp
+++ b/thirdparty/icu4c/common/normalizer2impl.cpp
@@ -731,9 +731,131 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
}
+// Dual functionality:
+// sink != nullptr: normalize
+// sink == nullptr: isNormalized/spanQuickCheckYes
+//
+// Scans the UTF-8 text [src, limit[. With a sink, stretches that need no
+// change are passed to ByteSinkUtil::appendUnchanged() (together with options
+// and edits) and stretches that were decomposed/reordered are passed to
+// ByteSinkUtil::appendChange(). Without a sink nothing is written.
+//
+// Returns:
+// - limit, when the whole input was scanned;
+// - with sink == nullptr, prevBoundary as soon as a character fails the
+//   quick check ("no" or combining class out of order);
+// - the current src position when an append call returns false or errorCode
+//   becomes a failure (the loop is left with break).
+const uint8_t *
+Normalizer2Impl::decomposeUTF8(uint32_t options,
+ const uint8_t *src, const uint8_t *limit,
+ ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
+ U_ASSERT(limit != nullptr);
+ UnicodeString s16;
+ // Bytes below this lead byte are skipped by the fast path without a trie lookup.
+ uint8_t minNoLead = leadByteForCP(minDecompNoCP);
+
+ // Start of the text that has not been handed to the sink yet.
+ const uint8_t *prevBoundary = src;
+ // only for quick check
+ uint8_t prevCC = 0;
+
+ for (;;) {
+ // Fast path: Scan over a sequence of characters below the minimum "no" code point,
+ // or with (decompYes && ccc==0) properties.
+ const uint8_t *fastStart = src;
+ const uint8_t *prevSrc;
+ uint16_t norm16 = 0;
+
+ for (;;) {
+ if (src == limit) {
+ // End of input: emit whatever is still pending and finish.
+ if (prevBoundary != limit && sink != nullptr) {
+ ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+ *sink, options, edits, errorCode);
+ }
+ return src;
+ }
+ if (*src < minNoLead) {
+ ++src;
+ } else {
+ prevSrc = src;
+ UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
+ if (!isMostDecompYesAndZeroCC(norm16)) {
+ break;
+ }
+ }
+ }
+ // isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo,
+ // and the current character at [prevSrc..src[ is not a common case with cc=0
+ // (MIN_NORMAL_MAYBE_YES or JAMO_VT).
+ // It could still be a maybeYes with cc=0.
+ if (prevSrc != fastStart) {
+ // The fast path looped over yes/0 characters before the current one.
+ if (sink != nullptr &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ prevBoundary = prevSrc;
+ prevCC = 0;
+ }
+
+ // Medium-fast path: Quick check.
+ if (isMaybeOrNonZeroCC(norm16)) {
+ // Does not decompose.
+ uint8_t cc = getCCFromYesOrMaybe(norm16);
+ if (prevCC <= cc || cc == 0) {
+ // Combining classes are still in order; keep scanning.
+ prevCC = cc;
+ if (cc <= 1) {
+ if (sink != nullptr &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, src,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ }
+ continue;
+ }
+ }
+ if (sink == nullptr) {
+ return prevBoundary; // quick check: "no" or cc out of order
+ }
+
+ // Slow path
+ // Decompose up to and including the current character.
+ if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) {
+ if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ prevBoundary = prevSrc;
+ }
+ ReorderingBuffer buffer(*this, s16, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ decomposeShort(prevBoundary, src, STOP_AT_LIMIT, FALSE /* onlyContiguous */,
+ buffer, errorCode);
+ // Decompose until the next boundary.
+ if (buffer.getLastCC() > 1) {
+ src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, FALSE /* onlyContiguous */,
+ buffer, errorCode);
+ }
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ break;
+ }
+ // We already know there was a change if the original character decomposed;
+ // otherwise compare.
+ if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
+ if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ } else {
+ if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(),
+ *sink, edits, errorCode)) {
+ break;
+ }
+ }
+ prevBoundary = src;
+ prevCC = 0;
+ }
+ // Reached only via break: an append call returned false or errorCode is a failure.
+ return src;
+}
+
const uint8_t *
Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
- UBool stopAtCompBoundary, UBool onlyContiguous,
+ StopAt stopAt, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) {
return nullptr;
@@ -746,21 +868,28 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
UChar32 c = U_SENTINEL;
if (norm16 >= limitNoNo) {
if (isMaybeOrNonZeroCC(norm16)) {
- // No boundaries around this character.
+ // No comp boundaries around this character.
+ uint8_t cc = getCCFromYesOrMaybe(norm16);
+ if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
+ return prevSrc;
+ }
c = codePointFromValidUTF8(prevSrc, src);
- if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) {
+ if (!buffer.append(c, cc, errorCode)) {
return nullptr;
}
+ if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) {
+ return src;
+ }
continue;
}
// Maps to an isCompYesAndZeroCC.
- if (stopAtCompBoundary) {
+ if (stopAt != STOP_AT_LIMIT) {
return prevSrc;
}
c = codePointFromValidUTF8(prevSrc, src);
c = mapAlgorithmic(c, norm16);
norm16 = getRawNorm16(c);
- } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
+ } else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
return prevSrc;
}
// norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
@@ -768,7 +897,8 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
// its norm16==INERT is normalization-inert,
// so it gets copied unchanged in the fast path,
// and we stop the slow path where invalid UTF-8 begins.
- U_ASSERT(norm16 != INERT);
+ // c >= 0 is the result of an algorithmic mapping.
+ U_ASSERT(c >= 0 || norm16 != INERT);
if (norm16 < minYesNo) {
if (c < 0) {
c = codePointFromValidUTF8(prevSrc, src);
@@ -798,11 +928,15 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
} else {
leadCC = 0;
}
+ if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
+ return prevSrc;
+ }
if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
return nullptr;
}
}
- if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) ||
+ (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) {
return src;
}
}
@@ -1954,10 +2088,10 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
break;
}
// We know there is not a boundary here.
- decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
+ decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous,
buffer, errorCode);
// Decompose until the next boundary.
- src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
+ src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,
buffer, errorCode);
if (U_FAILURE(errorCode)) {
break;
diff --git a/thirdparty/icu4c/common/normalizer2impl.h b/thirdparty/icu4c/common/normalizer2impl.h
index 4218a30a34..bdb6767a92 100644
--- a/thirdparty/icu4c/common/normalizer2impl.h
+++ b/thirdparty/icu4c/common/normalizer2impl.h
@@ -491,6 +491,12 @@ public:
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
+
+ /** sink==nullptr: isNormalized()/spanQuickCheckYes() */
+ const uint8_t *decomposeUTF8(uint32_t options,
+ const uint8_t *src, const uint8_t *limit,
+ ByteSink *sink, Edits *edits, UErrorCode &errorCode) const;
+
UBool compose(const UChar *src, const UChar *limit,
UBool onlyContiguous,
UBool doCompose,
@@ -649,6 +655,9 @@ private:
UChar32 minNeedDataCP,
ReorderingBuffer *buffer,
UErrorCode &errorCode) const;
+
+ enum StopAt { STOP_AT_LIMIT, STOP_AT_DECOMP_BOUNDARY, STOP_AT_COMP_BOUNDARY };
+
const UChar *decomposeShort(const UChar *src, const UChar *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
@@ -656,7 +665,7 @@ private:
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
- UBool stopAtCompBoundary, UBool onlyContiguous,
+ StopAt stopAt, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
static int32_t combine(const uint16_t *list, UChar32 trail);
diff --git a/thirdparty/icu4c/common/pluralmap.h b/thirdparty/icu4c/common/pluralmap.h
index d898ac4671..2a14a07af1 100644
--- a/thirdparty/icu4c/common/pluralmap.h
+++ b/thirdparty/icu4c/common/pluralmap.h
@@ -24,7 +24,7 @@ class U_COMMON_API PluralMapBase : public UMemory {
public:
/**
* The names of all the plural categories. NONE is not an actual plural
- * category, but rather represents the absense of a plural category.
+ * category, but rather represents the absence of a plural category.
*/
enum Category {
NONE = -1,
diff --git a/thirdparty/icu4c/common/putil.cpp b/thirdparty/icu4c/common/putil.cpp
index 3ed6a05d22..ffcbbcce59 100644
--- a/thirdparty/icu4c/common/putil.cpp
+++ b/thirdparty/icu4c/common/putil.cpp
@@ -1139,7 +1139,7 @@ uprv_tzname(int n)
#endif
if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
- /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
+ /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
&& uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
) {
@@ -1361,7 +1361,7 @@ uprv_pathIsAbsolute(const char *path)
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
(needed for some Darwin ICU build environments) */
-#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
+#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
# endif
diff --git a/thirdparty/icu4c/common/putilimp.h b/thirdparty/icu4c/common/putilimp.h
index a325c6c359..5b95a68418 100644
--- a/thirdparty/icu4c/common/putilimp.h
+++ b/thirdparty/icu4c/common/putilimp.h
@@ -527,7 +527,7 @@ U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base);
* on the destination pointer and capacity cannot overflow.
*
* The pinned capacity must fulfill the following conditions (for positive capacities):
- * - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.)
+ * - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
* - (dest + capacity) >= dest
* - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
*
diff --git a/thirdparty/icu4c/common/rbbi.cpp b/thirdparty/icu4c/common/rbbi.cpp
index 9b7e70c3cf..b821ca4463 100644
--- a/thirdparty/icu4c/common/rbbi.cpp
+++ b/thirdparty/icu4c/common/rbbi.cpp
@@ -812,7 +812,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
}
#endif
- // handleNext alway sets the break tag value.
+ // handleNext always sets the break tag value.
// Set the default for it.
fRuleStatusIndex = 0;
diff --git a/thirdparty/icu4c/common/rbbi_cache.cpp b/thirdparty/icu4c/common/rbbi_cache.cpp
index 63ff3001c7..44f19d8697 100644
--- a/thirdparty/icu4c/common/rbbi_cache.cpp
+++ b/thirdparty/icu4c/common/rbbi_cache.cpp
@@ -258,7 +258,7 @@ void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode
previous(status);
} else {
// seek() leaves the BreakCache positioned at the preceding boundary
- // if the requested position is between two bounaries.
+ // if the requested position is between two boundaries.
// current() pushes the BreakCache position out to the BreakIterator itself.
U_ASSERT(startPos > fTextIdx);
current();
diff --git a/thirdparty/icu4c/common/rbbiscan.cpp b/thirdparty/icu4c/common/rbbiscan.cpp
index 9c406af671..45911b1cfe 100644
--- a/thirdparty/icu4c/common/rbbiscan.cpp
+++ b/thirdparty/icu4c/common/rbbiscan.cpp
@@ -284,7 +284,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
case doEndAssign:
{
- // We have reached the end of an assignement statement.
+ // We have reached the end of an assignment statement.
// Current scan char is the ';' that terminates the assignment.
// Terminate expression, leaves expression parse tree rooted in TOS node.
@@ -856,6 +856,10 @@ UChar32 RBBIRuleScanner::nextCharLL() {
return (UChar32)-1;
}
ch = fRB->fRules.char32At(fNextIndex);
+ if (U_IS_SURROGATE(ch)) {
+ error(U_ILLEGAL_CHAR_FOUND);
+ return U_SENTINEL;
+ }
fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
if (ch == chCR ||
diff --git a/thirdparty/icu4c/common/rbbitblb.cpp b/thirdparty/icu4c/common/rbbitblb.cpp
index 70e260fc08..dd76337bc6 100644
--- a/thirdparty/icu4c/common/rbbitblb.cpp
+++ b/thirdparty/icu4c/common/rbbitblb.cpp
@@ -151,7 +151,7 @@ void RBBITableBuilder::buildForwardTable() {
//
// calculate the functions nullable, firstpos, lastpos and followpos on
// nodes in the parse tree.
- // See the alogrithm description in Aho.
+ // See the algorithm description in Aho.
// Understanding how this works by looking at the code alone will be
// nearly impossible.
//
diff --git a/thirdparty/icu4c/common/resource.h b/thirdparty/icu4c/common/resource.h
index 3795694412..48f5b9fa6e 100644
--- a/thirdparty/icu4c/common/resource.h
+++ b/thirdparty/icu4c/common/resource.h
@@ -274,8 +274,10 @@ public:
*
* @param key The key string of the enumeration-start resource.
* Empty if the enumeration starts at the top level of the bundle.
- * @param value Call getArray() or getTable() as appropriate.
- * Then reuse for output values from Array and Table getters.
+ * @param value Call getArray() or getTable() as appropriate. Then reuse for
+ * output values from Array and Table getters. Note: ResourceTable and
+ * ResourceArray instances must outlive the ResourceValue instance for
+ * ResourceTracer to be happy.
* @param noFallback true if the bundle has no parent;
* that is, its top-level table has the nofallback attribute,
* or it is the root bundle of a locale tree.
diff --git a/thirdparty/icu4c/common/restrace.cpp b/thirdparty/icu4c/common/restrace.cpp
index 5c6498850e..1f83372d68 100644
--- a/thirdparty/icu4c/common/restrace.cpp
+++ b/thirdparty/icu4c/common/restrace.cpp
@@ -54,6 +54,9 @@ void ResourceTracer::traceOpen() const {
CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const {
if (fResB) {
+ // Note: if you get a segfault around here, check that ResourceTable and
+ // ResourceArray instances outlive ResourceValue instances referring to
+ // their contents:
output.append(fResB->fData->fPath, status);
output.append('/', status);
output.append(fResB->fData->fName, status);
diff --git a/thirdparty/icu4c/common/servnotf.h b/thirdparty/icu4c/common/servnotf.h
index 305570c1e6..73ce38c772 100644
--- a/thirdparty/icu4c/common/servnotf.h
+++ b/thirdparty/icu4c/common/servnotf.h
@@ -82,7 +82,7 @@ public:
/**
* Add a listener to be notified when notifyChanged is called.
* The listener must not be null. AcceptsListener must return
- * true for the listener. Attempts to concurrently
+ * true for the listener. Attempts to concurrently
* register the identical listener more than once will be
* silently ignored.
*/
@@ -90,7 +90,7 @@ public:
/**
* Stop notifying this listener. The listener must
- * not be null. Attemps to remove a listener that is
+ * not be null. Attempts to remove a listener that is
* not registered will be silently ignored.
*/
virtual void removeListener(const EventListener* l, UErrorCode& status);
diff --git a/thirdparty/icu4c/common/ubrk.cpp b/thirdparty/icu4c/common/ubrk.cpp
index f8bdf5a6b6..bb5bdd1b50 100644
--- a/thirdparty/icu4c/common/ubrk.cpp
+++ b/thirdparty/icu4c/common/ubrk.cpp
@@ -174,6 +174,18 @@ ubrk_safeClone(
return (UBreakIterator *)newBI;
}
+/**
+ * Creates a copy of a break iterator by calling BreakIterator::clone().
+ *
+ * @param bi     The iterator to copy. A null pointer is rejected with
+ *               U_ILLEGAL_ARGUMENT_ERROR.
+ * @param status In/out error code. Nothing is done if it is null or already
+ *               indicates a failure. Set to U_MEMORY_ALLOCATION_ERROR if
+ *               clone() returns null.
+ * @return The new iterator, or nullptr on failure.
+ */
+U_CAPI UBreakIterator * U_EXPORT2
+ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
+    // Check the pointers before dereferencing them.
+    if (status == nullptr || U_FAILURE(*status)) {
+        return nullptr;
+    }
+    if (bi == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
+    if (newBI == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    return (UBreakIterator *)newBI;
+}
U_CAPI void U_EXPORT2
diff --git a/thirdparty/icu4c/common/ucase.cpp b/thirdparty/icu4c/common/ucase.cpp
index 2b142f5bc2..4f4c274d60 100644
--- a/thirdparty/icu4c/common/ucase.cpp
+++ b/thirdparty/icu4c/common/ucase.cpp
@@ -681,7 +681,7 @@ ucase_isCaseSensitive(UChar32 c) {
* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
* - Given D = NFD(C), then it is not the case that:
* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
- * (This third criterium does not add any characters to the list
+ * (This third criterion does not add any characters to the list
* for Unicode 3.2. Ignored.)
*
* D2. A character C is defined to be case-ignorable
diff --git a/thirdparty/icu4c/common/uchar.cpp b/thirdparty/icu4c/common/uchar.cpp
index eb14e4c75d..61e9c3d900 100644
--- a/thirdparty/icu4c/common/uchar.cpp
+++ b/thirdparty/icu4c/common/uchar.cpp
@@ -194,7 +194,7 @@ u_isISOControl(UChar32 c) {
/* Some control characters that are used as space. */
#define IS_THAT_CONTROL_SPACE(c) \
- (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
+ (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==0x85))
/* Java has decided that U+0085 New Line is not whitespace any more. */
#define IS_THAT_ASCII_CONTROL_SPACE(c) \
@@ -677,14 +677,14 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
sa->add(sa->set, CR+1); /* range TAB..CR */
sa->add(sa->set, 0x1c);
sa->add(sa->set, 0x1f+1);
- USET_ADD_CP_AND_NEXT(sa, NL);
+ USET_ADD_CP_AND_NEXT(sa, 0x85); // NEXT LINE (NEL)
/* add for u_isIDIgnorable() what was not added above */
- sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
+ sa->add(sa->set, 0x7f); /* range DEL..NBSP-1, NBSP added below */
sa->add(sa->set, HAIRSP);
sa->add(sa->set, RLM+1);
- sa->add(sa->set, INHSWAP);
- sa->add(sa->set, NOMDIG+1);
+ sa->add(sa->set, 0x206a); // INHIBIT SYMMETRIC SWAPPING
+ sa->add(sa->set, 0x206f+1); // NOMINAL DIGIT SHAPES
USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
/* add no-break spaces for u_isWhitespace() what was not added above */
@@ -693,23 +693,25 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
USET_ADD_CP_AND_NEXT(sa, NNBSP);
/* add for u_digit() */
- sa->add(sa->set, U_a);
- sa->add(sa->set, U_z+1);
- sa->add(sa->set, U_A);
- sa->add(sa->set, U_Z+1);
- sa->add(sa->set, U_FW_a);
- sa->add(sa->set, U_FW_z+1);
- sa->add(sa->set, U_FW_A);
- sa->add(sa->set, U_FW_Z+1);
+ sa->add(sa->set, u'a');
+ sa->add(sa->set, u'z'+1);
+ sa->add(sa->set, u'A');
+ sa->add(sa->set, u'Z'+1);
+ // fullwidth
+ sa->add(sa->set, u'ａ');
+ sa->add(sa->set, u'ｚ'+1);
+ sa->add(sa->set, u'Ａ');
+ sa->add(sa->set, u'Ｚ'+1);
/* add for u_isxdigit() */
- sa->add(sa->set, U_f+1);
- sa->add(sa->set, U_F+1);
- sa->add(sa->set, U_FW_f+1);
- sa->add(sa->set, U_FW_F+1);
+ sa->add(sa->set, u'f'+1);
+ sa->add(sa->set, u'F'+1);
+ // fullwidth
+ sa->add(sa->set, u'ｆ'+1);
+ sa->add(sa->set, u'Ｆ'+1);
/* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
- sa->add(sa->set, WJ); /* range WJ..NOMDIG */
+ sa->add(sa->set, 0x2060); /* range 2060..206f */
sa->add(sa->set, 0xfff0);
sa->add(sa->set, 0xfffb+1);
sa->add(sa->set, 0xe0000);
diff --git a/thirdparty/icu4c/common/ucnv2022.cpp b/thirdparty/icu4c/common/ucnv2022.cpp
index 169ad4c526..1726440b94 100644
--- a/thirdparty/icu4c/common/ucnv2022.cpp
+++ b/thirdparty/icu4c/common/ucnv2022.cpp
@@ -820,7 +820,7 @@ getKey_2022(char c,int32_t* key,int32_t* offset){
return INVALID_2022;
}
-/*runs through a state machine to determine the escape sequence - codepage correspondance
+/*runs through a state machine to determine the escape sequence - codepage correspondence
*/
static void
changeState_2022(UConverter* _this,
@@ -1424,7 +1424,7 @@ toUnicodeCallback(UConverter *cnv,
* KSC5601 : alias to ibm-949 mapping table
* GB2312 : alias to ibm-1386 mapping table
* ISO-8859-1 : Algorithmic implemented as LATIN1 case
-* ISO-8859-7 : alisas to ibm-9409 mapping table
+* ISO-8859-7 : alias to ibm-9409 mapping table
*/
/* preference order of JP charsets */
@@ -2324,7 +2324,7 @@ endloop:
/***************************************************************
* Rules for ISO-2022-KR encoding
* i) The KSC5601 designator sequence should appear only once in a file,
-* at the begining of a line before any KSC5601 characters. This usually
+* at the beginning of a line before any KSC5601 characters. This usually
* means that it appears by itself on the first line of the file
* ii) There are only 2 shifting sequences SO to shift into double byte mode
* and SI to shift into single byte mode
diff --git a/thirdparty/icu4c/common/ucnv_bld.cpp b/thirdparty/icu4c/common/ucnv_bld.cpp
index 0e198892f1..d08eec7369 100644
--- a/thirdparty/icu4c/common/ucnv_bld.cpp
+++ b/thirdparty/icu4c/common/ucnv_bld.cpp
@@ -427,7 +427,7 @@ getAlgorithmicTypeFromName(const char *realName)
#define UCNV_CACHE_LOAD_FACTOR 2
/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
-/* Will always be called with the cnvCacheMutex alrady being held */
+/* Will always be called with the cnvCacheMutex already being held */
/* by the calling function. */
/* Stores the shared data in the SHARED_DATA_HASHTABLE
* @param data The shared data
diff --git a/thirdparty/icu4c/common/ucnv_err.cpp b/thirdparty/icu4c/common/ucnv_err.cpp
index 6b738face5..e1f2b934aa 100644
--- a/thirdparty/icu4c/common/ucnv_err.cpp
+++ b/thirdparty/icu4c/common/ucnv_err.cpp
@@ -321,7 +321,7 @@ UCNV_FROM_U_CALLBACK_ESCAPE (
case UCNV_PRV_ESCAPE_CSS2:
valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
- /* Always add space character, becase the next character might be whitespace,
+ /* Always add space character, because the next character might be whitespace,
which would erroneously be considered the termination of the escape sequence. */
valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
break;
diff --git a/thirdparty/icu4c/common/ucnv_lmb.cpp b/thirdparty/icu4c/common/ucnv_lmb.cpp
index 168392837b..41317d1cc0 100644
--- a/thirdparty/icu4c/common/ucnv_lmb.cpp
+++ b/thirdparty/icu4c/common/ucnv_lmb.cpp
@@ -81,7 +81,7 @@
[G] D1 [D2]
That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2
- data bytes. The maximum size of a LMBCS chjaracter is 3 bytes:
+ data bytes. The maximum size of a LMBCS character is 3 bytes:
*/
#define ULMBCS_CHARSIZE_MAX 3
/*
@@ -164,7 +164,7 @@ beginning of internal 'system' range names: */
/* Then we needed a place to put all the other ansi control characters
that must be moved to different values because LMBCS reserves those
values for other purposes. To represent the control characters, we start
-with a first byte of 0xF & add the control chaarcter value as the
+with a first byte of 0xF & add the control character value as the
second byte */
#define ULMBCS_GRP_CTRL 0x0F
diff --git a/thirdparty/icu4c/common/ucnv_u7.cpp b/thirdparty/icu4c/common/ucnv_u7.cpp
index 87ba8cf37e..de9f3f42ec 100644
--- a/thirdparty/icu4c/common/ucnv_u7.cpp
+++ b/thirdparty/icu4c/common/ucnv_u7.cpp
@@ -814,7 +814,7 @@ const UConverterSharedData _UTF7Data=
* the use of "~" in some servers as a home directory indicator.
*
* 5) UTF-7 permits multiple alternate forms to represent the same
- * string; in particular, printable US-ASCII chararacters can be
+ * string; in particular, printable US-ASCII characters can be
* represented in encoded form.
*
* In modified UTF-7, printable US-ASCII characters except for "&"
diff --git a/thirdparty/icu4c/common/ucnvisci.cpp b/thirdparty/icu4c/common/ucnvisci.cpp
index 44a7c05a3c..ffb8c7ac3e 100644
--- a/thirdparty/icu4c/common/ucnvisci.cpp
+++ b/thirdparty/icu4c/common/ucnvisci.cpp
@@ -992,7 +992,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
if (sourceChar == PNJ_TIPPI) {
- /* Make sure Tippi is converterd to Bindi. */
+ /* Make sure Tippi is converted to Bindi. */
sourceChar = PNJ_BINDI;
} else if (sourceChar == PNJ_ADHAK) {
/* This is for consonant cluster handling. */
@@ -1147,7 +1147,7 @@ static const uint16_t lookupTable[][2]={
/* is the code point valid in current script? */ \
if(sourceChar> ASCII_END && \
(validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
- /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \
+ /* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
targetUniChar!=VOCALLIC_RR){ \
targetUniChar=missingCharMarker; \
@@ -1272,7 +1272,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCo
goto CALLBACK;
} else if (*contextCharToUnicode==ISCII_INV) {
if (sourceChar==ISCII_HALANT) {
- targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
+ targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
} else {
targetUniChar = ZWJ;
}
diff --git a/thirdparty/icu4c/common/ucurr.cpp b/thirdparty/icu4c/common/ucurr.cpp
index 0e14cddcff..20bbd51488 100644
--- a/thirdparty/icu4c/common/ucurr.cpp
+++ b/thirdparty/icu4c/common/ucurr.cpp
@@ -844,7 +844,7 @@ typedef struct {
#endif
-// Comparason function used in quick sort.
+// Comparison function used in quick sort.
static int U_CALLCONV currencyNameComparator(const void* a, const void* b) {
const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a;
const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b;
@@ -1530,7 +1530,7 @@ uprv_parseCurrency(const char* locale,
int32_t max = 0;
int32_t matchIndex = -1;
- // case in-sensitive comparision against currency names
+ // case in-sensitive comparison against currency names
searchCurrencyName(currencyNames, total_currency_name_count,
upperText, textLen, partialMatchLen, &max, &matchIndex);
diff --git a/thirdparty/icu4c/common/uhash.cpp b/thirdparty/icu4c/common/uhash.cpp
index 86311ceb0b..67c7c36354 100644
--- a/thirdparty/icu4c/common/uhash.cpp
+++ b/thirdparty/icu4c/common/uhash.cpp
@@ -133,8 +133,10 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = {
* or a pointer. If a hint bit is zero, then the associated
* token is assumed to be an integer.
*/
+#define HINT_BOTH_INTEGERS (0)
#define HINT_KEY_POINTER (1)
#define HINT_VALUE_POINTER (2)
+#define HINT_ALLOW_ZERO (4)
/********************************************************************
* PRIVATE Implementation
@@ -479,8 +481,9 @@ _uhash_put(UHashtable *hash,
goto err;
}
U_ASSERT(hash != NULL);
- /* Cannot always check pointer here or iSeries sees NULL every time. */
- if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
+ if ((hint & HINT_VALUE_POINTER) ?
+ value.pointer == NULL :
+ value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
/* Disallow storage of NULL values, since NULL is returned by
* get() to indicate an absent key. Storing NULL == removing.
*/
@@ -687,6 +690,28 @@ uhash_igeti(const UHashtable *hash,
return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
}
+U_CAPI int32_t U_EXPORT2
+uhash_getiAndFound(const UHashtable *hash, // Looks up a pointer key; returns the slot's integer value and reports presence via *found.
+ const void *key,
+ UBool *found) { // out-parameter: written unconditionally below, so it must not be null
+ UHashTok keyholder;
+ keyholder.pointer = (void *)key; // const is cast away to store the key in the token's pointer field
+ const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
+ *found = !IS_EMPTY_OR_DELETED(e->hashcode); // the key counts as present unless the slot's hashcode is flagged empty-or-deleted
+ return e->value.integer; // returned regardless of *found; callers should consult *found first
+}
+
+U_CAPI int32_t U_EXPORT2
+uhash_igetiAndFound(const UHashtable *hash, // Integer-key variant: returns the slot's integer value and reports presence via *found.
+ int32_t key,
+ UBool *found) { // out-parameter: written unconditionally below, so it must not be null
+ UHashTok keyholder;
+ keyholder.integer = key; // the key travels in the token's integer field
+ const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
+ *found = !IS_EMPTY_OR_DELETED(e->hashcode); // the key counts as present unless the slot's hashcode is flagged empty-or-deleted
+ return e->value.integer; // returned regardless of *found; callers should consult *found first
+}
+
U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash,
void* key,
@@ -736,7 +761,34 @@ uhash_iputi(UHashtable *hash,
keyholder.integer = key;
valueholder.integer = value;
return _uhash_put(hash, keyholder, valueholder,
- 0, /* neither is a ptr */
+ HINT_BOTH_INTEGERS,
+ status).integer;
+}
+
+U_CAPI int32_t U_EXPORT2
+uhash_putiAllowZero(UHashtable *hash, // Stores an integer value under a pointer key, permitting a value of 0.
+ void *key,
+ int32_t value,
+ UErrorCode *status) {
+ UHashTok keyholder, valueholder;
+ keyholder.pointer = key; // pointer key goes in the token's pointer field
+ valueholder.integer = value; // integer value goes in the token's integer field
+ return _uhash_put(hash, keyholder, valueholder,
+ HINT_KEY_POINTER | HINT_ALLOW_ZERO, // value is an integer (no HINT_VALUE_POINTER); HINT_ALLOW_ZERO makes _uhash_put skip its "zero value means remove" check
+ status).integer; // the integer field of whatever token _uhash_put returns
+}
+
+
+U_CAPI int32_t U_EXPORT2
+uhash_iputiAllowZero(UHashtable *hash, // Stores an integer value under an integer key, permitting a value of 0.
+ int32_t key,
+ int32_t value,
+ UErrorCode *status) {
+ UHashTok keyholder, valueholder;
+ keyholder.integer = key; // both key and value travel in the tokens' integer fields
+ valueholder.integer = value;
+ return _uhash_put(hash, keyholder, valueholder,
+ HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO, // neither token is a pointer; HINT_ALLOW_ZERO makes _uhash_put skip its "zero value means remove" check
 status).integer; // the integer field of whatever token _uhash_put returns
 }
@@ -785,6 +837,29 @@ uhash_removeAll(UHashtable *hash) {
U_ASSERT(hash->count == 0);
}
+U_CAPI UBool U_EXPORT2
+uhash_containsKey(const UHashtable *hash, const void *key) { // Reports whether the table holds an entry for this pointer key.
+ UHashTok keyholder;
+ keyholder.pointer = (void *)key; // const is cast away to store the key in the token's pointer field
+ const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
+ return !IS_EMPTY_OR_DELETED(e->hashcode); // present unless the slot's hashcode is flagged empty-or-deleted; the stored value is never inspected
+}
+
+/**
+ * Returns true if the UHashtable contains an item with this integer key.
+ *
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @return true if the key is found.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_icontainsKey(const UHashtable *hash, int32_t key) {
+ UHashTok keyholder;
+ keyholder.integer = key; // the key travels in the token's integer field
+ const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
+ return !IS_EMPTY_OR_DELETED(e->hashcode); // present unless the slot's hashcode is flagged empty-or-deleted; the stored value is never inspected
+}
+
U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key) {
UHashTok keyholder;
diff --git a/thirdparty/icu4c/common/uhash.h b/thirdparty/icu4c/common/uhash.h
index b59d2711bb..af75999860 100644
--- a/thirdparty/icu4c/common/uhash.h
+++ b/thirdparty/icu4c/common/uhash.h
@@ -23,7 +23,7 @@
/**
* UHashtable stores key-value pairs and does moderately fast lookup
* based on keys. It provides a good tradeoff between access time and
- * storage space. As elements are added to it, it grows to accomodate
+ * storage space. As elements are added to it, it grows to accommodate
* them. By default, the table never shrinks, even if all elements
* are removed from it.
*
@@ -54,6 +54,13 @@
* uhash_remove() on that key. This keeps uhash_get(), uhash_count(),
* and uhash_nextElement() consistent with one another.
*
+ * Keys and values can be integers.
+ * Functions that work with an integer key have an "i" prefix.
+ * Functions that work with an integer value have an "i" suffix.
+ * As with putting a NULL value pointer, putting a zero value integer removes the item.
+ * Except, there are pairs of functions that allow setting zero values
+ * and fetching (value, found) pairs.
+ *
* To see everything in a hashtable, use uhash_nextElement() to
* iterate through its contents. Each call to this function returns a
* UHashElement pointer. A hash element contains a key, value, and
@@ -406,6 +413,44 @@ uhash_iputi(UHashtable *hash,
UErrorCode *status);
/**
+ * Put a (key=pointer, value=integer) item in a UHashtable. If the
+ * keyDeleter is non-NULL, then the hashtable owns 'key' after this
+ * call. valueDeleter must be NULL.
+ * Storing a 0 value is possible; call uhash_getiAndFound() to retrieve values including zero.
+ *
+ * @param hash The target UHashtable.
+ * @param key The key to store.
+ * @param value The integer value to store.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The previous value, or 0 if none.
+ * @see uhash_getiAndFound
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_putiAllowZero(UHashtable *hash,
+ void *key,
+ int32_t value,
+ UErrorCode *status);
+
+/**
+ * Put a (key=integer, value=integer) item in a UHashtable. If the
+ * keyDeleter is non-NULL, then the hashtable owns 'key' after this
+ * call. valueDeleter must be NULL.
+ * Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero.
+ *
+ * @param hash The target UHashtable.
+ * @param key The key to store.
+ * @param value The integer value to store.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The previous value, or 0 if none.
+ * @see uhash_igetiAndFound
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_iputiAllowZero(UHashtable *hash,
+ int32_t key,
+ int32_t value,
+ UErrorCode *status);
+
+/**
* Retrieve a pointer value from a UHashtable using a pointer key,
* as previously stored by uhash_put().
* @param hash The target UHashtable.
@@ -449,6 +494,34 @@ uhash_igeti(const UHashtable *hash,
int32_t key);
/**
+ * Retrieves an integer value from a UHashtable using a pointer key,
+ * as previously stored by uhash_putiAllowZero() or uhash_puti().
+ *
+ * @param hash The target UHashtable.
+ * @param key A pointer key stored in a hashtable
+ * @param found A pointer to a boolean which will be set for whether the key was found.
+ * @return The requested item, or 0 if not found.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_getiAndFound(const UHashtable *hash,
+ const void *key,
+ UBool *found);
+
+/**
+ * Retrieves an integer value from a UHashtable using an integer key,
+ * as previously stored by uhash_iputiAllowZero() or uhash_iputi().
+ *
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @param found A pointer to a boolean which will be set for whether the key was found.
+ * @return The requested item, or 0 if not found.
+ */
+U_CAPI int32_t U_EXPORT2
+uhash_igetiAndFound(const UHashtable *hash,
+ int32_t key,
+ UBool *found);
+
+/**
* Remove an item from a UHashtable stored by uhash_put().
* @param hash The target UHashtable.
* @param key A key stored in a hashtable
@@ -496,6 +569,26 @@ U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash);
/**
+ * Returns true if the UHashtable contains an item with this pointer key.
+ *
+ * @param hash The target UHashtable.
+ * @param key A pointer key stored in a hashtable
+ * @return true if the key is found.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_containsKey(const UHashtable *hash, const void *key);
+
+/**
+ * Returns true if the UHashtable contains an item with this integer key.
+ *
+ * @param hash The target UHashtable.
+ * @param key An integer key stored in a hashtable
+ * @return true if the key is found.
+ */
+U_CAPI UBool U_EXPORT2
+uhash_icontainsKey(const UHashtable *hash, int32_t key);
+
+/**
* Locate an element of a UHashtable. The caller must not modify the
* returned object. The primary use of this function is to obtain the
* stored key when it may not be identical to the search key. For
diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp
index ebfbb50650..d96e79b8fd 100644
--- a/thirdparty/icu4c/common/uloc.cpp
+++ b/thirdparty/icu4c/common/uloc.cpp
@@ -143,7 +143,7 @@ static const char * const LANGUAGES[] = {
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
- "ml", "mn", "mnc", "mni", "mo",
+ "ml", "mn", "mnc", "mni",
"moh", "mos", "mr", "mrj",
"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
"my", "mye", "myv", "mzn",
@@ -166,9 +166,9 @@ static const char * const LANGUAGES[] = {
"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
- "sv", "sw", "swb", "swc", "syc", "syr", "szl",
+ "sv", "sw", "swb", "syc", "syr", "szl",
"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
- "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
+ "th", "ti", "tig", "tiv", "tk", "tkl", "tkr",
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
@@ -181,7 +181,7 @@ static const char * const LANGUAGES[] = {
"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
"zun", "zxx", "zza",
NULL,
- "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
+ "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", /* obsolete language codes */
NULL
};
@@ -260,7 +260,7 @@ static const char * const LANGUAGES_3[] = {
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
- "mal", "mon", "mnc", "mni", "mol",
+ "mal", "mon", "mnc", "mni",
"moh", "mos", "mar", "mrj",
"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
"mya", "mye", "myv", "mzn",
@@ -283,9 +283,9 @@ static const char * const LANGUAGES_3[] = {
"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
- "swe", "swa", "swb", "swc", "syc", "syr", "szl",
+ "swe", "swa", "swb", "syc", "syr", "szl",
"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
- "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
+ "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
@@ -298,8 +298,8 @@ static const char * const LANGUAGES_3[] = {
"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
"zun", "zxx", "zza",
NULL,
-/* "in", "iw", "ji", "jw", "sh", */
- "ind", "heb", "yid", "jaw", "srp",
+/* "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", */
+ "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
NULL
};
@@ -334,13 +334,13 @@ static const char * const COUNTRIES[] = {
"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
- "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
- "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
+ "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
+ "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
- "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
+ "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
@@ -357,7 +357,7 @@ static const char * const COUNTRIES[] = {
"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
- "WS", "YE", "YT", "ZA", "ZM", "ZW",
+ "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
NULL,
"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL
@@ -397,10 +397,10 @@ static const char * const COUNTRIES_3[] = {
"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
-/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
- "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
-/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
- "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
+/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
+ "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
+/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
+ "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
@@ -409,8 +409,8 @@ static const char * const COUNTRIES_3[] = {
"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
-/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
- "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
+/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
+ "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
@@ -443,8 +443,8 @@ static const char * const COUNTRIES_3[] = {
"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
-/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
- "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
+/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
+ "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
diff --git a/thirdparty/icu4c/common/uloc_keytype.cpp b/thirdparty/icu4c/common/uloc_keytype.cpp
index 019da058cf..c289ebe76f 100644
--- a/thirdparty/icu4c/common/uloc_keytype.cpp
+++ b/thirdparty/icu4c/common/uloc_keytype.cpp
@@ -271,7 +271,7 @@ initFromResourceBundle(UErrorCode& sts) {
if (U_FAILURE(sts)) {
break;
}
- // check if this is an alias of canoncal legacy type
+ // check if this is an alias of canonical legacy type
if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
const char* from = ures_getKey(typeAliasDataEntry.getAlias());
if (isTZ) {
diff --git a/thirdparty/icu4c/common/uloc_tag.cpp b/thirdparty/icu4c/common/uloc_tag.cpp
index 7f7fd9119e..1235081bf3 100644
--- a/thirdparty/icu4c/common/uloc_tag.cpp
+++ b/thirdparty/icu4c/common/uloc_tag.cpp
@@ -129,7 +129,6 @@ static const char* const LEGACY[] = {
// Legacy tags with no preferred value in the IANA
// registry. Kept for now for the backward compatibility
// because ICU has mapped them this way.
- "cel-gaulish", "xtg-x-cel-gaulish",
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-mingo", "see-x-i-mingo",
@@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len)
return FALSE;
}
+U_CAPI const char * U_EXPORT2
+ultag_getTKeyStart(const char *localeID) { // Returns a pointer to the first SEP-delimited segment of localeID that _isTKey() accepts, or nullptr if none does.
+ const char *result = localeID; // start of the segment currently being examined
+ const char *sep; // position of the next SEP at or after result
+ while((sep = uprv_strchr(result, SEP)) != nullptr) {
+ if (_isTKey(result, static_cast<int32_t>(sep - result))) { // test the segment [result, sep) using its explicit length
+ return result;
+ }
+ result = ++sep; // step past the separator to the next segment
+ }
+ if (_isTKey(result, -1)) { // last segment has no trailing SEP; -1 presumably means "NUL-terminated" to _isTKey — confirm
+ return result;
+ }
+ return nullptr; // no segment qualified
+}
+
static UBool
_isTValue(const char* s, int32_t len)
{
@@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
+
+ if (len < 0) {
+ len = (int32_t)uprv_strlen(s);
+ }
switch (state) {
case kStart:
- if (ultag_isLanguageSubtag(s, len)) {
+ if (ultag_isLanguageSubtag(s, len) && len != 4) {
state = kGotLanguage;
return TRUE;
}
@@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
return;
}
- /* Determine if variants already exists */
- if (ultag_getVariantsSize(langtag)) {
- posixVariant = TRUE;
- }
-
n = ultag_getExtensionsSize(langtag);
/* resolve locale keywords and reordering keys */
@@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
key = ultag_getExtensionKey(langtag, i);
type = ultag_getExtensionValue(langtag, i);
if (*key == LDMLEXT) {
+ /* Determine if variants already exists */
+ if (ultag_getVariantsSize(langtag)) {
+ posixVariant = TRUE;
+ }
+
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
if (U_FAILURE(*status)) {
break;
@@ -2028,7 +2047,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- uprv_memcpy(tagBuf, tag, tagLen);
+
+ if (tagLen > 0) {
+ uprv_memcpy(tagBuf, tag, tagLen);
+ }
*(tagBuf + tagLen) = 0;
/* create a ULanguageTag */
@@ -2692,8 +2714,7 @@ ulocimp_toLanguageTag(const char* localeID,
if (U_SUCCESS(tmpStatus)) {
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
/* return private use only tag */
- static const char PREFIX[] = { PRIVATEUSE, SEP };
- sink.Append(PREFIX, sizeof(PREFIX));
+ sink.Append("und-x-", 6);
sink.Append(buf.data(), buf.length());
done = TRUE;
} else if (strict) {
diff --git a/thirdparty/icu4c/common/ulocimp.h b/thirdparty/icu4c/common/ulocimp.h
index 5691fe9a77..1f796aa213 100644
--- a/thirdparty/icu4c/common/ulocimp.h
+++ b/thirdparty/icu4c/common/ulocimp.h
@@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len);
U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len);
+U_CAPI const char * U_EXPORT2
+ultag_getTKeyStart(const char *localeID);
+
U_CFUNC const char*
ulocimp_toBcpKey(const char* key);
diff --git a/thirdparty/icu4c/common/unicode/bytestream.h b/thirdparty/icu4c/common/unicode/bytestream.h
index 044f7a77e7..9735ee0bf8 100644
--- a/thirdparty/icu4c/common/unicode/bytestream.h
+++ b/thirdparty/icu4c/common/unicode/bytestream.h
@@ -71,7 +71,6 @@ public:
*/
virtual void Append(const char* bytes, int32_t n) = 0;
-#ifndef U_HIDE_DRAFT_API
/**
* Appends n bytes to this. Same as Append().
* Call AppendU8() with u8"string literals" which are const char * in C++11
@@ -81,7 +80,7 @@ public:
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
- * @draft ICU 67
+ * @stable ICU 67
*/
inline void AppendU8(const char* bytes, int32_t n) {
Append(bytes, n);
@@ -97,13 +96,12 @@ public:
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
- * @draft ICU 67
+ * @stable ICU 67
*/
inline void AppendU8(const char8_t* bytes, int32_t n) {
Append(reinterpret_cast<const char*>(bytes), n);
}
#endif
-#endif // U_HIDE_DRAFT_API
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
diff --git a/thirdparty/icu4c/common/unicode/bytestrie.h b/thirdparty/icu4c/common/unicode/bytestrie.h
index 85f802df42..271a81d1b4 100644
--- a/thirdparty/icu4c/common/unicode/bytestrie.h
+++ b/thirdparty/icu4c/common/unicode/bytestrie.h
@@ -30,6 +30,8 @@
#include "unicode/uobject.h"
#include "unicode/ustringtrie.h"
+class BytesTrieTest;
+
U_NAMESPACE_BEGIN
class ByteSink;
@@ -378,6 +380,7 @@ public:
private:
friend class BytesTrieBuilder;
+ friend class ::BytesTrieTest;
/**
* Constructs a BytesTrie reader instance.
diff --git a/thirdparty/icu4c/common/unicode/bytestriebuilder.h b/thirdparty/icu4c/common/unicode/bytestriebuilder.h
index cae16e48b4..3cff89e443 100644
--- a/thirdparty/icu4c/common/unicode/bytestriebuilder.h
+++ b/thirdparty/icu4c/common/unicode/bytestriebuilder.h
@@ -30,6 +30,8 @@
#include "unicode/stringpiece.h"
#include "unicode/stringtriebuilder.h"
+class BytesTrieTest;
+
U_NAMESPACE_BEGIN
class BytesTrieElement;
@@ -125,6 +127,8 @@ public:
BytesTrieBuilder &clear();
private:
+ friend class ::BytesTrieTest;
+
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
@@ -168,6 +172,7 @@ private:
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
virtual int32_t writeDeltaTo(int32_t jumpTarget);
+ static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
BytesTrieElement *elements;
diff --git a/thirdparty/icu4c/common/unicode/docmain.h b/thirdparty/icu4c/common/unicode/docmain.h
index edcb5d4e83..e82678c95f 100644
--- a/thirdparty/icu4c/common/unicode/docmain.h
+++ b/thirdparty/icu4c/common/unicode/docmain.h
@@ -15,7 +15,7 @@
* \file
* \brief (Non API- contains Doxygen definitions)
*
- * This file contains documentation for Doxygen and doesnot have
+ * This file contains documentation for Doxygen and does not have
* any significance with respect to C or C++ API
*/
@@ -74,7 +74,7 @@
* </tr>
* <tr>
* <td>Strings and Character Iteration</td>
- * <td>ustring.h, utf8.h, utf16.h, UText, UCharIterator</td>
+ * <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
* <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
* </tr>
* <tr>
@@ -128,9 +128,9 @@
* <td>icu::Normalizer2</td>
* </tr>
* <tr>
- * <td>Calendars</td>
+ * <td>Calendars and Time Zones</td>
* <td>ucal.h</td>
- * <td>icu::Calendar</td>
+ * <td>icu::Calendar, icu::TimeZone</td>
* </tr>
* <tr>
* <td>Date and Time Formatting</td>
diff --git a/thirdparty/icu4c/common/unicode/icuplug.h b/thirdparty/icu4c/common/unicode/icuplug.h
index 52f810da57..205af360d4 100644
--- a/thirdparty/icu4c/common/unicode/icuplug.h
+++ b/thirdparty/icu4c/common/unicode/icuplug.h
@@ -117,14 +117,13 @@
/* === Basic types === */
#ifndef U_HIDE_INTERNAL_API
+struct UPlugData;
/**
* @{
- * Opaque structure passed to/from a plugin.
- * use the APIs to access it.
+ * Typedef for opaque structure passed to/from a plugin.
+ * Use the APIs to access it.
* @internal ICU 4.4 Technology Preview
*/
-
-struct UPlugData;
typedef struct UPlugData UPlugData;
/** @} */
diff --git a/thirdparty/icu4c/common/unicode/localematcher.h b/thirdparty/icu4c/common/unicode/localematcher.h
index 63a68b0b7f..0cd068ef32 100644
--- a/thirdparty/icu4c/common/unicode/localematcher.h
+++ b/thirdparty/icu4c/common/unicode/localematcher.h
@@ -91,8 +91,6 @@ enum ULocMatchDemotion {
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif
-#ifndef U_FORCE_HIDE_DRAFT_API
-
/**
* Builder option for whether to include or ignore one-way (fallback) match data.
* The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
@@ -108,20 +106,20 @@ typedef enum ULocMatchDemotion ULocMatchDemotion;
* but not if it is merely a fallback.
*
* @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
- * @draft ICU 67
+ * @stable ICU 67
*/
enum ULocMatchDirection {
/**
* Locale matching includes one-way matches such as Breton→French. (default)
*
- * @draft ICU 67
+ * @stable ICU 67
*/
ULOCMATCH_DIRECTION_WITH_ONE_WAY,
/**
* Locale matching limited to two-way matches including e.g. Danish↔Norwegian
* but ignoring one-way matches.
*
- * @draft ICU 67
+ * @stable ICU 67
*/
ULOCMATCH_DIRECTION_ONLY_TWO_WAY
};
@@ -129,8 +127,6 @@ enum ULocMatchDirection {
typedef enum ULocMatchDirection ULocMatchDirection;
#endif
-#endif // U_FORCE_HIDE_DRAFT_API
-
struct UHashtable;
U_NAMESPACE_BEGIN
@@ -463,14 +459,13 @@ public:
*/
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
-#ifndef U_HIDE_DRAFT_API
/**
* Option for whether to include or ignore one-way (fallback) match data.
* By default, they are included.
*
* @param direction the match direction to set.
* @return this Builder object
- * @draft ICU 67
+ * @stable ICU 67
*/
Builder &setDirection(ULocMatchDirection direction) {
if (U_SUCCESS(errorCode_)) {
@@ -478,7 +473,6 @@ public:
}
return *this;
}
-#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
@@ -704,7 +698,7 @@ private:
LSR *lsrs;
int32_t supportedLocalesLength;
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
- UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
+ UHashtable *supportedLsrToIndex; // Map<LSR, Integer>
// Array versions of the supportedLsrToIndex keys and values.
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
const LSR **supportedLSRs;
diff --git a/thirdparty/icu4c/common/unicode/locid.h b/thirdparty/icu4c/common/unicode/locid.h
index ba858d702a..81f4685d65 100644
--- a/thirdparty/icu4c/common/unicode/locid.h
+++ b/thirdparty/icu4c/common/unicode/locid.h
@@ -571,15 +571,13 @@ public:
*/
void minimizeSubtags(UErrorCode& status);
-#ifndef U_HIDE_DRAFT_API
/**
* Canonicalize the locale ID of this object according to CLDR.
* @param status the status code
- * @draft ICU 67
+ * @stable ICU 67
* @see createCanonical
*/
void canonicalize(UErrorCode& status);
-#endif // U_HIDE_DRAFT_API
/**
* Gets the list of keywords for the specified locale.
diff --git a/thirdparty/icu4c/common/unicode/normalizer2.h b/thirdparty/icu4c/common/unicode/normalizer2.h
index 5eb1d95caf..2d355250c2 100644
--- a/thirdparty/icu4c/common/unicode/normalizer2.h
+++ b/thirdparty/icu4c/common/unicode/normalizer2.h
@@ -225,10 +225,8 @@ public:
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
- * Currently implemented completely only for "compose" modes,
- * such as for NFC, NFKC, and NFKC_Casefold
- * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
- * Otherwise currently converts to & from UTF-16 and does not support edits.
+ * Implemented completely for all built-in modes except for FCD.
+ * The base class implementation converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
@@ -381,11 +379,9 @@ public:
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
- * This works for all normalization modes,
- * but it is currently optimized for UTF-8 only for "compose" modes,
- * such as for NFC, NFKC, and NFKC_Casefold
- * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
- * For other modes it currently converts to UTF-16 and calls isNormalized().
+ * This works for all normalization modes.
+ * It is optimized for UTF-8 for all built-in modes except for FCD.
+ * The base class implementation converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
@@ -543,10 +539,8 @@ public:
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
- * Currently implemented completely only for "compose" modes,
- * such as for NFC, NFKC, and NFKC_Casefold
- * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
- * Otherwise currently converts to & from UTF-16 and does not support edits.
+ * Implemented completely for all built-in modes except for FCD.
+ * The base class implementation converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
@@ -676,11 +670,9 @@ public:
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
- * This works for all normalization modes,
- * but it is currently optimized for UTF-8 only for "compose" modes,
- * such as for NFC, NFKC, and NFKC_Casefold
- * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
- * For other modes it currently converts to UTF-16 and calls isNormalized().
+ * This works for all normalization modes.
+ * It is optimized for UTF-8 for all built-in modes except for FCD.
+ * The base class implementation converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
diff --git a/thirdparty/icu4c/common/unicode/platform.h b/thirdparty/icu4c/common/unicode/platform.h
index 2bb2f8b318..cb3a833fef 100644
--- a/thirdparty/icu4c/common/unicode/platform.h
+++ b/thirdparty/icu4c/common/unicode/platform.h
@@ -880,6 +880,6 @@ namespace std {
#else
# define U_CALLCONV_FPTR
#endif
-/* @} */
+/** @} */
#endif // _PLATFORM_H
diff --git a/thirdparty/icu4c/common/unicode/stringpiece.h b/thirdparty/icu4c/common/unicode/stringpiece.h
index 7d7d871e1f..8c96789e73 100644
--- a/thirdparty/icu4c/common/unicode/stringpiece.h
+++ b/thirdparty/icu4c/common/unicode/stringpiece.h
@@ -75,12 +75,11 @@ class U_COMMON_API StringPiece : public UMemory {
* @stable ICU 4.2
*/
StringPiece(const char* str);
-#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a NUL-terminated const char8_t * pointer.
* @param str a NUL-terminated const char8_t * pointer
- * @draft ICU 67
+ * @stable ICU 67
*/
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
#endif
@@ -88,10 +87,9 @@ class U_COMMON_API StringPiece : public UMemory {
* Constructs an empty StringPiece.
* Needed for type disambiguation from multiple other overloads.
* @param p nullptr
- * @draft ICU 67
+ * @stable ICU 67
*/
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
-#endif // U_HIDE_DRAFT_API
/**
* Constructs from a std::string.
@@ -99,17 +97,15 @@ class U_COMMON_API StringPiece : public UMemory {
*/
StringPiece(const std::string& str)
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
-#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a std::u8string.
- * @draft ICU 67
+ * @stable ICU 67
*/
StringPiece(const std::u8string& str)
: ptr_(reinterpret_cast<const char*>(str.data())),
length_(static_cast<int32_t>(str.size())) { }
#endif
-#endif // U_HIDE_DRAFT_API
/**
* Constructs from some other implementation of a string piece class, from any
@@ -152,18 +148,16 @@ class U_COMMON_API StringPiece : public UMemory {
* @stable ICU 4.2
*/
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
-#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a const char8_t * pointer and a specified length.
* @param str a const char8_t * pointer (need not be terminated)
* @param len the length of the string; must be non-negative
- * @draft ICU 67
+ * @stable ICU 67
*/
StringPiece(const char8_t* str, int32_t len) :
StringPiece(reinterpret_cast<const char*>(str), len) {}
#endif
-#endif // U_HIDE_DRAFT_API
/**
* Substring of another StringPiece.
@@ -233,13 +227,12 @@ class U_COMMON_API StringPiece : public UMemory {
*/
void set(const char* str);
-#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Resets the stringpiece to refer to new data.
* @param xdata pointer the new string data. Need not be NUL-terminated.
* @param len the length of the new data
- * @draft ICU 67
+ * @stable ICU 67
*/
inline void set(const char8_t* xdata, int32_t len) {
set(reinterpret_cast<const char*>(xdata), len);
@@ -248,13 +241,12 @@ class U_COMMON_API StringPiece : public UMemory {
/**
* Resets the stringpiece to refer to new data.
* @param str a pointer to a NUL-terminated string.
- * @draft ICU 67
+ * @stable ICU 67
*/
inline void set(const char8_t* str) {
set(reinterpret_cast<const char*>(str));
}
#endif
-#endif // U_HIDE_DRAFT_API
/**
* Removes the first n string units.
@@ -286,13 +278,12 @@ class U_COMMON_API StringPiece : public UMemory {
}
}
-#ifndef U_HIDE_DRAFT_API
/**
* Searches the StringPiece for the given search string (needle);
* @param needle The string for which to search.
* @param offset Where to start searching within this string (haystack).
* @return The offset of needle in haystack, or -1 if not found.
- * @draft ICU 67
+ * @stable ICU 67
*/
int32_t find(StringPiece needle, int32_t offset);
@@ -301,10 +292,9 @@ class U_COMMON_API StringPiece : public UMemory {
* similar to std::string::compare().
* @param other The string to compare to.
* @return below zero if this < other; above zero if this > other; 0 if this == other.
- * @draft ICU 67
+ * @stable ICU 67
*/
int32_t compare(StringPiece other);
-#endif // U_HIDE_DRAFT_API
/**
* Maximum integer, used as a default value for substring methods.
diff --git a/thirdparty/icu4c/common/unicode/ubrk.h b/thirdparty/icu4c/common/unicode/ubrk.h
index 37189a8598..1249b0b160 100644
--- a/thirdparty/icu4c/common/unicode/ubrk.h
+++ b/thirdparty/icu4c/common/unicode/ubrk.h
@@ -296,6 +296,8 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
const UChar * text, int32_t textLength,
UErrorCode * status);
+#ifndef U_HIDE_DEPRECATED_API
+
/**
* Thread safe cloning operation
* @param bi iterator to be cloned
@@ -312,7 +314,7 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
* @return pointer to the new clone
- * @stable ICU 2.0
+ * @deprecated ICU 69 Use ubrk_clone() instead.
*/
U_CAPI UBreakIterator * U_EXPORT2
ubrk_safeClone(
@@ -321,6 +323,23 @@ ubrk_safeClone(
int32_t *pBufferSize,
UErrorCode *status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Thread safe cloning operation.
+ * @param bi iterator to be cloned
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @return pointer to the new clone
+ * @draft ICU 69
+ */
+U_CAPI UBreakIterator * U_EXPORT2
+ubrk_clone(const UBreakIterator *bi,
+ UErrorCode *status);
+
+#endif // U_HIDE_DRAFT_API
+
#ifndef U_HIDE_DEPRECATED_API
/**
diff --git a/thirdparty/icu4c/common/unicode/ucnv.h b/thirdparty/icu4c/common/unicode/ucnv.h
index 58f271cfb5..5d784990f2 100644
--- a/thirdparty/icu4c/common/unicode/ucnv.h
+++ b/thirdparty/icu4c/common/unicode/ucnv.h
@@ -1699,10 +1699,10 @@ ucnv_countAvailable(void);
/**
* Gets the canonical converter name of the specified converter from a list of
- * all available converters contaied in the alias file. All converters
+ * all available converters contained in the alias file. All converters
* in this list can be opened.
*
- * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>)
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
* @see ucnv_countAvailable
* @stable ICU 2.0
diff --git a/thirdparty/icu4c/common/unicode/ucnvsel.h b/thirdparty/icu4c/common/unicode/ucnvsel.h
index 5e0a71cf35..3d7d3327f7 100644
--- a/thirdparty/icu4c/common/unicode/ucnvsel.h
+++ b/thirdparty/icu4c/common/unicode/ucnvsel.h
@@ -45,11 +45,11 @@
* from the serialized form.
*/
+struct UConverterSelector;
/**
* @{
- * The selector data structure
+ * Typedef for selector data structure.
*/
-struct UConverterSelector;
typedef struct UConverterSelector UConverterSelector;
/** @} */
diff --git a/thirdparty/icu4c/common/unicode/unifilt.h b/thirdparty/icu4c/common/unicode/unifilt.h
index 420e1a1905..7870b55939 100644
--- a/thirdparty/icu4c/common/unicode/unifilt.h
+++ b/thirdparty/icu4c/common/unicode/unifilt.h
@@ -40,8 +40,8 @@ U_NAMESPACE_BEGIN
*
* <code>UnicodeFilter</code> defines a protocol for selecting a
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
- * Currently, filters are used in conjunction with classes like {@link
- * Transliterator} to only process selected characters through a
+ * Currently, filters are used in conjunction with classes like
+ * {@link Transliterator} to only process selected characters through a
* transformation.
*
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods
diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h
index 50b6360f3a..8403c4026c 100644
--- a/thirdparty/icu4c/common/unicode/uniset.h
+++ b/thirdparty/icu4c/common/unicode/uniset.h
@@ -178,8 +178,6 @@ class RuleCharacterIterator;
* Unicode property
* </table>
*
- * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
- *
* <p><b>Formal syntax</b></p>
*
* \htmlonly<blockquote>\endhtmlonly
@@ -601,7 +599,7 @@ public:
/**
* Make this object represent the range `start - end`.
- * If `end > start` then this object is set to an empty range.
+ * If `start > end` then this object is set to an empty range.
* A frozen set will not be modified.
*
* @param start first character in the set, inclusive
@@ -1077,7 +1075,7 @@ public:
/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,
- * the call leaves this set unchanged. If <code>end > start</code>
+ * the call leaves this set unchanged. If <code>start > end</code>
* then an empty range is added, leaving the set unchanged.
* This is equivalent to a boolean logic OR, or a set UNION.
* A frozen set will not be modified.
@@ -1095,6 +1093,9 @@ public:
* present. If this set already contains the specified character,
* the call leaves this set unchanged.
* A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& add(UChar32 c);
@@ -1104,8 +1105,8 @@ public:
* present. If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
* A frozen set will not be modified.
+ *
* @param s the source string
* @return this object, for chaining
* @stable ICU 2.4
@@ -1124,8 +1125,8 @@ public:
public:
/**
- * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
+ * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@@ -1135,7 +1136,6 @@ public:
/**
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@@ -1145,7 +1145,6 @@ public:
/**
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@@ -1155,7 +1154,6 @@ public:
/**
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@@ -1165,7 +1163,7 @@ public:
/**
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ *
* @param s the source string
* @return a newly created set containing the given string.
* The caller owns the return object and is responsible for deleting it.
@@ -1185,15 +1183,13 @@ public:
/**
* Retain only the elements in this set that are contained in the
- * specified range. If <code>end > start</code> then an empty range is
+ * specified range. If <code>start > end</code> then an empty range is
* retained, leaving the set empty. This is equivalent to
* a boolean logic AND, or a set INTERSECTION.
* A frozen set will not be modified.
*
- * @param start first character, inclusive, of range to be retained
- * to this set.
- * @param end last character, inclusive, of range to be retained
- * to this set.
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
@@ -1202,14 +1198,31 @@ public:
/**
* Retain the specified character from this set if it is present.
* A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& retain(UChar32 c);
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Retains only the specified string from this set if it is present.
+ * Upon return this set will be empty if it did not contain s, or
+ * will only contain s if it did contain s.
+ * A frozen set will not be modified.
+ *
+ * @param s the source string
+ * @return this object, for chaining
+ * @draft ICU 69
+ */
+ UnicodeSet& retain(const UnicodeString &s);
+#endif // U_HIDE_DRAFT_API
+
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
- * returns. If <code>end > start</code> then an empty range is
+ * returns. If <code>start > end</code> then an empty range is
* removed, leaving the set unchanged.
* A frozen set will not be modified.
*
@@ -1226,6 +1239,9 @@ public:
* The set will not contain the specified character once the call
* returns.
* A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& remove(UChar32 c);
@@ -1253,15 +1269,13 @@ public:
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
- * added if it is not in this set. If <code>end > start</code>
+ * added if it is not in this set. If <code>start > end</code>
* then an empty range is complemented, leaving the set unchanged.
* This is equivalent to a boolean logic XOR.
* A frozen set will not be modified.
*
- * @param start first character, inclusive, of range to be removed
- * from this set.
- * @param end last character, inclusive, of range to be removed
- * from this set.
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
@@ -1271,16 +1285,18 @@ public:
* will be removed if it is in this set, or will be added if it is
* not in this set.
* A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& complement(UChar32 c);
/**
* Complement the specified string in this set.
- * The set will not contain the specified string once the call
- * returns.
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ * The string will be removed if it is in this set, or will be added if it is not in this set.
* A frozen set will not be modified.
+ *
* @param s the string to complement
* @return this object, for chaining
* @stable ICU 2.4
diff --git a/thirdparty/icu4c/common/unicode/unistr.h b/thirdparty/icu4c/common/unicode/unistr.h
index 456389f265..85bd964951 100644
--- a/thirdparty/icu4c/common/unicode/unistr.h
+++ b/thirdparty/icu4c/common/unicode/unistr.h
@@ -44,9 +44,10 @@ struct UConverter; // unicode/ucnv.h
#ifndef USTRING_H
/**
* \ingroup ustring_ustrlen
+ * @param s Pointer to sequence of UChars.
+ * @return Length of sequence.
*/
-U_CAPI int32_t U_EXPORT2
-u_strlen(const UChar *s);
+U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
#endif
U_NAMESPACE_BEGIN
@@ -2766,7 +2767,6 @@ public:
* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
- * @param options Options bit set, see ucasemap_open().
* @return A reference to this.
* @stable ICU 3.8
*/
@@ -3614,7 +3614,7 @@ private:
// turn a bogus string into an empty one
void unBogus();
- // implements assigment operator, copy constructor, and fastCopyFrom()
+ // implements assignment operator, copy constructor, and fastCopyFrom()
UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
// Copies just the fields without memory management.
diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h
index fe59fdd893..737f4b308e 100644
--- a/thirdparty/icu4c/common/unicode/urename.h
+++ b/thirdparty/icu4c/common/unicode/urename.h
@@ -482,6 +482,7 @@
#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
+#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone)
#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
@@ -534,6 +535,7 @@
#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
+#define ucal_getTimeZoneOffsetFromLocal U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneOffsetFromLocal)
#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
@@ -962,6 +964,7 @@
#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
+#define uhash_containsKey U_ICU_ENTRY_POINT_RENAME(uhash_containsKey)
#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
@@ -970,6 +973,7 @@
#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
+#define uhash_getiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_getiAndFound)
#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
@@ -977,12 +981,15 @@
#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
+#define uhash_icontainsKey U_ICU_ENTRY_POINT_RENAME(uhash_icontainsKey)
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
+#define uhash_igetiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_igetiAndFound)
#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
+#define uhash_iputiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_iputiAllowZero)
#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
@@ -990,6 +997,7 @@
#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
+#define uhash_putiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_putiAllowZero)
#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
@@ -1150,6 +1158,8 @@
#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
+#define umeas_getPrefixBase U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixBase)
+#define umeas_getPrefixPower U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixPower)
#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
@@ -1672,6 +1682,9 @@
#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
+#define uset_complementAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_complementAllCodePoints)
+#define uset_complementRange U_ICU_ENTRY_POINT_RENAME(uset_complementRange)
+#define uset_complementString U_ICU_ENTRY_POINT_RENAME(uset_complementString)
#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
@@ -1695,12 +1708,15 @@
#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
+#define uset_removeAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_removeAllCodePoints)
#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
+#define uset_retainAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_retainAllCodePoints)
+#define uset_retainString U_ICU_ENTRY_POINT_RENAME(uset_retainString)
#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h
index 502ea8dc14..1d0daf9d09 100644
--- a/thirdparty/icu4c/common/unicode/uset.h
+++ b/thirdparty/icu4c/common/unicode/uset.h
@@ -582,8 +582,8 @@ U_CAPI void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen);
/**
- * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
- * If this set already any particular character, it has no effect on that character.
+ * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param set the object to which to add the character
* @param str the source string
@@ -628,6 +628,20 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
U_CAPI void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @draft ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
+#endif // U_HIDE_DRAFT_API
+
/**
* Removes from this set all of its elements that are contained in the
* specified set. This operation effectively modifies this
@@ -650,15 +664,41 @@ uset_removeAll(USet* set, const USet* removeSet);
* A frozen set will not be modified.
*
* @param set the object for which to retain only the specified range
- * @param start first character, inclusive, of range to be retained
- * to this set.
- * @param end last character, inclusive, of range to be retained
- * to this set.
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
* @stable ICU 3.2
*/
U_CAPI void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Retains only the specified string from this set if it is present.
+ * Upon return this set will be empty if it did not contain the string, or
+ * will contain only that string if it did contain it.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @draft ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_retainString(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @draft ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
+#endif // U_HIDE_DRAFT_API
+
/**
* Retains only the elements in this set that are contained in the
* specified set. In other words, removes from this set all of
@@ -696,6 +736,49 @@ uset_compact(USet* set);
U_CAPI void U_EXPORT2
uset_complement(USet* set);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Complements the specified range in this set. Any character in
+ * the range will be removed if it is in this set, or will be
+ * added if it is not in this set. If <code>start > end</code>
+ * then an empty range is complemented, leaving the set unchanged.
+ * This is equivalent to a boolean logic XOR.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @draft ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_complementRange(USet *set, UChar32 start, UChar32 end);
+
+/**
+ * Complements the specified string in this set.
+ * The string will be removed if it is in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @draft ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_complementString(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @draft ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
+#endif // U_HIDE_DRAFT_API
+
/**
* Complements in this set all elements contained in the specified
* set. Any character in the other set will be removed if it is
diff --git a/thirdparty/icu4c/common/unicode/ushape.h b/thirdparty/icu4c/common/unicode/ushape.h
index fed4869abd..14371edc8f 100644
--- a/thirdparty/icu4c/common/unicode/ushape.h
+++ b/thirdparty/icu4c/common/unicode/ushape.h
@@ -323,7 +323,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
/** Presentation form option:
* Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
- * their unshaped correspondants in range 0+06xx, before shaping.
+ * their unshaped correspondents in range U+06xx, before shaping.
* @stable ICU 3.6
*/
#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
diff --git a/thirdparty/icu4c/common/unicode/utrace.h b/thirdparty/icu4c/common/unicode/utrace.h
index 28c313c582..677486f473 100644
--- a/thirdparty/icu4c/common/unicode/utrace.h
+++ b/thirdparty/icu4c/common/unicode/utrace.h
@@ -173,24 +173,23 @@ typedef enum UTraceFunctionNumber {
UTRACE_RES_DATA_LIMIT,
#endif // U_HIDE_INTERNAL_API
-#ifndef U_HIDE_DRAFT_API
/**
* The lowest break iterator location.
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_START=0x4000,
/**
* Indicates that a character instance of break iterator was created.
*
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
/**
* Indicates that a word instance of break iterator was created.
*
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_CREATE_WORD,
@@ -200,21 +199,21 @@ typedef enum UTraceFunctionNumber {
* Provides one C-style string to UTraceData: the lb value ("",
* "loose", "strict", or "normal").
*
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_CREATE_LINE,
/**
* Indicates that a sentence instance of break iterator was created.
*
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_CREATE_SENTENCE,
/**
* Indicates that a title instance of break iterator was created.
*
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_CREATE_TITLE,
@@ -224,12 +223,10 @@ typedef enum UTraceFunctionNumber {
* Provides one C-style string to UTraceData: the script code of what
* the break engine cover ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
*
- * @draft ICU 67
+ * @stable ICU 67
*/
UTRACE_UBRK_CREATE_BREAK_ENGINE,
-#endif // U_HIDE_DRAFT_API
-
#ifndef U_HIDE_INTERNAL_API
/**
* One more than the highest normal break iterator trace location.
diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h
index a46481a3fe..b09d4943c1 100644
--- a/thirdparty/icu4c/common/unicode/uvernum.h
+++ b/thirdparty/icu4c/common/unicode/uvernum.h
@@ -60,13 +60,13 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION_MAJOR_NUM 68
+#define U_ICU_VERSION_MAJOR_NUM 69
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_MINOR_NUM 2
+#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@@ -86,7 +86,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SUFFIX _68
+#define U_ICU_VERSION_SUFFIX _69
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -139,7 +139,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION "68.2"
+#define U_ICU_VERSION "69.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@@ -152,13 +152,13 @@
*
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SHORT "68"
+#define U_ICU_VERSION_SHORT "69"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
-#define U_ICU_DATA_VERSION "68.2"
+#define U_ICU_DATA_VERSION "69.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
diff --git a/thirdparty/icu4c/common/uniset.cpp b/thirdparty/icu4c/common/uniset.cpp
index b73d612f24..461e5a7197 100644
--- a/thirdparty/icu4c/common/uniset.cpp
+++ b/thirdparty/icu4c/common/uniset.cpp
@@ -30,24 +30,6 @@
#include "bmpset.h"
#include "unisetspan.h"
-// Define UChar constants using hex for EBCDIC compatibility
-// Used #define to reduce private static exports and memory access time.
-#define SET_OPEN ((UChar)0x005B) /*[*/
-#define SET_CLOSE ((UChar)0x005D) /*]*/
-#define HYPHEN ((UChar)0x002D) /*-*/
-#define COMPLEMENT ((UChar)0x005E) /*^*/
-#define COLON ((UChar)0x003A) /*:*/
-#define BACKSLASH ((UChar)0x005C) /*\*/
-#define INTERSECTION ((UChar)0x0026) /*&*/
-#define UPPER_U ((UChar)0x0055) /*U*/
-#define LOWER_U ((UChar)0x0075) /*u*/
-#define OPEN_BRACE ((UChar)123) /*{*/
-#define CLOSE_BRACE ((UChar)125) /*}*/
-#define UPPER_P ((UChar)0x0050) /*P*/
-#define LOWER_P ((UChar)0x0070) /*p*/
-#define UPPER_N ((UChar)78) /*N*/
-#define EQUALS ((UChar)0x003D) /*=*/
-
// HIGH_VALUE > all valid values. 110000 for codepoints
#define UNICODESET_HIGH 0x0110000
@@ -444,7 +426,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
* @return <tt>true</tt> if this set contains the specified string
*/
UBool UnicodeSet::contains(const UnicodeString& s) const {
- if (s.length() == 0) return FALSE;
int32_t cp = getSingleCP(s);
if (cp < 0) {
return stringsContains(s);
@@ -559,11 +540,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
if (hasStrings()) {
for (i=0; i<strings->size(); ++i) {
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
- //if (s.length() == 0) {
- // // Empty strings match everything
- // return TRUE;
- //}
- // assert(s.length() != 0); // We enforce this elsewhere
+ if (s.isEmpty()) {
+ continue; // skip the empty string
+ }
UChar32 c = s.char32At(0);
if ((c & 0xFF) == v) {
return TRUE;
@@ -582,9 +561,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
int32_t limit,
UBool incremental) {
if (offset == limit) {
- // Strings, if any, have length != 0, so we don't worry
- // about them here. If we ever allow zero-length strings
- // we much check for them here.
if (contains(U_ETHER)) {
return incremental ? U_PARTIAL_MATCH : U_MATCH;
} else {
@@ -614,11 +590,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
for (i=0; i<strings->size(); ++i) {
const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
-
- //if (trial.length() == 0) {
- // return U_MATCH; // null-string always matches
- //}
- // assert(trial.length() != 0); // We ensure this elsewhere
+ if (trial.isEmpty()) {
+ continue; // skip the empty string
+ }
UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
@@ -971,12 +945,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
* present. If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus "ch" => {"ch"}
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ *
* @param s the source string
* @return the modified set, for chaining
*/
UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
- if (s.length() == 0 || isFrozen() || isBogus()) return *this;
+ if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (!stringsContains(s)) {
@@ -991,8 +965,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
/**
* Adds the given string, in order, to 'strings'. The given string
- * must have been checked by the caller to not be empty and to not
- * already be in 'strings'.
+ * must have been checked by the caller to not already be in 'strings'.
*/
void UnicodeSet::_add(const UnicodeString& s) {
if (isFrozen() || isBogus()) {
@@ -1021,16 +994,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
* @param string to test
*/
int32_t UnicodeSet::getSingleCP(const UnicodeString& s) { // Returns the code point when s is a single code point, otherwise -1.
- //if (s.length() < 1) {
- // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
- //}
- if (s.length() > 2) return -1;
- if (s.length() == 1) return s.charAt(0);
-
- // at this point, len = 2
- UChar32 cp = s.char32At(0);
- if (cp > 0xFFFF) { // is surrogate pair
- return cp;
+ int32_t sLength = s.length();
+ if (sLength == 1) return s.charAt(0); // a single code unit is returned as-is
+ if (sLength == 2) { // two code units may form one supplementary code point
+ UChar32 cp = s.char32At(0);
+ if (cp > 0xFFFF) { // is surrogate pair
+ return cp;
+ }
}
return -1; // every other length (including 0), or two units that are not a surrogate pair
}
@@ -1150,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) {
return retain(c, c);
}
+UnicodeSet& UnicodeSet::retain(const UnicodeString &s) { // Keeps only s in this set if it is present; all other contents are dropped.
+ if (isFrozen() || isBogus()) { return *this; } // frozen or bogus sets are returned unmodified
+ UChar32 cp = getSingleCP(s); // negative when s is not a single code point
+ if (cp < 0) { // s is handled as a string element
+ bool isIn = stringsContains(s);
+ // Check for getRangeCount() first to avoid somewhat-expensive size()
+ // when there are single code points.
+ if (isIn && getRangeCount() == 0 && size() == 1) {
+ return *this; // no ranges and one element, which is s: nothing to change
+ }
+ clear();
+ if (isIn) {
+ _add(s); // put s back after the clear()
+ }
+ } else {
+ retain(cp, cp); // single code point: delegate to the range overload
+ }
+ return *this;
+}
+
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
@@ -1186,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
* @return the modified set, for chaining
*/
UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
- if (s.length() == 0 || isFrozen() || isBogus()) return *this;
+ if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (strings != nullptr && strings->removeElement((void*) &s)) {
@@ -1252,12 +1242,12 @@ UnicodeSet& UnicodeSet::complement(void) {
* Complement the specified string in this set.
* The set will not contain the specified string once the call
* returns.
- * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
+ *
* @param s the string to complement
* @return this object, for chaining
*/
UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
- if (s.length() == 0 || isFrozen() || isBogus()) return *this;
+ if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (stringsContains(s)) {
@@ -2001,22 +1991,22 @@ escapeUnprintable) {
}
// Okay to let ':' pass through
switch (c) {
- case SET_OPEN:
- case SET_CLOSE:
- case HYPHEN:
- case COMPLEMENT:
- case INTERSECTION:
- case BACKSLASH:
- case OPEN_BRACE:
- case CLOSE_BRACE:
- case COLON:
+ case u'[':
+ case u']':
+ case u'-':
+ case u'^':
+ case u'&':
+ case u'\\':
+ case u'{':
+ case u'}':
+ case u':':
case SymbolTable::SYMBOL_REF:
- buf.append(BACKSLASH);
+ buf.append(u'\\');
break;
default:
// Escape whitespace
if (PatternProps::isWhiteSpace(c)) {
- buf.append(BACKSLASH);
+ buf.append(u'\\');
}
break;
}
@@ -2049,7 +2039,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
backslashCount = 0;
} else {
result.append(c);
- if (c == BACKSLASH) {
+ if (c == u'\\') {
++backslashCount;
} else {
backslashCount = 0;
@@ -2082,13 +2072,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
UBool escapeUnprintable) const
{
- result.append(SET_OPEN);
+ result.append(u'[');
// // Check against the predefined categories. We implicitly build
// // up ALL category sets the first time toPattern() is called.
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
// if (*this == getCategorySet(cat)) {
-// result.append(COLON);
+// result.append(u':');
// result.append(CATEGORY_NAMES, cat*2, 2);
// return result.append(CATEGORY_CLOSE);
// }
@@ -2104,7 +2094,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
getRangeEnd(count-1) == MAX_VALUE) {
// Emit the inverse
- result.append(COMPLEMENT);
+ result.append(u'^');
for (int32_t i = 1; i < count; ++i) {
UChar32 start = getRangeEnd(i-1)+1;
@@ -2112,7 +2102,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
_appendToPat(result, start, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
- result.append(HYPHEN);
+ result.append(u'-');
}
_appendToPat(result, end, escapeUnprintable);
}
@@ -2127,7 +2117,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
_appendToPat(result, start, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
- result.append(HYPHEN);
+ result.append(u'-');
}
_appendToPat(result, end, escapeUnprintable);
}
@@ -2136,14 +2126,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
if (strings != nullptr) {
for (int32_t i = 0; i<strings->size(); ++i) {
- result.append(OPEN_BRACE);
+ result.append(u'{');
_appendToPat(result,
*(const UnicodeString*) strings->elementAt(i),
escapeUnprintable);
- result.append(CLOSE_BRACE);
+ result.append(u'}');
}
}
- return result.append(SET_CLOSE);
+ return result.append(u']');
}
/**
diff --git a/thirdparty/icu4c/common/uniset_props.cpp b/thirdparty/icu4c/common/uniset_props.cpp
index 37277fcb75..8fde5abcdd 100644
--- a/thirdparty/icu4c/common/uniset_props.cpp
+++ b/thirdparty/icu4c/common/uniset_props.cpp
@@ -47,31 +47,6 @@
U_NAMESPACE_USE
-// Define UChar constants using hex for EBCDIC compatibility
-// Used #define to reduce private static exports and memory access time.
-#define SET_OPEN ((UChar)0x005B) /*[*/
-#define SET_CLOSE ((UChar)0x005D) /*]*/
-#define HYPHEN ((UChar)0x002D) /*-*/
-#define COMPLEMENT ((UChar)0x005E) /*^*/
-#define COLON ((UChar)0x003A) /*:*/
-#define BACKSLASH ((UChar)0x005C) /*\*/
-#define INTERSECTION ((UChar)0x0026) /*&*/
-#define UPPER_U ((UChar)0x0055) /*U*/
-#define LOWER_U ((UChar)0x0075) /*u*/
-#define OPEN_BRACE ((UChar)123) /*{*/
-#define CLOSE_BRACE ((UChar)125) /*}*/
-#define UPPER_P ((UChar)0x0050) /*P*/
-#define LOWER_P ((UChar)0x0070) /*p*/
-#define UPPER_N ((UChar)78) /*N*/
-#define EQUALS ((UChar)0x003D) /*=*/
-
-//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
-static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
-//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
-//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
-//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
-static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
-
// Special property set IDs
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
@@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
#define NAME_PROP "na"
#define NAME_PROP_LENGTH 2
-/**
- * Delimiter string used in patterns to close a category reference:
- * ":]". Example: "[:Lu:]".
- */
-//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
-
// Cached sets ------------------------------------------------------------- ***
U_CDECL_BEGIN
@@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
static inline UBool
isPerlOpen(const UnicodeString &pattern, int32_t pos) { // true if "\p" or "\P" starts at pos
UChar c;
- return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
+ return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P'); // c holds the character after the backslash
}
/*static inline UBool
isPerlClose(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==CLOSE_BRACE;
+ return pattern.charAt(pos)==u'}';
}*/
static inline UBool
isNameOpen(const UnicodeString &pattern, int32_t pos) { // true if "\N" starts at pos
- return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
+ return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
}
static inline UBool
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { // true if "[:" starts at pos
- return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
+ return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
}
/*static inline UBool
isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
- return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
+ return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
}*/
// TODO memory debugging provided inside uniset.cpp
@@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
while (mode != 2 && !chars.atEnd()) {
U_ASSERT((lastItem == 0 && op == 0) ||
- (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
- (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
- op == INTERSECTION /*'&'*/)));
+ (lastItem == 1 && (op == 0 || op == u'-')) ||
+ (lastItem == 2 && (op == 0 || op == u'-' || op == u'&')));
UChar32 c = 0;
UBool literal = FALSE;
@@ -356,27 +324,27 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x5B /*'['*/ && !literal) {
+ if (c == u'[' && !literal) {
if (mode == 1) {
chars.setPos(backup); // backup
setMode = 1;
} else {
// Handle opening '[' delimiter
mode = 1;
- patLocal.append((UChar) 0x5B /*'['*/);
+ patLocal.append(u'[');
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x5E /*'^'*/ && !literal) {
+ if (c == u'^' && !literal) {
invert = TRUE;
- patLocal.append((UChar) 0x5E /*'^'*/);
+ patLocal.append(u'^');
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
}
// Fall through to handle special leading '-';
// otherwise restart loop for nested [], \p{}, etc.
- if (c == HYPHEN /*'-'*/) {
+ if (c == u'-') {
literal = TRUE;
// Fall through to handle literal '-' below
} else {
@@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
op = 0;
}
- if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
+ if (op == u'-' || op == u'&') {
patLocal.append(op);
}
@@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (op) {
- case HYPHEN: /*'-'*/
+ case u'-':
removeAll(*nested);
break;
- case INTERSECTION: /*'&'*/
+ case u'&':
retainAll(*nested);
break;
case 0:
@@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
if (!literal) {
switch (c) {
- case 0x5D /*']'*/:
+ case u']':
if (lastItem == 1) {
add(lastChar, lastChar);
_appendToPat(patLocal, lastChar, FALSE);
}
// Treat final trailing '-' as a literal
- if (op == HYPHEN /*'-'*/) {
+ if (op == u'-') {
add(op, op);
patLocal.append(op);
- } else if (op == INTERSECTION /*'&'*/) {
+ } else if (op == u'&') {
// syntaxError(chars, "Trailing '&'");
ec = U_MALFORMED_SET;
return;
}
- patLocal.append((UChar) 0x5D /*']'*/);
+ patLocal.append(u']');
mode = 2;
continue;
- case HYPHEN /*'-'*/:
+ case u'-':
if (op == 0) {
if (lastItem != 0) {
op = (UChar) c;
@@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
add(c, c);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x5D /*']'*/ && !literal) {
- patLocal.append(HYPHEN_RIGHT_BRACE, 2);
+ if (c == u']' && !literal) {
+ patLocal.append(u"-]", 2);
mode = 2;
continue;
}
@@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// syntaxError(chars, "'-' not after char or set");
ec = U_MALFORMED_SET;
return;
- case INTERSECTION /*'&'*/:
+ case u'&':
if (lastItem == 2 && op == 0) {
op = (UChar) c;
continue;
@@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// syntaxError(chars, "'&' not after set");
ec = U_MALFORMED_SET;
return;
- case 0x5E /*'^'*/:
+ case u'^':
// syntaxError(chars, "'^' not after '['");
ec = U_MALFORMED_SET;
return;
- case 0x7B /*'{'*/:
+ case u'{':
if (op != 0) {
// syntaxError(chars, "Missing operand after operator");
ec = U_MALFORMED_SET;
@@ -549,13 +517,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
while (!chars.atEnd()) {
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- if (c == 0x7D /*'}'*/ && !literal) {
+ if (c == u'}' && !literal) {
ok = TRUE;
break;
}
buf.append(c);
}
- if (buf.length() < 1 || !ok) {
+ if (!ok) {
// syntaxError(chars, "Invalid multicharacter string");
ec = U_MALFORMED_SET;
return;
@@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// we don't need to drop through to the further
// processing
add(buf);
- patLocal.append((UChar) 0x7B /*'{'*/);
+ patLocal.append(u'{');
_appendToPat(patLocal, buf, FALSE);
- patLocal.append((UChar) 0x7D /*'}'*/);
+ patLocal.append(u'}');
continue;
case SymbolTable::SYMBOL_REF:
// symbols nosymbols
@@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
chars.getPos(backup);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
- UBool anchor = (c == 0x5D /*']'*/ && !literal);
+ UBool anchor = (c == u']' && !literal);
if (symbols == 0 && !anchor) {
c = SymbolTable::SYMBOL_REF;
chars.setPos(backup);
@@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
add(U_ETHER);
usePat = TRUE;
patLocal.append((UChar) SymbolTable::SYMBOL_REF);
- patLocal.append((UChar) 0x5D /*']'*/);
+ patLocal.append(u']');
mode = 2;
continue;
}
@@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
lastChar = c;
break;
case 1:
- if (op == HYPHEN /*'-'*/) {
+ if (op == u'-') {
if (lastChar >= c) {
// Don't allow redundant (a-a) or empty (b-a) ranges;
// these are most likely typos.
@@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
RuleCharacterIterator::Pos pos;
chars.getPos(pos);
UChar32 c = chars.next(iterOpts, literal, ec);
- if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
+ if (c == u'[' || c == u'\\') {
UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
literal, ec);
- result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
- (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
+ result = (c == u'[') ? (d == u':') :
+ (d == u'N' || d == u'p' || d == u'P');
}
chars.setPos(pos);
return result && U_SUCCESS(ec);
@@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
posix = TRUE;
pos += 2;
pos = ICU_Utility::skipWhitespace(pattern, pos);
- if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
+ if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
++pos;
invert = TRUE;
}
} else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
UChar c = pattern.charAt(pos+1);
- invert = (c == UPPER_P);
- isName = (c == UPPER_N);
+ invert = (c == u'P');
+ isName = (c == u'N');
pos += 2;
pos = ICU_Utility::skipWhitespace(pattern, pos);
- if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
+ if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
// Syntax error; "\p" or "\P" not followed by "{"
FAIL(ec);
}
@@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// Look for the matching close delimiter, either :] or }
int32_t close;
if (posix) {
- close = pattern.indexOf(POSIX_CLOSE, 2, pos);
+ close = pattern.indexOf(u":]", 2, pos);
} else {
- close = pattern.indexOf(CLOSE_BRACE, pos);
+ close = pattern.indexOf(u'}', pos);
}
if (close < 0) {
// Syntax error; close delimiter missing
@@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// Look for an '=' sign. If this is present, we will parse a
// medium \p{gc=Cf} or long \p{GeneralCategory=Format}
// pattern.
- int32_t equals = pattern.indexOf(EQUALS, pos);
+ int32_t equals = pattern.indexOf(u'=', pos);
UnicodeString propName, valueName;
if (equals >= 0 && equals < close && !isName) {
// Equals seen; parse medium/long pattern
diff --git a/thirdparty/icu4c/common/unisetspan.cpp b/thirdparty/icu4c/common/unisetspan.cpp
index 68e44d91ee..fe0d74f5b2 100644
--- a/thirdparty/icu4c/common/unisetspan.cpp
+++ b/thirdparty/icu4c/common/unisetspan.cpp
@@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ if (length16==0) {
+ continue; // skip the empty string
+ }
UBool thisRelevant;
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
if(spanLength<length16) { // Relevant string.
@@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
- if(spanLength<length16) { // Relevant string.
+ if(spanLength<length16 && length16>0) { // Relevant string.
if(which&UTF16) {
if(which&CONTAINED) {
if(which&FWD) {
@@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
addToSpanNotSet(c);
}
}
- } else { // Irrelevant string.
+ } else { // Irrelevant string. (Also the empty string.)
if(which&UTF8) {
if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
uint8_t *s8=utf8+utf8Count;
@@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
for(i=0; i<stringsLength; ++i) {
int32_t overlap=spanLengths[i];
if(overlap==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
// Try to match this string at pos-overlap..pos.
if(overlap>=LONG_SPAN) {
@@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ if (length16==0) {
+ continue; // skip the empty string
+ }
// Try to match this string at pos-overlap..pos.
if(overlap>=LONG_SPAN) {
@@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
for(i=0; i<stringsLength; ++i) {
int32_t overlap=spanBackLengths[i];
if(overlap==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
// Try to match this string at pos-(length16-overlap)..pos-length16.
if(overlap>=LONG_SPAN) {
@@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ if (length16==0) {
+ continue; // skip the empty string
+ }
// Try to match this string at pos-(length16-overlap)..pos-length16.
if(overlap>=LONG_SPAN) {
@@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
// Try to match the strings at pos.
for(i=0; i<stringsLength; ++i) {
if(spanLengths[i]==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
return pos; // There is a set element at pos.
}
@@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
// it is easier and we only need to know whether the string is irrelevant
// which is the same in either array.
if(spanLengths[i]==ALL_CP_CONTAINED) {
- continue; // Irrelevant string.
+ continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
+ U_ASSERT(length>0);
if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
return pos; // There is a set element at pos.
}
diff --git a/thirdparty/icu4c/common/uprops.h b/thirdparty/icu4c/common/uprops.h
index 8bf929919f..09830bdeb9 100644
--- a/thirdparty/icu4c/common/uprops.h
+++ b/thirdparty/icu4c/common/uprops.h
@@ -310,55 +310,12 @@ u_isgraphPOSIX(UChar32 c);
U_CFUNC UBool
u_isprintPOSIX(UChar32 c);
-/** Turn a bit index into a bit flag. @internal */
-#define FLAG(n) ((uint32_t)1<<(n))
-
-/** Flags for general categories in the order of UCharCategory. @internal */
-#define _Cn FLAG(U_GENERAL_OTHER_TYPES)
-#define _Lu FLAG(U_UPPERCASE_LETTER)
-#define _Ll FLAG(U_LOWERCASE_LETTER)
-#define _Lt FLAG(U_TITLECASE_LETTER)
-#define _Lm FLAG(U_MODIFIER_LETTER)
-/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
-#define _Mn FLAG(U_NON_SPACING_MARK)
-#define _Me FLAG(U_ENCLOSING_MARK)
-#define _Mc FLAG(U_COMBINING_SPACING_MARK)
-#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER)
-#define _Nl FLAG(U_LETTER_NUMBER)
-#define _No FLAG(U_OTHER_NUMBER)
-#define _Zs FLAG(U_SPACE_SEPARATOR)
-#define _Zl FLAG(U_LINE_SEPARATOR)
-#define _Zp FLAG(U_PARAGRAPH_SEPARATOR)
-#define _Cc FLAG(U_CONTROL_CHAR)
-#define _Cf FLAG(U_FORMAT_CHAR)
-#define _Co FLAG(U_PRIVATE_USE_CHAR)
-#define _Cs FLAG(U_SURROGATE)
-#define _Pd FLAG(U_DASH_PUNCTUATION)
-#define _Ps FLAG(U_START_PUNCTUATION)
-/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
-/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
-#define _Po FLAG(U_OTHER_PUNCTUATION)
-#define _Sm FLAG(U_MATH_SYMBOL)
-#define _Sc FLAG(U_CURRENCY_SYMBOL)
-#define _Sk FLAG(U_MODIFIER_SYMBOL)
-#define _So FLAG(U_OTHER_SYMBOL)
-#define _Pi FLAG(U_INITIAL_PUNCTUATION)
-/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
-
/** Some code points. @internal */
enum {
TAB =0x0009,
LF =0x000a,
FF =0x000c,
CR =0x000d,
- U_A =0x0041,
- U_F =0x0046,
- U_Z =0x005a,
- U_a =0x0061,
- U_f =0x0066,
- U_z =0x007a,
- DEL =0x007f,
- NL =0x0085,
NBSP =0x00a0,
CGJ =0x034f,
FIGURESP=0x2007,
@@ -367,15 +324,6 @@ enum {
ZWJ =0x200d,
RLM =0x200f,
NNBSP =0x202f,
- WJ =0x2060,
- INHSWAP =0x206a,
- NOMDIG =0x206f,
- U_FW_A =0xff21,
- U_FW_F =0xff26,
- U_FW_Z =0xff3a,
- U_FW_a =0xff41,
- U_FW_f =0xff46,
- U_FW_z =0xff5a,
ZWNBSP =0xfeff
};
diff --git a/thirdparty/icu4c/common/uresbund.cpp b/thirdparty/icu4c/common/uresbund.cpp
index 2ece87897d..5ea4187100 100644
--- a/thirdparty/icu4c/common/uresbund.cpp
+++ b/thirdparty/icu4c/common/uresbund.cpp
@@ -92,6 +92,15 @@ static UBool chopLocale(char *name) {
}
/**
+ * Called to check whether a name without '_' still needs to be checked for a parent
+ * (some code had assumed that only locale IDs with '_' could have a non-root parent).
+ * Only exactly "nb" and "nn" qualify, not fragments like "n". We may want a better way.
+ */
+static UBool mayHaveParent(char *name) {
+ return (name[0] == 'n' && (name[1] == 'b' || name[1] == 'n') && name[2] == 0); // && stops at the NUL
+}
+
+/**
* Internal function
*/
static void entryIncrease(UResourceDataEntry *entry) {
@@ -529,8 +538,8 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
char name[], int32_t nameCapacity,
UBool usingUSRData, char usrDataPath[], UErrorCode *status) {
if (U_FAILURE(*status)) { return FALSE; }
- UBool hasChopped = TRUE;
- while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback &&
+ UBool checkParent = TRUE;
+ while (checkParent && t1->fParent == NULL && !t1->fData.noFallback &&
res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) {
Resource parentRes = res_getResource(&t1->fData, "%%Parent");
if (parentRes != RES_BOGUS) { // An explicit parent was found.
@@ -573,7 +582,7 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
}
}
t1 = t2;
- hasChopped = chopLocale(name);
+ checkParent = chopLocale(name) || mayHaveParent(name);
}
return TRUE;
}
@@ -692,7 +701,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
}
}
}
- if (hasChopped && !isRoot) {
+ if ((hasChopped || mayHaveParent(name)) && !isRoot) {
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
goto finish;
}
@@ -716,7 +725,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
hasRealData = TRUE;
isDefault = TRUE;
// TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path?
- if (hasChopped && !isRoot) {
+ if ((hasChopped || mayHaveParent(name)) && !isRoot) {
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
goto finish;
}
@@ -1908,6 +1917,8 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
} else {
break;
}
+ } else if (res == RES_BOGUS) {
+ break;
}
} while(*myPath); /* Continue until the whole path is consumed */
}
@@ -3019,7 +3030,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status)
U_CAPI UBool U_EXPORT2
ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){
if(res1==NULL || res2==NULL){
- return res1==res2; /* pointer comparision */
+ return res1==res2; /* pointer comparison */
}
if(res1->fKey==NULL|| res2->fKey==NULL){
return (res1->fKey==res2->fKey);
diff --git a/thirdparty/icu4c/common/uresdata.cpp b/thirdparty/icu4c/common/uresdata.cpp
index ae731e4544..9af081be40 100644
--- a/thirdparty/icu4c/common/uresdata.cpp
+++ b/thirdparty/icu4c/common/uresdata.cpp
@@ -960,14 +960,6 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch
if(URES_IS_TABLE(type)) {
*key = pathP;
t2 = res_getTableItemByKey(pResData, t1, &indexR, key);
- if(t2 == RES_BOGUS) {
- /* if we fail to get the resource by key, maybe we got an index */
- indexR = uprv_strtol(pathP, &closeIndex, 10);
- if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) {
- /* if we indeed have an index, try to get the item by index */
- t2 = res_getTableItemByIndex(pResData, t1, indexR, key);
- } // else t2 is already RES_BOGUS
- }
} else if(URES_IS_ARRAY(type)) {
indexR = uprv_strtol(pathP, &closeIndex, 10);
if(indexR >= 0 && *closeIndex == 0) {
diff --git a/thirdparty/icu4c/common/uresimp.h b/thirdparty/icu4c/common/uresimp.h
index 69d82566fe..f038dedace 100644
--- a/thirdparty/icu4c/common/uresimp.h
+++ b/thirdparty/icu4c/common/uresimp.h
@@ -270,11 +270,13 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
* function can perform fallback on the sub-resources of the table.
* @param resB a resource
* @param inKey a key associated with the requested resource
+ * @param len if not NULL, used to return the length of the string
* @param status: fills in the outgoing error code
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
* could be a non-failing error
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
- * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
+ * @return returns a pointer to a zero-terminated UChar array which lives in a
+ * memory mapped/DLL file.
*/
U_CAPI const UChar* U_EXPORT2
ures_getStringByKeyWithFallback(const UResourceBundle *resB,
diff --git a/thirdparty/icu4c/common/uset.cpp b/thirdparty/icu4c/common/uset.cpp
index eae7981d52..a7e3046dbf 100644
--- a/thirdparty/icu4c/common/uset.cpp
+++ b/thirdparty/icu4c/common/uset.cpp
@@ -117,6 +117,12 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen) {
}
U_CAPI void U_EXPORT2
+uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) { // forwards str to UnicodeSet::removeAll
+ UnicodeString s(length==-1, str, length); // NOTE(review): length==-1 presumably flags NUL-terminated str -- confirm
+ ((UnicodeSet*) set)->UnicodeSet::removeAll(s); // set is used as a UnicodeSet*; call is class-qualified
+}
+
+U_CAPI void U_EXPORT2
uset_removeAll(USet* set, const USet* remove) {
((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
}
@@ -127,6 +133,18 @@ uset_retain(USet* set, UChar32 start, UChar32 end) {
}
U_CAPI void U_EXPORT2
+uset_retainString(USet *set, const UChar *str, int32_t length) { // forwards str to UnicodeSet::retain
+ UnicodeString s(length==-1, str, length); // NOTE(review): length==-1 presumably flags NUL-terminated str -- confirm
+ ((UnicodeSet*) set)->UnicodeSet::retain(s); // set is used as a UnicodeSet*; call is class-qualified
+}
+
+U_CAPI void U_EXPORT2
+uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) { // forwards str to UnicodeSet::retainAll
+ UnicodeString s(length==-1, str, length); // NOTE(review): length==-1 presumably flags NUL-terminated str -- confirm
+ ((UnicodeSet*) set)->UnicodeSet::retainAll(s); // set is used as a UnicodeSet*; call is class-qualified
+}
+
+U_CAPI void U_EXPORT2
uset_retainAll(USet* set, const USet* retain) {
((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
}
@@ -142,6 +160,23 @@ uset_complement(USet* set) {
}
U_CAPI void U_EXPORT2
+uset_complementRange(USet *set, UChar32 start, UChar32 end) { // forwards the start/end pair to UnicodeSet::complement
+ ((UnicodeSet*) set)->UnicodeSet::complement(start, end); // set is used as a UnicodeSet*; call is class-qualified
+}
+
+U_CAPI void U_EXPORT2
+uset_complementString(USet *set, const UChar *str, int32_t length) { // forwards str to UnicodeSet::complement
+ UnicodeString s(length==-1, str, length); // NOTE(review): length==-1 presumably flags NUL-terminated str -- confirm
+ ((UnicodeSet*) set)->UnicodeSet::complement(s); // set is used as a UnicodeSet*; call is class-qualified
+}
+
+U_CAPI void U_EXPORT2
+uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) { // forwards str to UnicodeSet::complementAll
+ UnicodeString s(length==-1, str, length); // NOTE(review): length==-1 presumably flags NUL-terminated str -- confirm
+ ((UnicodeSet*) set)->UnicodeSet::complementAll(s); // set is used as a UnicodeSet*; call is class-qualified
+}
+
+U_CAPI void U_EXPORT2
uset_complementAll(USet* set, const USet* complement) {
((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
}
diff --git a/thirdparty/icu4c/common/usprep.cpp b/thirdparty/icu4c/common/usprep.cpp
index 8351a77370..874ffc63a8 100644
--- a/thirdparty/icu4c/common/usprep.cpp
+++ b/thirdparty/icu4c/common/usprep.cpp
@@ -575,7 +575,7 @@ usprep_map( const UStringPrepProfile* profile,
}
}else if(type==USPREP_DELETE){
- // just consume the codepoint and contine
+ // just consume the codepoint and continue
continue;
}
//copy the code point into destination
diff --git a/thirdparty/icu4c/common/ustr_wcs.cpp b/thirdparty/icu4c/common/ustr_wcs.cpp
index e9f278e969..89d0762480 100644
--- a/thirdparty/icu4c/common/ustr_wcs.cpp
+++ b/thirdparty/icu4c/common/ustr_wcs.cpp
@@ -364,7 +364,7 @@ _strFromWCS( UChar *dest,
}
/* we have found a null so convert the
- * chunk from begining of non-null char to null
+ * chunk from beginning of non-null char to null
*/
retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
@@ -387,7 +387,7 @@ _strFromWCS( UChar *dest,
* null terminate it and convert wchar_ts to chars
*/
if(nulLen >= _STACK_BUFFER_CAPACITY){
- /* Should rarely occcur */
+ /* Should rarely occur */
/* allocate new buffer buffer */
pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
if(pWStack==NULL){
diff --git a/thirdparty/icu4c/common/utext.cpp b/thirdparty/icu4c/common/utext.cpp
index 763b6684fb..d79f8141bb 100644
--- a/thirdparty/icu4c/common/utext.cpp
+++ b/thirdparty/icu4c/common/utext.cpp
@@ -382,7 +382,7 @@ utext_previous32From(UText *ut, int64_t index) {
//
UChar32 cPrev; // The character preceding cCurr, which is what we will return.
- // Address the chunk containg the position preceding the incoming index
+ // Address the chunk containing the position preceding the incoming index
// A tricky edge case:
// We try to test the requested native index against the chunkNativeStart to determine
// whether the character preceding the one at the index is in the current chunk.
@@ -894,7 +894,7 @@ struct UTF8Buf {
// one for a supplementary starting in the last normal position,
// and one for an entry for the buffer limit position.
uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
- // correspoding offset in filled part of buf.
+ // corresponding offset in filled part of buf.
int32_t align;
};
@@ -1545,7 +1545,7 @@ utf8TextMapOffsetToNative(const UText *ut) {
}
//
-// Map a native index to the corrsponding chunk offset
+// Map a native index to the corresponding chunk offset
//
static int32_t U_CALLCONV
utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
diff --git a/thirdparty/icu4c/common/util.h b/thirdparty/icu4c/common/util.h
index 9c3b76d9ed..b5fac383a2 100644
--- a/thirdparty/icu4c/common/util.h
+++ b/thirdparty/icu4c/common/util.h
@@ -13,10 +13,10 @@
#ifndef ICU_UTIL_H
#define ICU_UTIL_H
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
+#include "charstr.h"
#include "unicode/unistr.h"
-
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
//--------------------------------------------------------------------
// class ICU_Utility
// i18n utility functions, scoped into the class ICU_Utility.
diff --git a/thirdparty/icu4c/common/utracimp.h b/thirdparty/icu4c/common/utracimp.h
index f32fe1db39..945540d25a 100644
--- a/thirdparty/icu4c/common/utracimp.h
+++ b/thirdparty/icu4c/common/utracimp.h
@@ -193,7 +193,7 @@ UPRV_BLOCK_MACRO_BEGIN { \
* Trace statement for each exit point of a function that has a UTRACE_ENTRY()
* statement, and that returns a value.
*
- * @param val The function's return value, int32_t or comatible type.
+ * @param val The function's return value, int32_t or compatible type.
*
* @internal
*/
diff --git a/thirdparty/icu4c/common/uvector.cpp b/thirdparty/icu4c/common/uvector.cpp
index cf19edf646..9c7e74c6d5 100644
--- a/thirdparty/icu4c/common/uvector.cpp
+++ b/thirdparty/icu4c/common/uvector.cpp
@@ -312,7 +312,7 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
} else {
for (i=startIndex; i<count; ++i) {
/* Pointers are not always the same size as ints so to perform
- * a valid comparision we need to know whether we are being
+ * a valid comparison we need to know whether we are being
* provided an int or a pointer. */
if (hint & HINT_KEY_POINTER) {
if (key.pointer == elements[i].pointer) {
@@ -518,7 +518,7 @@ sortiComparator(const void * /*context */, const void *left, const void *right)
}
/**
- * Sort the vector, assuming it constains ints.
+ * Sort the vector, assuming it contains ints.
* (A more general sort would take a comparison function, but it's
* not clear whether UVector's UElementComparator or
* UComparator from uprv_sortAray would be more appropriate.)
diff --git a/thirdparty/icu4c/common/wintz.cpp b/thirdparty/icu4c/common/wintz.cpp
index 580cedadb6..ebf31650c2 100644
--- a/thirdparty/icu4c/common/wintz.cpp
+++ b/thirdparty/icu4c/common/wintz.cpp
@@ -124,10 +124,26 @@ uprv_detectWindowsTimeZone()
// No way to support when DST is turned off and the offset in minutes is not a multiple of 60.
if (utcOffsetMins % 60 == 0) {
char gmtOffsetTz[11] = {}; // "Etc/GMT+dd" is 11-char long with a terminal null.
- // Note '-' before 'utcOffsetMin'. The timezone ID's sign convention
- // is that a timezone ahead of UTC is Etc/GMT-<offset> and a timezone
- // behind UTC is Etc/GMT+<offset>.
- int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", -utcOffsetMins / 60);
+ // Important note on the sign convention for zones:
+ //
+ // From https://en.wikipedia.org/wiki/Tz_database#Area
+ // "In order to conform with the POSIX style, those zone names beginning with "Etc/GMT" have their sign reversed
+ // from the standard ISO 8601 convention. In the "Etc" area, zones west of GMT have a positive sign and those
+ // east have a negative sign in their name (e.g "Etc/GMT-14" is 14 hours ahead of GMT)."
+ //
+ // Regarding the POSIX style, from https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html
+ // "The offset specifies the time value you must add to the local time to get a Coordinated Universal Time value."
+ //
+ // However, the Bias value in DYNAMIC_TIME_ZONE_INFORMATION *already* follows the POSIX convention.
+ //
+ // From https://docs.microsoft.com/en-us/windows/win32/api/timezoneapi/ns-timezoneapi-dynamic_time_zone_information
+ // "The bias is the difference, in minutes, between Coordinated Universal Time (UTC) and
+ // local time. All translations between UTC and local time are based on the following formula:
+ // UTC = local time + bias"
+ //
+ // For example, a time zone that is 3 hours ahead of UTC (UTC+03:00) would have a Bias value of -180, and the
+ // corresponding time zone ID would be "Etc/GMT-3". (So there is no need to negate utcOffsetMins below.)
+ int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", utcOffsetMins / 60);
if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) {
return uprv_strdup(gmtOffsetTz);
}