diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2020-12-18 23:02:48 +0200 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2020-12-18 23:03:46 +0200 |
commit | bf5ca85d619fbb2b4635bb3294ffbf1cdf5e8656 (patch) | |
tree | 0e09c4605d57a4835ec5fab6eccae3ced4c201be | |
parent | 7ad29ed64e850fd43ba7ceb1cfaab4e015ef7b97 (diff) |
ICU: Update to upstream release 68.2
-rw-r--r-- | thirdparty/README.md | 2 | ||||
-rw-r--r-- | thirdparty/icu4c/APIChangeReport.md | 34 | ||||
-rw-r--r-- | thirdparty/icu4c/common/cmemory.h | 14 | ||||
-rw-r--r-- | thirdparty/icu4c/common/locid.cpp | 33 | ||||
-rw-r--r-- | thirdparty/icu4c/common/rbbitblb.cpp | 18 | ||||
-rw-r--r-- | thirdparty/icu4c/common/uloc.cpp | 21 | ||||
-rw-r--r-- | thirdparty/icu4c/common/unicode/docmain.h | 5 | ||||
-rw-r--r-- | thirdparty/icu4c/common/unicode/urename.h | 1 | ||||
-rw-r--r-- | thirdparty/icu4c/common/unicode/uvernum.h | 6 | ||||
-rw-r--r-- | thirdparty/icu4c/common/wintz.cpp | 192 | ||||
-rw-r--r-- | thirdparty/icu4c/icudt68l.dat | bin | 3846608 -> 3846720 bytes |
11 files changed, 275 insertions, 51 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index 1db7f5d583..34a198d788 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -206,7 +206,7 @@ Files extracted from upstream source: ## International Components for Unicode - Upstream: https://github.com/unicode-org/icu -- Version: 68.1 +- Version: 68.2 - License: Unicode Files extracted from upstream source: diff --git a/thirdparty/icu4c/APIChangeReport.md b/thirdparty/icu4c/APIChangeReport.md index 0cf9ed5bfc..5385904fd1 100644 --- a/thirdparty/icu4c/APIChangeReport.md +++ b/thirdparty/icu4c/APIChangeReport.md @@ -23,8 +23,10 @@ Removed from ICU 67 | File | API | ICU 67 | ICU 68 | |---|---|---|---| +| fmtable.h | const UFormattable* icu::Formattable::toUFormattable() | StableICU 52 | (missing) | measunit.h | LocalArray<MeasureUnit> icu::MeasureUnit::splitToSingleUnits(int32_t&, UErrorCode&) const | InternalICU 67 | (missing) | measunit.h | int32_t icu::MeasureUnit::getIndex() const | Internal | (missing) +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::resolveUnitPerUnit(const MeasureUnit&, const MeasureUnit&, bool*) | Internal | (missing) | measunit.h | <tt>static</tt> int32_t icu::MeasureUnit::getIndexCount() | Internal | (missing) | measunit.h | <tt>static</tt> int32_t icu::MeasureUnit::internalGetIndexForTypeAndSubtype(const char*, const char*) | Internal | (missing) | nounit.h | UClassID icu::NoUnit::getDynamicClassID() const | DraftICU 60 | (missing) @@ -35,6 +37,7 @@ Removed from ICU 67 | nounit.h | <tt>static</tt> NoUnit icu::NoUnit::permille() | DraftICU 60 | (missing) | nounit.h | <tt>static</tt> UClassID icu::NoUnit::getStaticClassID() | DraftICU 60 | (missing) | nounit.h | void* icu::NoUnit::clone() const | DraftICU 60 | (missing) +| uniset.h | const USet* icu::UnicodeSet::toUSet() | StableICU 4.2 | (missing) ## Deprecated @@ -57,6 +60,7 @@ Changed in ICU 68 (old, new) |---|---|---|---| | bytestrie.h | BytesTrie& icu::BytesTrie::resetToState64(uint64_t) | Draft→StableICU 65 | bytestrie.h | uint64_t icu::BytesTrie::getState64() const | Draft→StableICU 65 +| listformatter.h | <tt>static</tt> ListFormatter* icu::ListFormatter::createInstance(const Locale&, UListFormatterType, UListFormatterWidth, UErrorCode&) | Draft→StableICU 67 | localebuilder.h | UBool icu::LocaleBuilder::copyErrorTo(UErrorCode&) const | Draft→StableICU 65 | localematcher.h | Builder& icu::LocaleMatcher::Builder::addSupportedLocale(const Locale&) | Draft→StableICU 65 | localematcher.h | Builder& icu::LocaleMatcher::Builder::operator=(Builder&&) | Draft→StableICU 65 @@ -132,6 +136,13 @@ Changed in ICU 68 (old, new) | ucal.h | int32_t ucal_getHostTimeZone(UChar*, int32_t, UErrorCode*) | Draft→StableICU 65 | ucharstrie.h | UCharsTrie& icu::UCharsTrie::resetToState64(uint64_t) | Draft→StableICU 65 | ucharstrie.h | uint64_t icu::UCharsTrie::getState64() const | Draft→StableICU 65 +| ulistformatter.h | UListFormatter* ulistfmt_openForType(const char*, UListFormatterType, UListFormatterWidth, UErrorCode*) | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_AND | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_OR | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_UNITS | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_NARROW | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_SHORT | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_WIDE | Draft→StableICU 67 | uloc.h | UEnumeration* uloc_openAvailableByType(ULocAvailableType, UErrorCode*) | Draft→StableICU 65 | uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_DEFAULT | Draft→StableICU 65 | uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_ONLY_LEGACY_ALIASES | Draft→StableICU 65 @@ -151,6 +162,8 @@ Promoted to stable in ICU 68 |---|---|---|---| | bytestrie.h | BytesTrie& icu::BytesTrie::resetToState64(uint64_t) | Draft→StableICU 65 | bytestrie.h | uint64_t icu::BytesTrie::getState64() const | Draft→StableICU 65 +| fmtable.h | UFormattable* icu::Formattable::toUFormattable() | (missing) | StableICU 52 +| listformatter.h | <tt>static</tt> ListFormatter* icu::ListFormatter::createInstance(const Locale&, UListFormatterType, UListFormatterWidth, UErrorCode&) | Draft→StableICU 67 | localebuilder.h | UBool icu::LocaleBuilder::copyErrorTo(UErrorCode&) const | Draft→StableICU 65 | localematcher.h | Builder& icu::LocaleMatcher::Builder::addSupportedLocale(const Locale&) | Draft→StableICU 65 | localematcher.h | Builder& icu::LocaleMatcher::Builder::operator=(Builder&&) | Draft→StableICU 65 @@ -224,10 +237,18 @@ Promoted to stable in ICU 68 | ucal.h | int32_t ucal_getHostTimeZone(UChar*, int32_t, UErrorCode*) | Draft→StableICU 65 | ucharstrie.h | UCharsTrie& icu::UCharsTrie::resetToState64(uint64_t) | Draft→StableICU 65 | ucharstrie.h | uint64_t icu::UCharsTrie::getState64() const | Draft→StableICU 65 +| ulistformatter.h | UListFormatter* ulistfmt_openForType(const char*, UListFormatterType, UListFormatterWidth, UErrorCode*) | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_AND | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_OR | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_UNITS | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_NARROW | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_SHORT | Draft→StableICU 67 +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_WIDE | Draft→StableICU 67 | uloc.h | UEnumeration* uloc_openAvailableByType(ULocAvailableType, UErrorCode*) | Draft→StableICU 65 | uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_DEFAULT | Draft→StableICU 65 | uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_ONLY_LEGACY_ALIASES | Draft→StableICU 65 | uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_WITH_LEGACY_ALIASES | Draft→StableICU 65 +| uniset.h | USet* icu::UnicodeSet::toUSet() | (missing) | StableICU 4.2 | utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_BUNDLE | Draft→StableICU 65 | utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_DATA_FILE | Draft→StableICU 65 | utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_RES_FILE | Draft→StableICU 65 @@ -242,6 +263,7 @@ Added in ICU 68 | dtitvfmt.h | UDisplayContext icu::DateIntervalFormat::getContext(UDisplayContextType, UErrorCode&) const | (missing) | DraftICU 68 | dtitvfmt.h | void icu::DateIntervalFormat::setContext(UDisplayContext, UErrorCode&) | (missing) | DraftICU 68 | dtptngen.h | <tt>static</tt> DateTimePatternGenerator* icu::DateTimePatternGenerator::createInstanceNoStdPat(const Locale&, UErrorCode&) | (missing) | Internal +| fmtable.h | UFormattable* icu::Formattable::toUFormattable() | (missing) | StableICU 52 | localematcher.h | Builder& icu::LocaleMatcher::Builder::setMaxDistance(const Locale&, const Locale&) | (missing) | DraftICU 68 | localematcher.h | Builder& icu::LocaleMatcher::Builder::setNoDefaultLocale() | (missing) | DraftICU 68 | localematcher.h | UBool icu::LocaleMatcher::isMatch(const Locale&, const Locale&, UErrorCode&) const | (missing) | DraftICU 68 @@ -285,12 +307,14 @@ Added in ICU 68 | numberrangeformatter.h | std::pair< StringClass, StringClass > icu::number::FormattedNumberRange::getDecimalNumbers(UErrorCode&) const | (missing) | DraftICU 68 | plurrule.h | UnicodeString icu::PluralRules::select(const number::FormattedNumberRange&, UErrorCode&) const | (missing) | DraftICU 68 | plurrule.h | UnicodeString icu::PluralRules::select(const number::impl::UFormattedNumberRangeData*, UErrorCode&) const | (missing) | Internal +| plurrule.h | int32_t icu::PluralRules::getSamples(const UnicodeString&, FixedDecimal*, int32_t, UErrorCode&) | (missing) | Internal | timezone.h | <tt>static</tt> TimeZone* icu::TimeZone::forLocaleOrDefault(const Locale&) | (missing) | Internal | ucurr.h | <tt>enum</tt> UCurrNameStyle::UCURR_FORMAL_SYMBOL_NAME | (missing) | DraftICU 68 | ucurr.h | <tt>enum</tt> UCurrNameStyle::UCURR_VARIANT_SYMBOL_NAME | (missing) | DraftICU 68 | udateintervalformat.h | UDisplayContext udtitvfmt_getContext(const UDateIntervalFormat*, UDisplayContextType, UErrorCode*) | (missing) | DraftICU 68 | udateintervalformat.h | void udtitvfmt_setContext(UDateIntervalFormat*, UDisplayContext, UErrorCode*) | (missing) | DraftICU 68 | umachine.h | <tt>#define</tt> U_DEFINE_FALSE_AND_TRUE | (missing) | InternalICU 68 +| uniset.h | USet* icu::UnicodeSet::toUSet() | (missing) | StableICU 4.2 | unum.h | <tt>enum</tt> UNumberFormatMinimumGroupingDigits::UNUM_MINIMUM_GROUPING_DIGITS_AUTO | (missing) | DraftICU 68 | unum.h | <tt>enum</tt> UNumberFormatMinimumGroupingDigits::UNUM_MINIMUM_GROUPING_DIGITS_MIN2 | (missing) | DraftICU 68 | unumberformatter.h | <tt>enum</tt> UNumberUnitWidth::UNUM_UNIT_WIDTH_FORMAL | (missing) | DraftICU 68 @@ -317,7 +341,6 @@ Other existing drafts in ICU 68 | bytestream.h | void icu::ByteSink::AppendU8(const char*, int32_t) | DraftICU 67 | | bytestream.h | void icu::ByteSink::AppendU8(const char8_t*, int32_t) | DraftICU 67 | | dtptngen.h | UDateFormatHourCycle icu::DateTimePatternGenerator::getDefaultHourCycle(UErrorCode&) const | DraftICU 67 | -| listformatter.h | <tt>static</tt> ListFormatter* icu::ListFormatter::createInstance(const Locale&, UListFormatterType, UListFormatterWidth, UErrorCode&) | DraftICU 67 | | localematcher.h | Builder& icu::LocaleMatcher::Builder::setDirection(ULocMatchDirection) | DraftICU 67 | | localematcher.h | <tt>enum</tt> ULocMatchDirection::ULOCMATCH_DIRECTION_ONLY_TWO_WAY | DraftICU 67 | | localematcher.h | <tt>enum</tt> ULocMatchDirection::ULOCMATCH_DIRECTION_WITH_ONE_WAY | DraftICU 67 | @@ -349,13 +372,6 @@ Other existing drafts in ICU 68 | udateintervalformat.h | void udtitvfmt_formatCalendarToResult(const UDateIntervalFormat*, UCalendar*, UCalendar*, UFormattedDateInterval*, UErrorCode*) | DraftICU 67 | | udateintervalformat.h | void udtitvfmt_formatToResult(const UDateIntervalFormat*, UDate, UDate, UFormattedDateInterval*, UErrorCode*) | DraftICU 67 | | udatpg.h | UDateFormatHourCycle udatpg_getDefaultHourCycle(const UDateTimePatternGenerator*, UErrorCode*) | DraftICU 67 | -| ulistformatter.h | UListFormatter* ulistfmt_openForType(const char*, UListFormatterType, UListFormatterWidth, UErrorCode*) | DraftICU 67 | -| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_AND | DraftICU 67 | -| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_OR | DraftICU 67 | -| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_UNITS | DraftICU 67 | -| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_NARROW | DraftICU 67 | -| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_SHORT | DraftICU 67 | -| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_WIDE | DraftICU 67 | | uregex.h | <tt>enum</tt> URegexpFlag::UREGEX_CANON_EQ | DraftICU 2.4 | | utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_BREAK_ENGINE | DraftICU 67 | | utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_CHARACTER | DraftICU 67 | @@ -373,7 +389,7 @@ This section shows cases where the signature was "simplified" for the sake of co ## Colophon -Contents generated by StableAPI tool on Wed Sep 30 17:44:26 PDT 2020 +Contents generated by StableAPI tool on Fri Oct 23 11:32:42 PDT 2020 Copyright © 2019 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html diff --git a/thirdparty/icu4c/common/cmemory.h b/thirdparty/icu4c/common/cmemory.h index 210bc7645e..a9d9424b4e 100644 --- a/thirdparty/icu4c/common/cmemory.h +++ b/thirdparty/icu4c/common/cmemory.h @@ -725,9 +725,14 @@ public: } MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT { - fCount = other.fCount; - fPool = std::move(other.fPool); - other.fCount = 0; + // Since `this` may contain instances that need to be deleted, we can't + // just throw them away and replace them with `other`. The normal way of + // dealing with this in C++ is to swap `this` and `other`, rather than + // simply overwrite: the destruction of `other` can then take care of + // running MemoryPool::~MemoryPool() over the still-to-be-deallocated + // instances. + std::swap(fCount, other.fCount); + std::swap(fPool, other.fPool); return *this; } @@ -796,9 +801,6 @@ protected: template<typename T, int32_t stackCapacity = 8> class MaybeStackVector : protected MemoryPool<T, stackCapacity> { public: - using MemoryPool<T, stackCapacity>::MemoryPool; - using MemoryPool<T, stackCapacity>::operator=; - template<typename... Args> T* emplaceBack(Args&&... args) { return this->create(args...); diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp index 2804e36bf6..874e4a7055 100644 --- a/thirdparty/icu4c/common/locid.cpp +++ b/thirdparty/icu4c/common/locid.cpp @@ -35,6 +35,7 @@ #include "unicode/bytestream.h" #include "unicode/locid.h" +#include "unicode/localebuilder.h" #include "unicode/strenum.h" #include "unicode/stringpiece.h" #include "unicode/uloc.h" @@ -1028,7 +1029,7 @@ public: // place the the replaced locale ID in out and return true. // Otherwise return false for no replacement or error. bool replace( - const Locale& locale, CharString& out, UErrorCode status); + const Locale& locale, CharString& out, UErrorCode& status); private: const char* language; @@ -1336,10 +1337,13 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status) // Cannot use nullptr for language because that will construct // the default locale, in that case, use "und" to get the correct // locale. - Locale l(language == nullptr ? "und" : language, nullptr, script); + Locale l = LocaleBuilder() + .setLanguage(language == nullptr ? "und" : language) + .setScript(script) + .build(status); l.addLikelySubtags(status); const char* likelyRegion = l.getCountry(); - CharString* item = nullptr; + LocalPointer<CharString> item; if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) { size_t len = uprv_strlen(likelyRegion); const char* foundInReplacement = uprv_strstr(replacement, @@ -1351,20 +1355,22 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status) *(foundInReplacement-1) == ' '); U_ASSERT(foundInReplacement[len] == ' ' || foundInReplacement[len] == '\0'); - item = new CharString(foundInReplacement, (int32_t)len, status); + item.adoptInsteadAndCheckErrorCode( + new CharString(foundInReplacement, (int32_t)len, status), status); } } - if (item == nullptr) { - item = new CharString(replacement, - (int32_t)(firstSpace - replacement), status); + if (item.isNull() && U_SUCCESS(status)) { + item.adoptInsteadAndCheckErrorCode( + new CharString(replacement, + (int32_t)(firstSpace - replacement), status), status); } if (U_FAILURE(status)) { return false; } - if (item == nullptr) { + if (item.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; return false; } replacedRegion = item->data(); - toBeFreed.addElement(item, status); + toBeFreed.addElement(item.orphan(), status); } U_ASSERT(!same(region, replacedRegion)); region = replacedRegion; @@ -1453,7 +1459,7 @@ AliasReplacer::outputToString( int32_t variantsStart = out.length(); for (int32_t i = 0; i < variants.size(); i++) { out.append(SEP_CHAR, status) - .append((const char*)((UVector*)variants.elementAt(i)), + .append((const char*)(variants.elementAt(i)), status); } T_CString_toUpperCase(out.data() + variantsStart); @@ -1470,7 +1476,7 @@ AliasReplacer::outputToString( } bool -AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) +AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status) { data = AliasData::singleton(status); if (U_FAILURE(status)) { @@ -2453,9 +2459,13 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro if (U_FAILURE(status)) { return; } + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; + } int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY); int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName, bufferLength, &status) + 1; + U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); /* Handle the case the current buffer is not enough to hold the new id */ if (status == U_BUFFER_OVERFLOW_ERROR) { U_ASSERT(newLength > bufferLength); @@ -2472,6 +2482,7 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro fullName = newFullName; status = U_ZERO_ERROR; uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status); + U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING); } else { U_ASSERT(newLength <= bufferLength); } diff --git a/thirdparty/icu4c/common/rbbitblb.cpp b/thirdparty/icu4c/common/rbbitblb.cpp index bcbdab9227..70e260fc08 100644 --- a/thirdparty/icu4c/common/rbbitblb.cpp +++ b/thirdparty/icu4c/common/rbbitblb.cpp @@ -1402,12 +1402,13 @@ void RBBITableBuilder::exportTable(void *where) { U_ASSERT (sd->fAccepting <= 255); U_ASSERT (sd->fLookAhead <= 255); U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255); - row->r8.fAccepting = sd->fAccepting; - row->r8.fLookAhead = sd->fLookAhead; - row->r8.fTagsIdx = sd->fTagsIdx; + RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row; + r8->fAccepting = sd->fAccepting; + r8->fLookAhead = sd->fLookAhead; + r8->fTagsIdx = sd->fTagsIdx; for (col=0; col<catCount; col++) { U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable); - row->r8.fNextState[col] = sd->fDtran->elementAti(col); + r8->fNextState[col] = sd->fDtran->elementAti(col); } } else { U_ASSERT (sd->fAccepting <= 0xffff); @@ -1603,12 +1604,13 @@ void RBBITableBuilder::exportSafeTable(void *where) { UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state); RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); if (use8BitsForSafeTable()) { - row->r8.fAccepting = 0; - row->r8.fLookAhead = 0; - row->r8.fTagsIdx = 0; + RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row; + r8->fAccepting = 0; + r8->fLookAhead = 0; + r8->fTagsIdx = 0; for (col=0; col<catCount; col++) { U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable); - row->r8.fNextState[col] = static_cast<uint8_t>(rowString->charAt(col)); + r8->fNextState[col] = static_cast<uint8_t>(rowString->charAt(col)); } } else { row->r16.fAccepting = 0; diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp index 522f33dbe2..ebfbb50650 100644 --- a/thirdparty/icu4c/common/uloc.cpp +++ b/thirdparty/icu4c/common/uloc.cpp @@ -877,6 +877,9 @@ uloc_setKeywordValue(const char* keywordName, if(U_FAILURE(*status)) { return -1; } + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ZERO_ERROR; + } if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -914,6 +917,7 @@ uloc_setKeywordValue(const char* keywordName, startSearchHere = (char*)locale_getKeywordsStart(buffer); if(startSearchHere == NULL || (startSearchHere[1]==0)) { if(keywordValueLen == 0) { /* no keywords = nothing to remove */ + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return bufLen; } @@ -933,6 +937,7 @@ uloc_setKeywordValue(const char* keywordName, startSearchHere += keywordNameLen; *startSearchHere++ = '='; uprv_strcpy(startSearchHere, keywordValueBuffer); + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return needLen; } /* end shortcut - no @ */ @@ -1047,13 +1052,27 @@ uloc_setKeywordValue(const char* keywordName, if (!handledInputKeyAndValue || U_FAILURE(*status)) { /* if input key/value specified removal of a keyword not present in locale, or * there was an error in CharString.append, leave original locale alone. */ + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return bufLen; } // needLen = length of the part before '@' needLen = (int32_t)(startSearchHere - buffer); - return needLen + updatedKeysAndValues.extract( + // Check to see can we fit the startSearchHere, if not, return + // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it. + // We do this because this API function does not behave like most others: + // It promises never to set a U_STRING_NOT_TERMINATED_WARNING. + // When the contents fits but without the terminating NUL, in this case we need to not change + // the buffer contents and return with a buffer overflow error. + int32_t appendLength = updatedKeysAndValues.length(); + if (appendLength >= bufferCapacity - needLen) { + *status = U_BUFFER_OVERFLOW_ERROR; + return needLen + appendLength; + } + needLen += updatedKeysAndValues.extract( startSearchHere, bufferCapacity - needLen, *status); + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); + return needLen; } /* ### ID parsing implementation **************************************************/ diff --git a/thirdparty/icu4c/common/unicode/docmain.h b/thirdparty/icu4c/common/unicode/docmain.h index b7984ada03..edcb5d4e83 100644 --- a/thirdparty/icu4c/common/unicode/docmain.h +++ b/thirdparty/icu4c/common/unicode/docmain.h @@ -143,6 +143,11 @@ * <td>icu::MessageFormat</td> * </tr> * <tr> + * <td>List Formatting</td> + * <td>ulistformatter.h</td> + * <td>icu::ListFormatter</td> + * </tr> + * <tr> * <td>Number Formatting<br/>(includes currency and unit formatting)</td> * <td>unumberformatter.h, unum.h</td> * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td> diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h index 20232cd209..fe59fdd893 100644 --- a/thirdparty/icu4c/common/unicode/urename.h +++ b/thirdparty/icu4c/common/unicode/urename.h @@ -1137,6 +1137,7 @@ #define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag) #define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey) #define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType) +#define ultag_getTKeyStart U_ICU_ENTRY_POINT_RENAME(ultag_getTKeyStart) #define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags) #define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag) #define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags) diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h index a4cbb9e0fe..a46481a3fe 100644 --- a/thirdparty/icu4c/common/unicode/uvernum.h +++ b/thirdparty/icu4c/common/unicode/uvernum.h @@ -66,7 +66,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_MINOR_NUM 1 +#define U_ICU_VERSION_MINOR_NUM 2 /** The current ICU patchlevel version as an integer. * This value will change in the subsequent releases of ICU @@ -139,7 +139,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "68.1" +#define U_ICU_VERSION "68.2" /** * The current ICU library major version number as a string, for library name suffixes. @@ -158,7 +158,7 @@ /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "68.1" +#define U_ICU_DATA_VERSION "68.2" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/thirdparty/icu4c/common/wintz.cpp b/thirdparty/icu4c/common/wintz.cpp index 3730232286..580cedadb6 100644 --- a/thirdparty/icu4c/common/wintz.cpp +++ b/thirdparty/icu4c/common/wintz.cpp @@ -36,17 +36,58 @@ U_NAMESPACE_BEGIN +// Note these constants and the struct are only used when dealing with the fallback path for RDP sesssions. + +// This is the location of the time zones in the registry on Vista+ systems. +// See: https://docs.microsoft.com/windows/win32/api/timezoneapi/ns-timezoneapi-dynamic_time_zone_information +#define WINDOWS_TIMEZONES_REG_KEY_PATH L"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones" + +// Max length for a registry key is 255. +1 for null. +// See: https://docs.microsoft.com/windows/win32/sysinfo/registry-element-size-limits +#define WINDOWS_MAX_REG_KEY_LENGTH 256 + +#if U_PLATFORM_HAS_WINUWP_API == 0 + +// This is the layout of the TZI binary value in the registry. +// See: https://docs.microsoft.com/windows/win32/api/timezoneapi/ns-timezoneapi-time_zone_information +typedef struct _REG_TZI_FORMAT { + LONG Bias; + LONG StandardBias; + LONG DaylightBias; + SYSTEMTIME StandardDate; + SYSTEMTIME DaylightDate; +} REG_TZI_FORMAT; + +#endif // U_PLATFORM_HAS_WINUWP_API + /** -* Main Windows time zone detection function. -* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure. +* This is main Windows time zone detection function. +* +* It returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure. * -* Note: We use the Win32 API GetDynamicTimeZoneInformation (available since Vista+) to get the current time zone info. -* This API returns a non-localized time zone name, which is mapped to an ICU time zone ID (~ Olsen ID). +* We use the Win32 API GetDynamicTimeZoneInformation (which is available since Vista) to get the current time zone info, +* as this API returns a non-localized time zone name which can be then mapped to an ICU time zone. +* +* However, in some RDP/terminal services situations, this struct isn't always fully complete, and the TimeZoneKeyName +* field of the struct might be NULL. This can happen with some 3rd party RDP clients, and also when using older versions +* of the RDP protocol, which don't send the newer TimeZoneKeyNamei information and only send the StandardName and DaylightName. +* +* Since these 3rd party clients and older RDP clients only send the pre-Vista time zone information to the server, this means that we +* need to fallback on using the pre-Vista methods to determine the time zone. This unfortunately requires examining the registry directly +* in order to try and determine the current time zone. +* +* Note that this can however still fail in some cases though if the client and server are using different languages, as the StandardName +* that is sent by client is localized in the client's language. However, we must compare this to the names that are on the server, which +* are localized in registry using the server's language. Despite that, this is the best we can do. +* +* Note: This fallback method won't work for the UWP version though, as we can't use the registry APIs in UWP. +* +* Once we have the current Windows time zone, then we can then map it to an ICU time zone ID (~ Olsen ID). */ U_CAPI const char* U_EXPORT2 uprv_detectWindowsTimeZone() { - // Obtain the DYNAMIC_TIME_ZONE_INFORMATION info to get the non-localized time zone name. + // We first try to obtain the time zone directly by using the TimeZoneKeyName field of the DYNAMIC_TIME_ZONE_INFORMATION struct. DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI; uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI)); SYSTEMTIME systemTimeAllZero; @@ -86,22 +127,138 @@ uprv_detectWindowsTimeZone() // Note '-' before 'utcOffsetMin'. The timezone ID's sign convention // is that a timezone ahead of UTC is Etc/GMT-<offset> and a timezone // behind UTC is Etc/GMT+<offset>. - int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+d", -utcOffsetMins / 60); + int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+ld", -utcOffsetMins / 60); if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) { return uprv_strdup(gmtOffsetTz); } } } - // If DST is NOT disabled, but we have an empty TimeZoneKeyName, then it is unclear - // what we should do as this should not happen. + // If DST is NOT disabled, but the TimeZoneKeyName field of the struct is NULL, then we may be dealing with a + // RDP/terminal services session where the 'Time Zone Redirection' feature is enabled. However, either the RDP + // client sent the server incomplete info (some 3rd party RDP clients only send the StandardName and DaylightName, + // but do not send the important TimeZoneKeyName), or if the RDP server has not appropriately populated the struct correctly. + // + // In this case we unfortunately have no choice but to fallback to using the pre-Vista method of determining the + // time zone, which requires examining the registry directly. + // + // Note that this can however still fail though if the client and server are using different languages, as the StandardName + // that is sent by client is *localized* in the client's language. However, we must compare this to the names that are + // on the server, which are *localized* in registry using the server's language. + // + // One other note is that this fallback method doesn't work for the UWP version, as we can't use the registry APIs. + + // windowsTimeZoneName will point at timezoneSubKeyName if we had to fallback to using the registry, and we found a match. + WCHAR timezoneSubKeyName[WINDOWS_MAX_REG_KEY_LENGTH]; + WCHAR *windowsTimeZoneName = dynamicTZI.TimeZoneKeyName; + if (dynamicTZI.TimeZoneKeyName[0] == 0) { + +// We can't use the registry APIs in the UWP version. +#if U_PLATFORM_HAS_WINUWP_API == 1 + (void)timezoneSubKeyName; // suppress unused variable warnings. return nullptr; +#else + // Open the path to the time zones in the Windows registry. + LONG ret; + HKEY hKeyAllTimeZones = nullptr; + ret = RegOpenKeyExW(HKEY_LOCAL_MACHINE, WINDOWS_TIMEZONES_REG_KEY_PATH, 0, KEY_READ, + reinterpret_cast<PHKEY>(&hKeyAllTimeZones)); + + if (ret != ERROR_SUCCESS) { + // If we can't open the key, then we can't do much, so fail. + return nullptr; + } + + // Read the number of subkeys under the time zone registry path. + DWORD numTimeZoneSubKeys; + ret = RegQueryInfoKeyW(hKeyAllTimeZones, nullptr, nullptr, nullptr, &numTimeZoneSubKeys, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); + + if (ret != ERROR_SUCCESS) { + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + // Examine each of the subkeys to try and find a match for the localized standard name ("Std"). + // + // Note: The name of the time zone subkey itself is not localized, but the "Std" name is localized. This means + // that we could fail to find a match if the RDP client and RDP server are using different languages, but unfortunately + // there isn't much we can do about it. + HKEY hKeyTimeZoneSubKey = nullptr; + ULONG registryValueType; + WCHAR registryStandardName[WINDOWS_MAX_REG_KEY_LENGTH]; + + for (DWORD i = 0; i < numTimeZoneSubKeys; i++) { + // Note: RegEnumKeyExW wants the size of the buffer in characters. + DWORD size = UPRV_LENGTHOF(timezoneSubKeyName); + ret = RegEnumKeyExW(hKeyAllTimeZones, i, timezoneSubKeyName, &size, nullptr, nullptr, nullptr, nullptr); + + if (ret != ERROR_SUCCESS) { + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + ret = RegOpenKeyExW(hKeyAllTimeZones, timezoneSubKeyName, 0, KEY_READ, + reinterpret_cast<PHKEY>(&hKeyTimeZoneSubKey)); + + if (ret != ERROR_SUCCESS) { + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + // Note: RegQueryValueExW wants the size of the buffer in bytes. + size = sizeof(registryStandardName); + ret = RegQueryValueExW(hKeyTimeZoneSubKey, L"Std", nullptr, ®istryValueType, + reinterpret_cast<LPBYTE>(registryStandardName), &size); + + if (ret != ERROR_SUCCESS || registryValueType != REG_SZ) { + RegCloseKey(hKeyTimeZoneSubKey); + RegCloseKey(hKeyAllTimeZones); + return nullptr; + } + + // Note: wcscmp does an ordinal (byte) comparison. + if (wcscmp(reinterpret_cast<WCHAR *>(registryStandardName), dynamicTZI.StandardName) == 0) { + // Since we are comparing the *localized* time zone name, it's possible that some languages might use + // the same string for more than one time zone. Thus we need to examine the TZI data in the registry to + // compare the GMT offset (the bias), and the DST transition dates, to ensure it's the same time zone + // as the currently reported one. + REG_TZI_FORMAT registryTziValue; + uprv_memset(®istryTziValue, 0, sizeof(registryTziValue)); + + // Note: RegQueryValueExW wants the size of the buffer in bytes. + DWORD timezoneTziValueSize = sizeof(registryTziValue); + ret = RegQueryValueExW(hKeyTimeZoneSubKey, L"TZI", nullptr, ®istryValueType, + reinterpret_cast<LPBYTE>(®istryTziValue), &timezoneTziValueSize); + + if (ret == ERROR_SUCCESS) { + if ((dynamicTZI.Bias == registryTziValue.Bias) && + (memcmp((const void *)&dynamicTZI.StandardDate, (const void *)®istryTziValue.StandardDate, sizeof(SYSTEMTIME)) == 0) && + (memcmp((const void *)&dynamicTZI.DaylightDate, (const void *)®istryTziValue.DaylightDate, sizeof(SYSTEMTIME)) == 0)) + { + // We found a matching time zone. + windowsTimeZoneName = timezoneSubKeyName; + break; + } + } + } + RegCloseKey(hKeyTimeZoneSubKey); + hKeyTimeZoneSubKey = nullptr; + } + + if (hKeyTimeZoneSubKey != nullptr) { + RegCloseKey(hKeyTimeZoneSubKey); + } + if (hKeyAllTimeZones != nullptr) { + RegCloseKey(hKeyAllTimeZones); + } +#endif // U_PLATFORM_HAS_WINUWP_API } CharString winTZ; UErrorCode status = U_ZERO_ERROR; - winTZ.appendInvariantChars(UnicodeString(TRUE, dynamicTZI.TimeZoneKeyName, -1), status); + winTZ.appendInvariantChars(UnicodeString(TRUE, windowsTimeZoneName, -1), status); // Map Windows Timezone name (non-localized) to ICU timezone ID (~ Olson timezone id). StackUResourceBundle winTZBundle; @@ -123,18 +280,29 @@ uprv_detectWindowsTimeZone() int regionCodeLen = GetGeoInfoW(geoId, GEO_ISO2, regionCodeW, UPRV_LENGTHOF(regionCodeW), 0); const UChar *icuTZ16 = nullptr; - int32_t tzLen; + int32_t tzListLen = 0; if (regionCodeLen != 0) { for (int i = 0; i < UPRV_LENGTHOF(regionCodeW); i++) { regionCode[i] = static_cast<char>(regionCodeW[i]); } - icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), regionCode, &tzLen, &status); + icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), regionCode, &tzListLen, &status); } if (regionCodeLen == 0 || U_FAILURE(status)) { // fallback to default "001" (world) status = U_ZERO_ERROR; - icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), "001", &tzLen, &status); + icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), "001", &tzListLen, &status); + } + + // Note: We want the first entry in the string returned by ures_getStringByKey. + // However this string can be a space delimited list of timezones: + // Ex: "America/New_York America/Detroit America/Indiana/Petersburg ..." + // We need to stop at the first space, so we pass tzLen (instead of tzListLen) to appendInvariantChars below. + int32_t tzLen = 0; + if (tzListLen > 0) { + while (!(icuTZ16[tzLen] == u'\0' || icuTZ16[tzLen] == u' ')) { + tzLen++; + } } // Note: cloneData returns nullptr if the status is a failure, so this diff --git a/thirdparty/icu4c/icudt68l.dat b/thirdparty/icu4c/icudt68l.dat Binary files differindex 548c1a5a72..9ecea5d548 100644 --- a/thirdparty/icu4c/icudt68l.dat +++ b/thirdparty/icu4c/icudt68l.dat |