rules.
+ int32_t distance = trieNext(iter, desired, false);
+ if (distance >= 0) {
+ distance = trieNext(iter, supported, true);
+ }
+ if (distance < 0) {
+ UStringTrieResult result = iter.resetToState64(startState).next(u'*'); // <*, *>
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
+ if (uprv_strcmp(desired, supported) == 0) {
+ distance = 0; // same script
+ } else {
+ distance = iter.getValue();
+ U_ASSERT(distance >= 0);
+ }
+ if (result == USTRINGTRIE_FINAL_VALUE) {
+ distance |= DISTANCE_IS_FINAL;
+ }
+ }
+ return distance;
+}
+
+int32_t LocaleDistance::getRegionPartitionsDistance( // Returns the largest distance over all (desired, supported) partition pairs.
+ BytesTrie &iter, uint64_t startState, // startState: trie state that every desired-partition lookup restarts from
+ const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) { // NUL-terminated strings, one char per partition
+ char desired = *desiredPartitions++;
+ char supported = *supportedPartitions++;
+ U_ASSERT(desired != 0 && supported != 0); // both strings must hold at least one partition
+ // See if we have single desired/supported partitions, from NUL-terminated
+ // partition strings without explicit length.
+ bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character
+ // equivalent to: if (desLength == 1 && suppLength == 1)
+ if (*desiredPartitions == 0 && !suppLengthGt1) {
+ // Fastpath for single desired/supported partitions.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ return iter.getValue(); // explicit distance for this pair (threshold is not consulted on the fast path)
+ }
+ }
+ return getFallbackRegionDistance(iter, startState); // no explicit entry: use the <*, *> distance
+ }
+
+ const char *supportedStart = supportedPartitions - 1; // for restart of inner loop
+ int32_t regionDistance = 0; // running maximum of the pair distances seen so far
+ // Fall back to * only once, not for each pair of partition strings.
+ bool star = false;
+ for (;;) {
+ // Look up each desired-partition string only once,
+ // not for each (desired, supported) pair.
+ UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG);
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ uint64_t desState = suppLengthGt1 ? iter.getState64() : 0; // only restored below when another supported partition follows
+ for (;;) {
+ result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG);
+ int32_t d;
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ d = iter.getValue();
+ } else if (star) {
+ d = 0; // the fallback distance was already taken into account once; do not look it up again
+ } else {
+ d = getFallbackRegionDistance(iter, startState);
+ star = true;
+ }
+ if (d > threshold) {
+ return d; // above the threshold: stop early and report this distance
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ if ((supported = *supportedPartitions++) != 0) {
+ iter.resetToState64(desState); // back to the state just after the desired partition
+ } else {
+ break;
+ }
+ }
+ } else if (!star) { // desired partition has no continuation in the trie: take the fallback once
+ int32_t d = getFallbackRegionDistance(iter, startState);
+ if (d > threshold) {
+ return d;
+ } else if (regionDistance < d) {
+ regionDistance = d;
+ }
+ star = true;
+ }
+ if ((desired = *desiredPartitions++) != 0) {
+ iter.resetToState64(startState); // next desired partition starts over from startState
+ supportedPartitions = supportedStart; // and rescans the supported partitions from their first character
+ supported = *supportedPartitions++;
+ } else {
+ break;
+ }
+ }
+ return regionDistance;
+}
+
+int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) { // Returns the value stored under '*' from startState.
+#if U_DEBUG
+ UStringTrieResult result = // captured only so the assertion below can inspect it
+#endif
+ iter.resetToState64(startState).next(u'*'); // <*, *>
+ U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); // the '*' entry is expected to carry a value
+ int32_t distance = iter.getValue();
+ U_ASSERT(distance >= 0);
+ return distance;
+}
+
+int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) { // Feeds subtag s into the trie; returns -1 on any mismatch.
+ uint8_t c;
+ if ((c = *s) == 0) {
+ return -1; // no empty subtags in the distance data
+ }
+ for (;;) {
+ c = uprv_invCharToAscii(c);
+ // EBCDIC: If *s is not an invariant character,
+ // then c is now 0 and will simply not match anything, which is harmless.
+ uint8_t next = *++s; // look ahead one character to detect the end of the subtag
+ if (next != 0) {
+ if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+ return -1; // the trie cannot continue past this character
+ }
+ } else {
+ // last character of this subtag
+ UStringTrieResult result = iter.next(c | END_OF_SUBTAG);
+ if (wantValue) { // caller wants the value stored at the end of this subtag
+ if (USTRINGTRIE_HAS_VALUE(result)) {
+ int32_t value = iter.getValue();
+ if (result == USTRINGTRIE_FINAL_VALUE) {
+ value |= DISTANCE_IS_FINAL; // flag that the trie has nothing beyond this value
+ }
+ return value;
+ }
+ } else { // caller only needs to know that more subtags can follow
+ if (USTRINGTRIE_HAS_NEXT(result)) {
+ return 0;
+ }
+ }
+ return -1;
+ }
+ c = next;
+ }
+}
+
+UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const { // True if lsr is equivalent to any entry of paradigmLSRs.
+ // Linear search for a very short list (length 6 as of 2019),
+ // because we look for equivalence not equality, and
+ // because it's easy.
+ // If there are many paradigm LSRs we should use a hash set
+ // with custom comparator and hasher.
+ U_ASSERT(paradigmLSRsLength <= 15); // guards the "very short list" assumption above
+ for (int32_t i = 0; i < paradigmLSRsLength; ++i) {
+ if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; }
+ }
+ return false;
+}
+
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/locdistance.h b/thirdparty/icu4c/common/locdistance.h
new file mode 100644
index 0000000000..51b777e627
--- /dev/null
+++ b/thirdparty/icu4c/common/locdistance.h
@@ -0,0 +1,151 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// locdistance.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCDISTANCE_H__
+#define __LOCDISTANCE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localematcher.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "lsr.h"
+
+U_NAMESPACE_BEGIN
+
+struct LocaleDistanceData;
+
+/**
+ * Offline-built data for LocaleMatcher.
+ * Holds the distance trie, region partitions, paradigm LSRs and default distances used to compare LSRs.
+ */
+class LocaleDistance final : public UMemory {
+public:
+ static const LocaleDistance *getSingleton(UErrorCode &errorCode);
+
+ static int32_t shiftDistance(int32_t distance) { // integer distance -> fixed point with DISTANCE_SHIFT fraction bits
+ return distance << DISTANCE_SHIFT;
+ }
+
+ static int32_t getShiftedDistance(int32_t indexAndDistance) { // extracts the low DISTANCE_MASK bits (still shifted)
+ return indexAndDistance & DISTANCE_MASK;
+ }
+
+ static double getDistanceDouble(int32_t indexAndDistance) { // distance including its fraction bits, as a double
+ double shiftedDistance = getShiftedDistance(indexAndDistance);
+ return shiftedDistance / (1 << DISTANCE_SHIFT);
+ }
+
+ static int32_t getDistanceFloor(int32_t indexAndDistance) { // integer part of the distance; fraction bits are dropped
+ return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
+ }
+
+ static int32_t getIndex(int32_t indexAndDistance) { // drops the low INDEX_SHIFT distance bits, leaving the index
+ // assert indexAndDistance >= 0;
+ return indexAndDistance >> INDEX_SHIFT;
+ }
+
+ /**
+ * Finds the supported LSR with the smallest distance from the desired one.
+ * Equivalent LSR subtags must be normalized into a canonical form.
+ *
+ * Returns the index of the lowest-distance supported LSR in the high bits
+ * (negative if none has a distance below the threshold),
+ * and its distance (0..ABOVE_THRESHOLD) in the low bits.
+ */
+ int32_t getBestIndexAndDistance(const LSR &desired,
+ const LSR **supportedLSRs, int32_t supportedLSRsLength,
+ int32_t shiftedThreshold,
+ ULocMatchFavorSubtag favorSubtag,
+ ULocMatchDirection direction) const;
+
+ UBool isParadigmLSR(const LSR &lsr) const;
+
+ int32_t getDefaultScriptDistance() const {
+ return defaultScriptDistance;
+ }
+
+ int32_t getDefaultDemotionPerDesiredLocale() const {
+ return defaultDemotionPerDesiredLocale;
+ }
+
+private:
+ // The distance is shifted left to gain some fraction bits.
+ static constexpr int32_t DISTANCE_SHIFT = 3;
+ static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
+ // 7 bits for 0..100
+ static constexpr int32_t DISTANCE_INT_SHIFT = 7;
+ static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
+ static constexpr int32_t DISTANCE_MASK = 0x3ff; // INDEX_SHIFT (7 + 3 = 10) low bits
+ // (disabled) static constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
+ static constexpr int32_t INDEX_NEG_1 = 0xfffffc00; // bit pattern of -1 << INDEX_SHIFT: index -1 with zero distance bits
+
+ LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
+ LocaleDistance(const LocaleDistance &other) = delete;
+ LocaleDistance &operator=(const LocaleDistance &other) = delete;
+
+ static void initLocaleDistance(UErrorCode &errorCode);
+
+ UBool isMatch(const LSR &desired, const LSR &supported, // single-pair wrapper around getBestIndexAndDistance()
+ int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
+ const LSR *pSupp = &supported;
+ return getBestIndexAndDistance(
+ desired, &pSupp, 1,
+ shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0; // negative means nothing below the threshold
+ }
+
+ static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
+ const char *desired, const char *supported);
+
+ static int32_t getRegionPartitionsDistance(
+ BytesTrie &iter, uint64_t startState,
+ const char *desiredPartitions, const char *supportedPartitions,
+ int32_t threshold);
+
+ static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
+
+ static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
+
+ const char *partitionsForRegion(const LSR &lsr) const { // two-step lookup: region index -> partitions index -> partition string
+ // ill-formed region -> one non-matching string
+ int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
+ return partitionArrays[pIndex];
+ }
+
+ int32_t getDefaultRegionDistance() const {
+ return defaultRegionDistance;
+ }
+
+ const XLikelySubtags &likelySubtags;
+
+ // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
+ // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
+ // There is also a trie value for each subsequence of whole subtags.
+ // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
+ BytesTrie trie;
+
+ /**
+ * Maps each region to zero or more single-character partitions.
+ */
+ const uint8_t *regionToPartitionsIndex; // indexed by LSR::regionIndex; yields an index into partitionArrays
+ const char **partitionArrays; // NUL-terminated partition strings, one character per partition
+
+ /**
+ * Used to get the paradigm region for a cluster, if there is one.
+ */
+ const LSR *paradigmLSRs;
+ int32_t paradigmLSRsLength;
+
+ int32_t defaultLanguageDistance;
+ int32_t defaultScriptDistance;
+ int32_t defaultRegionDistance;
+ int32_t minRegionDistance;
+ int32_t defaultDemotionPerDesiredLocale;
+};
+
+U_NAMESPACE_END
+
+#endif // __LOCDISTANCE_H__
diff --git a/thirdparty/icu4c/common/locdspnm.cpp b/thirdparty/icu4c/common/locdspnm.cpp
new file mode 100644
index 0000000000..43334f5196
--- /dev/null
+++ b/thirdparty/icu4c/common/locdspnm.cpp
@@ -0,0 +1,1110 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/locdspnm.h"
+#include "unicode/simpleformatter.h"
+#include "unicode/ucasemap.h"
+#include "unicode/ures.h"
+#include "unicode/udisplaycontext.h"
+#include "unicode/brkiter.h"
+#include "unicode/ucurr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "ulocimp.h"
+#include "umutex.h"
+#include "ureslocs.h"
+#include "uresimp.h"
+
+#include <stdarg.h>
+
+/**
+ * Concatenate a number of null-terminated strings to buffer, leaving a
+ * null-terminated string. The last argument should be the null pointer.
+ * Strings that do not fit are silently truncated. Return the length of the string in
+ * the buffer, not counting the trailing null, or -1 on error (buffer is null, or buflen < 1).
+ */
+static int32_t ncat(char *buffer, uint32_t buflen, ...) {
+ va_list args;
+ char *str;
+
+ if (buffer == NULL || buflen < 1) { // validate before doing any pointer arithmetic on buffer
+ return -1;
+ }
+ char *p = buffer; // write cursor
+ const char* e = buffer + buflen - 1; // last slot, reserved for the terminating null
+
+ va_start(args, buflen);
+ while ((str = va_arg(args, char *)) != 0) { // a null pointer argument ends the list
+ char c;
+ while (p != e && (c = *str++) != 0) { // copy until this string ends or the buffer is full
+ *p++ = c;
+ }
+ }
+ *p = 0;
+ va_end(args);
+
+ return static_cast<int32_t>(p - buffer);
+}
+
+U_NAMESPACE_BEGIN
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Access resource data for locale components.
+// Wrap code in uloc.c for now.
+class ICUDataTable {
+ const char* path; // heap copy of the constructor's path; NULL if none was given or allocation failed
+ Locale locale; // locale used for lookups; stays root unless the path copy succeeded
+
+public:
+ ICUDataTable(const char* path, const Locale& locale);
+ ~ICUDataTable();
+
+ const Locale& getLocale();
+
+ UnicodeString& get(const char* tableKey, const char* itemKey, // on a miss, result becomes itemKey itself
+ UnicodeString& result) const;
+ UnicodeString& get(const char* tableKey, const char* subTableKey, const char* itemKey,
+ UnicodeString& result) const;
+
+ UnicodeString& getNoFallback(const char* tableKey, const char* itemKey, // on failure, result is set to bogus
+ UnicodeString &result) const;
+ UnicodeString& getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey,
+ UnicodeString &result) const;
+};
+
+inline UnicodeString & // Convenience overload: same lookup with no sub-table key.
+ICUDataTable::get(const char* tableKey, const char* itemKey, UnicodeString& result) const {
+ return get(tableKey, NULL, itemKey, result);
+}
+
+inline UnicodeString & // Convenience overload: same lookup with no sub-table key.
+ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeString& result) const {
+ return getNoFallback(tableKey, NULL, itemKey, result);
+}
+
+ICUDataTable::ICUDataTable(const char* path, const Locale& locale) // Copies path onto the heap and remembers the locale.
+ : path(NULL), locale(Locale::getRoot()) // defaults that remain in place if path is NULL or the copy fails
+{
+ if (path) {
+ int32_t len = static_cast<int32_t>(uprv_strlen(path));
+ this->path = (const char*) uprv_malloc(len + 1); // +1 for the terminating NUL
+ if (this->path) {
+ uprv_strcpy((char *)this->path, path);
+ this->locale = locale; // only adopt the requested locale once the path copy succeeded
+ }
+ }
+}
+
+ICUDataTable::~ICUDataTable() { // Releases the path copy made by the constructor.
+ if (path) {
+ uprv_free((void*) path); // cast drops const so the buffer can be handed to uprv_free
+ path = NULL;
+ }
+}
+
+const Locale& // Returns the locale member by reference (root unless the constructor adopted another).
+ICUDataTable::getLocale() {
+ return locale;
+}
+
+UnicodeString & // Looks up a display string; if none is found, result becomes itemKey itself.
+ICUDataTable::get(const char* tableKey, const char* subTableKey, const char* itemKey,
+ UnicodeString &result) const {
+ UErrorCode status = U_ZERO_ERROR; // local status: lookup errors are not reported to the caller
+ int32_t len = 0;
+
+ const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(),
+ tableKey, subTableKey, itemKey,
+ &len, &status);
+ if (U_SUCCESS(status) && len > 0) { // an empty string is treated like a miss
+ return result.setTo(s, len);
+ }
+ return result.setTo(UnicodeString(itemKey, -1, US_INV)); // substitute the key, converted as invariant characters
+}
+
+UnicodeString & // Looks up a display string; on failure result is set to bogus instead of the key.
+ICUDataTable::getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey,
+ UnicodeString& result) const {
+ UErrorCode status = U_ZERO_ERROR; // local status: lookup errors are only signalled through a bogus result
+ int32_t len = 0;
+
+ const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(), // same lookup call as get(); only the miss handling differs
+ tableKey, subTableKey, itemKey,
+ &len, &status);
+ if (U_SUCCESS(status)) { // unlike get(), an empty string is accepted here
+ return result.setTo(s, len);
+ }
+
+ result.setToBogus();
+ return result;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+LocaleDisplayNames::~LocaleDisplayNames() {} // out-of-line definition with an empty body
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if 0 // currently unused: everything down to the matching #endif is compiled out
+
+class DefaultLocaleDisplayNames : public LocaleDisplayNames { // every name lookup below echoes its input code as text
+ UDialectHandling dialectHandling;
+
+public:
+ // constructor
+ DefaultLocaleDisplayNames(UDialectHandling dialectHandling);
+
+ virtual ~DefaultLocaleDisplayNames();
+
+ virtual const Locale& getLocale() const;
+ virtual UDialectHandling getDialectHandling() const;
+
+ virtual UnicodeString& localeDisplayName(const Locale& locale,
+ UnicodeString& result) const;
+ virtual UnicodeString& localeDisplayName(const char* localeId,
+ UnicodeString& result) const;
+ virtual UnicodeString& languageDisplayName(const char* lang,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(const char* script,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const;
+ virtual UnicodeString& regionDisplayName(const char* region,
+ UnicodeString& result) const;
+ virtual UnicodeString& variantDisplayName(const char* variant,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyDisplayName(const char* key,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyValueDisplayName(const char* key,
+ const char* value,
+ UnicodeString& result) const;
+};
+
+DefaultLocaleDisplayNames::DefaultLocaleDisplayNames(UDialectHandling dialectHandling)
+ : dialectHandling(dialectHandling) {
+}
+
+DefaultLocaleDisplayNames::~DefaultLocaleDisplayNames() {
+}
+
+const Locale&
+DefaultLocaleDisplayNames::getLocale() const {
+ return Locale::getRoot(); // always reports the root locale
+}
+
+UDialectHandling
+DefaultLocaleDisplayNames::getDialectHandling() const {
+ return dialectHandling;
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::localeDisplayName(const Locale& locale,
+ UnicodeString& result) const {
+ return result = UnicodeString(locale.getName(), -1, US_INV); // the locale ID itself, converted as invariant characters
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::localeDisplayName(const char* localeId,
+ UnicodeString& result) const {
+ return result = UnicodeString(localeId, -1, US_INV);
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::languageDisplayName(const char* lang,
+ UnicodeString& result) const {
+ return result = UnicodeString(lang, -1, US_INV);
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::scriptDisplayName(const char* script,
+ UnicodeString& result) const {
+ return result = UnicodeString(script, -1, US_INV);
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const {
+ const char* name = uscript_getName(scriptCode);
+ if (name) {
+ return result = UnicodeString(name, -1, US_INV);
+ }
+ return result.remove(); // no name for this script code: return an empty string
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::regionDisplayName(const char* region,
+ UnicodeString& result) const {
+ return result = UnicodeString(region, -1, US_INV);
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::variantDisplayName(const char* variant,
+ UnicodeString& result) const {
+ return result = UnicodeString(variant, -1, US_INV);
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::keyDisplayName(const char* key,
+ UnicodeString& result) const {
+ return result = UnicodeString(key, -1, US_INV);
+}
+
+UnicodeString&
+DefaultLocaleDisplayNames::keyValueDisplayName(const char* /* key */,
+ const char* value,
+ UnicodeString& result) const {
+ return result = UnicodeString(value, -1, US_INV); // the key is ignored; only the value is echoed
+}
+
+#endif // currently unused class DefaultLocaleDisplayNames
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+class LocaleDisplayNamesImpl : public LocaleDisplayNames {
+ Locale locale; // set in initialize() from langData, or from regionData when langData is root
+ UDialectHandling dialectHandling;
+ ICUDataTable langData; // opened on U_ICUDATA_LANG
+ ICUDataTable regionData; // opened on U_ICUDATA_REGION
+ SimpleFormatter separatorFormat; // joins two name parts; default pattern "{0}, {1}"
+ SimpleFormatter format; // combines main name and remainder; default pattern "{0} ({1})"
+ SimpleFormatter keyTypeFormat; // combines keyword key and value; default pattern "{0}={1}"
+ UDisplayContext capitalizationContext;
+#if !UCONFIG_NO_BREAK_ITERATION
+ BreakIterator* capitalizationBrkIter; // owned; deleted in the destructor, NULL when not needed
+#else
+ UObject* capitalizationBrkIter;
+#endif
+ UnicodeString formatOpenParen; // parenthesis used by the pattern (ASCII or fullwidth) ...
+ UnicodeString formatReplaceOpenParen; // ... and the bracket substituted for it inside name parts
+ UnicodeString formatCloseParen;
+ UnicodeString formatReplaceCloseParen;
+ UDisplayContext nameLength;
+ UDisplayContext substitute;
+
+ // Constants for capitalization context usage types.
+ enum CapContextUsage {
+ kCapContextUsageLanguage,
+ kCapContextUsageScript,
+ kCapContextUsageTerritory,
+ kCapContextUsageVariant,
+ kCapContextUsageKey,
+ kCapContextUsageKeyValue,
+ kCapContextUsageCount // number of usage types; sizes fCapitalization
+ };
+ // Capitalization transforms. For each usage type, indicates whether to titlecase for
+ // the context specified in capitalizationContext (which we know at construction time)
+ UBool fCapitalization[kCapContextUsageCount];
+
+public:
+ // constructor
+ LocaleDisplayNamesImpl(const Locale& locale, UDialectHandling dialectHandling);
+ LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length);
+ virtual ~LocaleDisplayNamesImpl();
+
+ virtual const Locale& getLocale() const;
+ virtual UDialectHandling getDialectHandling() const;
+ virtual UDisplayContext getContext(UDisplayContextType type) const;
+
+ virtual UnicodeString& localeDisplayName(const Locale& locale,
+ UnicodeString& result) const;
+ virtual UnicodeString& localeDisplayName(const char* localeId,
+ UnicodeString& result) const;
+ virtual UnicodeString& languageDisplayName(const char* lang,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(const char* script,
+ UnicodeString& result) const;
+ virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const;
+ virtual UnicodeString& regionDisplayName(const char* region,
+ UnicodeString& result) const;
+ virtual UnicodeString& variantDisplayName(const char* variant,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyDisplayName(const char* key,
+ UnicodeString& result) const;
+ virtual UnicodeString& keyValueDisplayName(const char* key,
+ const char* value,
+ UnicodeString& result) const;
+private:
+ UnicodeString& localeIdName(const char* localeId,
+ UnicodeString& result, bool substitute) const; // note: this parameter shadows the substitute member
+ UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const;
+ UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const;
+ UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const; // skipAdjust overloads: presumably skip adjustForUsageAndContext when TRUE - confirm in their definitions
+ UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const;
+ UnicodeString& keyValueDisplayName(const char* key, const char* value,
+ UnicodeString& result, UBool skipAdjust) const;
+ void initialize(void); // shared tail of both constructors
+
+ struct CapitalizationContextSink;
+};
+
+LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, // this parameter shadows the locale member, which initialize() sets
+ UDialectHandling dialectHandling)
+ : dialectHandling(dialectHandling)
+ , langData(U_ICUDATA_LANG, locale)
+ , regionData(U_ICUDATA_REGION, locale)
+ , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) // default: no capitalization context
+ , capitalizationBrkIter(NULL)
+ , nameLength(UDISPCTX_LENGTH_FULL) // default: full-length names
+ , substitute(UDISPCTX_SUBSTITUTE) // default: substitute the code when no name is found
+{
+ initialize();
+}
+
+LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, // Builds from a list of display contexts; unlisted settings keep the defaults below.
+ UDisplayContext *contexts, int32_t length)
+ : dialectHandling(ULDN_STANDARD_NAMES)
+ , langData(U_ICUDATA_LANG, locale)
+ , regionData(U_ICUDATA_REGION, locale)
+ , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE)
+ , capitalizationBrkIter(NULL)
+ , nameLength(UDISPCTX_LENGTH_FULL)
+ , substitute(UDISPCTX_SUBSTITUTE)
+{
+ while (length-- > 0) { // later entries of the same type overwrite earlier ones
+ UDisplayContext value = *contexts++;
+ UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8); // the bits above the low 8 are used as the context type
+ switch (selector) {
+ case UDISPCTX_TYPE_DIALECT_HANDLING:
+ dialectHandling = (UDialectHandling)value;
+ break;
+ case UDISPCTX_TYPE_CAPITALIZATION:
+ capitalizationContext = value;
+ break;
+ case UDISPCTX_TYPE_DISPLAY_LENGTH:
+ nameLength = value;
+ break;
+ case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
+ substitute = value;
+ break;
+ default: // unrecognized context types are ignored
+ break;
+ }
+ }
+ initialize();
+}
+
+struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink { // Fills parent.fCapitalization from the "contextTransforms" table.
+ UBool hasCapitalizationUsage; // TRUE once any usage was found that requires titlecasing
+ LocaleDisplayNamesImpl& parent;
+
+ CapitalizationContextSink(LocaleDisplayNamesImpl& _parent)
+ : hasCapitalizationUsage(FALSE), parent(_parent) {}
+ virtual ~CapitalizationContextSink();
+
+ virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
+ UErrorCode &errorCode) {
+ ResourceTable contexts = value.getTable(errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) { // key and value are reused for each table entry
+
+ CapContextUsage usageEnum;
+ if (uprv_strcmp(key, "key") == 0) {
+ usageEnum = kCapContextUsageKey;
+ } else if (uprv_strcmp(key, "keyValue") == 0) {
+ usageEnum = kCapContextUsageKeyValue;
+ } else if (uprv_strcmp(key, "languages") == 0) {
+ usageEnum = kCapContextUsageLanguage;
+ } else if (uprv_strcmp(key, "script") == 0) {
+ usageEnum = kCapContextUsageScript;
+ } else if (uprv_strcmp(key, "territory") == 0) {
+ usageEnum = kCapContextUsageTerritory;
+ } else if (uprv_strcmp(key, "variant") == 0) {
+ usageEnum = kCapContextUsageVariant;
+ } else {
+ continue; // usage types this class does not track
+ }
+
+ int32_t len = 0;
+ const int32_t* intVector = value.getIntVector(len, errorCode);
+ if (U_FAILURE(errorCode)) { return; }
+ if (len < 2) { continue; } // need both entries: [0] is read for UI list/menu, [1] otherwise
+
+ int32_t titlecaseInt = (parent.capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU) ? intVector[0] : intVector[1];
+ if (titlecaseInt == 0) { continue; } // zero: no titlecasing for this usage in this context
+
+ parent.fCapitalization[usageEnum] = TRUE;
+ hasCapitalizationUsage = TRUE;
+ }
+ }
+};
+
+// Virtual destructors must be defined out of line; this one has nothing to release.
+LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink() {}
+
+void // Loads patterns, parenthesis substitutes and capitalization data; called at the end of both constructors.
+LocaleDisplayNamesImpl::initialize(void) {
+ LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this; // NOTE(review): cast looks redundant, this method is not const
+ nonConstThis->locale = langData.getLocale() == Locale::getRoot()
+ ? regionData.getLocale()
+ : langData.getLocale();
+
+ UnicodeString sep;
+ langData.getNoFallback("localeDisplayPattern", "separator", sep);
+ if (sep.isBogus()) {
+ sep = UnicodeString("{0}, {1}", -1, US_INV); // built-in default separator pattern
+ }
+ UErrorCode status = U_ZERO_ERROR; // shared by the pattern calls and the resource loading below
+ separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status);
+
+ UnicodeString pattern;
+ langData.getNoFallback("localeDisplayPattern", "pattern", pattern);
+ if (pattern.isBogus()) {
+ pattern = UnicodeString("{0} ({1})", -1, US_INV); // built-in default name pattern
+ }
+ format.applyPatternMinMaxArguments(pattern, 2, 2, status);
+ if (pattern.indexOf((UChar)0xFF08) >= 0) { // pattern uses fullwidth parentheses: substitute fullwidth brackets
+ formatOpenParen.setTo((UChar)0xFF08); // fullwidth (
+ formatReplaceOpenParen.setTo((UChar)0xFF3B); // fullwidth [
+ formatCloseParen.setTo((UChar)0xFF09); // fullwidth )
+ formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ]
+ } else {
+ formatOpenParen.setTo((UChar)0x0028); // (
+ formatReplaceOpenParen.setTo((UChar)0x005B); // [
+ formatCloseParen.setTo((UChar)0x0029); // )
+ formatReplaceCloseParen.setTo((UChar)0x005D); // ]
+ }
+
+ UnicodeString ktPattern;
+ langData.get("localeDisplayPattern", "keyTypePattern", ktPattern); // NOTE(review): get() substitutes the item key on a miss instead of returning bogus
+ if (ktPattern.isBogus()) { // NOTE(review): looks unreachable for a plain miss given get() above - confirm whether getNoFallback was intended
+ ktPattern = UnicodeString("{0}={1}", -1, US_INV);
+ }
+ keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status);
+
+ uprv_memset(fCapitalization, 0, sizeof(fCapitalization)); // start with no titlecasing for any usage
+#if !UCONFIG_NO_BREAK_ITERATION
+ // Only get the context data if we need it! This is a const object so we know now...
+ // Also check whether we will need a break iterator (depends on the data)
+ UBool needBrkIter = FALSE;
+ if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) {
+ LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status)); // status may already hold a failure from the pattern calls above
+ if (U_FAILURE(status)) { return; }
+ CapitalizationContextSink sink(*this);
+ ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status);
+ if (status == U_MISSING_RESOURCE_ERROR) {
+ // Silently ignore. Not every locale has contextTransforms.
+ status = U_ZERO_ERROR;
+ } else if (U_FAILURE(status)) {
+ return;
+ }
+ needBrkIter = sink.hasCapitalizationUsage;
+ }
+ // Get a sentence break iterator if we will need it
+ if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) {
+ status = U_ZERO_ERROR;
+ capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
+ if (U_FAILURE(status)) { // on failure keep the member NULL so adjustForUsageAndContext() skips titlecasing
+ delete capitalizationBrkIter;
+ capitalizationBrkIter = NULL;
+ }
+ }
+#endif
+}
+
+LocaleDisplayNamesImpl::~LocaleDisplayNamesImpl() { // Releases the break iterator created in initialize(), if any.
+#if !UCONFIG_NO_BREAK_ITERATION
+ delete capitalizationBrkIter; // deleting NULL is a no-op
+#endif
+}
+
+const Locale& // Returns the locale chosen in initialize().
+LocaleDisplayNamesImpl::getLocale() const {
+ return locale;
+}
+
+UDialectHandling // Returns the dialect handling selected at construction.
+LocaleDisplayNamesImpl::getDialectHandling() const {
+ return dialectHandling;
+}
+
+UDisplayContext // Returns the stored setting for the given context type, or 0 for an unknown type.
+LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const {
+ switch (type) {
+ case UDISPCTX_TYPE_DIALECT_HANDLING:
+ return (UDisplayContext)dialectHandling; // stored as UDialectHandling, so cast back
+ case UDISPCTX_TYPE_CAPITALIZATION:
+ return capitalizationContext;
+ case UDISPCTX_TYPE_DISPLAY_LENGTH:
+ return nameLength;
+ case UDISPCTX_TYPE_SUBSTITUTE_HANDLING:
+ return substitute;
+ default:
+ break;
+ }
+ return (UDisplayContext)0;
+}
+
+UnicodeString& // Titlecases result in place when the capitalization context calls for it; returns result.
+LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
+ UnicodeString& result) const {
+#if !UCONFIG_NO_BREAK_ITERATION
+ // check to see whether we need to titlecase result
+ if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL &&
+ ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) {
+ // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE
+ static UMutex capitalizationBrkIterLock; // one lock shared by all instances of this class
+ Mutex lock(&capitalizationBrkIterLock); // held while toTitle() uses the break iterator
+ result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
+ }
+#endif
+ return result;
+}
+
+UnicodeString& // Builds the display name "<language/dialect> (<script, region, variant, keywords>)" for loc.
+LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
+ UnicodeString& result) const {
+ if (loc.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ UnicodeString resultName; // the main (language or dialect) part of the name
+
+ const char* lang = loc.getLanguage();
+ if (uprv_strlen(lang) == 0) {
+ lang = "root"; // an empty language is looked up as "root"
+ }
+ const char* script = loc.getScript();
+ const char* country = loc.getCountry();
+ const char* variant = loc.getVariant();
+
+ UBool hasScript = uprv_strlen(script) > 0;
+ UBool hasCountry = uprv_strlen(country) > 0;
+ UBool hasVariant = uprv_strlen(variant) > 0;
+
+ if (dialectHandling == ULDN_DIALECT_NAMES) { // try the longest combined ID first: lang_script_country, lang_script, lang_country
+ char buffer[ULOC_FULLNAME_CAPACITY];
+ do { // loop construct is so we can break early out of search
+ if (hasScript && hasCountry) {
+ ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
+ localeIdName(buffer, resultName, false);
+ if (!resultName.isBogus()) {
+ hasScript = FALSE; // both subtags are covered by the dialect name
+ hasCountry = FALSE;
+ break;
+ }
+ }
+ if (hasScript) {
+ ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
+ localeIdName(buffer, resultName, false);
+ if (!resultName.isBogus()) {
+ hasScript = FALSE;
+ break;
+ }
+ }
+ if (hasCountry) {
+ ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
+ localeIdName(buffer, resultName, false);
+ if (!resultName.isBogus()) {
+ hasCountry = FALSE;
+ break;
+ }
+ }
+ } while (FALSE);
+ }
+ if (resultName.isBogus() || resultName.isEmpty()) { // no dialect name found (or not requested): use the plain language name
+ localeIdName(lang, resultName, substitute == UDISPCTX_SUBSTITUTE);
+ if (resultName.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ }
+
+ UnicodeString resultRemainder; // everything that goes into the second slot of the pattern
+ UnicodeString temp;
+ UErrorCode status = U_ZERO_ERROR;
+
+ if (hasScript) {
+ UnicodeString script_str = scriptDisplayName(script, temp, TRUE);
+ if (script_str.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ resultRemainder.append(script_str);
+ }
+ if (hasCountry) {
+ UnicodeString region_str = regionDisplayName(country, temp, TRUE);
+ if (region_str.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ appendWithSep(resultRemainder, region_str);
+ }
+ if (hasVariant) {
+ UnicodeString variant_str = variantDisplayName(variant, temp, TRUE);
+ if (variant_str.isBogus()) {
+ result.setToBogus();
+ return result;
+ }
+ appendWithSep(resultRemainder, variant_str);
+ }
+ resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen); // parentheses inside parts become brackets
+ resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);
+
+ LocalPointer<StringEnumeration> e(loc.createKeywords(status)); // owns the keyword enumeration for the rest of this function
+ if (e.isValid() && U_SUCCESS(status)) {
+ UnicodeString temp2;
+ char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
+ const char* key;
+ while ((key = e->next((int32_t *)0, status)) != NULL) {
+ value[0] = 0;
+ loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
+ if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+ return result; // NOTE(review): result is returned unmodified here, not set to bogus - confirm this is intended
+ }
+ keyDisplayName(key, temp, TRUE);
+ temp.findAndReplace(formatOpenParen, formatReplaceOpenParen);
+ temp.findAndReplace(formatCloseParen, formatReplaceCloseParen);
+ keyValueDisplayName(key, value, temp2, TRUE);
+ temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen);
+ temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen);
+ if (temp2 != UnicodeString(value, -1, US_INV)) { // the value has a display name: show it alone
+ appendWithSep(resultRemainder, temp2);
+ } else if (temp != UnicodeString(key, -1, US_INV)) { // only the key has a display name: format key and raw value
+ UnicodeString temp3;
+ keyTypeFormat.format(temp, temp2, temp3, status);
+ appendWithSep(resultRemainder, temp3);
+ } else { // neither has a display name: emit raw "key=value"
+ appendWithSep(resultRemainder, temp)
+ .append((UChar)0x3d /* = */)
+ .append(temp2);
+ }
+ }
+ }
+
+ if (!resultRemainder.isEmpty()) {
+ format.format(resultName, resultRemainder, result.remove(), status);
+ return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+ }
+
+ result = resultName; // nothing to append: the main name is the whole result
+ return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+}
+
+// Appends src to buffer and returns buffer. An empty buffer simply becomes a copy
+// of src; otherwise the current buffer content and src are combined, in place,
+// through separatorFormat.
+UnicodeString&
+LocaleDisplayNamesImpl::appendWithSep(UnicodeString& buffer, const UnicodeString& src) const {
+    if (!buffer.isEmpty()) {
+        // The status of the formatting call is local and is not reported to the caller.
+        UErrorCode errorCode = U_ZERO_ERROR;
+        const UnicodeString *pieces[2] = { &buffer, &src };
+        separatorFormat.formatAndReplace(pieces, 2, buffer, NULL, 0, errorCode);
+        return buffer;
+    }
+    buffer.setTo(src);
+    return buffer;
+}
+
+// Convenience overload: constructs a Locale from the locale ID string and
+// forwards to the Locale-based localeDisplayName(), returning its result.
+UnicodeString&
+LocaleDisplayNamesImpl::localeDisplayName(const char* localeId,
+ UnicodeString& result) const {
+ return localeDisplayName(Locale(localeId), result);
+}
+
+// private
+// Looks up a display name for a complete locale ID in the "Languages" table.
+// When short names are requested, "Languages%short" is consulted first and wins
+// if it yields a non-bogus string. Otherwise `substitute` selects between
+// langData.get() and langData.getNoFallback().
+UnicodeString&
+LocaleDisplayNamesImpl::localeIdName(const char* localeId,
+                                     UnicodeString& result, bool substitute) const {
+    const bool wantShort = (nameLength == UDISPCTX_LENGTH_SHORT);
+    if (wantShort) {
+        langData.getNoFallback("Languages%short", localeId, result);
+        if (!result.isBogus()) {
+            return result;
+        }
+    }
+    if (!substitute) {
+        return langData.getNoFallback("Languages", localeId, result);
+    }
+    return langData.get("Languages", localeId, result);
+}
+
+// Writes the display name of a language code into result and returns it.
+// "root" and any string containing '_' are echoed back unchanged (and without
+// capitalization adjustment). Otherwise the name comes from "Languages%short"
+// (short names only, when present) or from "Languages", and is then passed
+// through adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
+                                            UnicodeString& result) const {
+    const bool isRoot = (uprv_strcmp("root", lang) == 0);
+    if (isRoot || uprv_strchr(lang, '_') != NULL) {
+        result = UnicodeString(lang, -1, US_INV);
+        return result;
+    }
+
+    bool haveShortName = false;
+    if (nameLength == UDISPCTX_LENGTH_SHORT) {
+        langData.getNoFallback("Languages%short", lang, result);
+        haveShortName = !result.isBogus();
+    }
+    if (!haveShortName) {
+        if (substitute == UDISPCTX_SUBSTITUTE) {
+            langData.get("Languages", lang, result);
+        } else {
+            langData.getNoFallback("Languages", lang, result);
+        }
+    }
+    return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+}
+
+// Writes the display name of a script code into result and returns it.
+// "Scripts%short" is preferred when short names are requested and it has an
+// entry; otherwise "Scripts" is used. skipAdjust suppresses the call to
+// adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
+                                          UnicodeString& result,
+                                          UBool skipAdjust) const {
+    bool haveShortName = false;
+    if (nameLength == UDISPCTX_LENGTH_SHORT) {
+        langData.getNoFallback("Scripts%short", script, result);
+        haveShortName = !result.isBogus();
+    }
+    if (!haveShortName) {
+        if (substitute == UDISPCTX_SUBSTITUTE) {
+            langData.get("Scripts", script, result);
+        } else {
+            langData.getNoFallback("Scripts", script, result);
+        }
+    }
+    if (skipAdjust) {
+        return result;
+    }
+    return adjustForUsageAndContext(kCapContextUsageScript, result);
+}
+
+// Two-argument overload: same as the three-argument form with skipAdjust == FALSE,
+// i.e. the result is passed through adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
+ UnicodeString& result) const {
+ return scriptDisplayName(script, result, FALSE);
+}
+
+// UScriptCode overload: converts the code with uscript_getName() and forwards
+// to the string-based lookup with skipAdjust == FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const {
+ return scriptDisplayName(uscript_getName(scriptCode), result, FALSE);
+}
+
+// Writes the display name of a region code into result and returns it.
+// "Countries%short" is preferred when short names are requested and it has an
+// entry; otherwise "Countries" is used. skipAdjust suppresses the call to
+// adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::regionDisplayName(const char* region,
+                                          UnicodeString& result,
+                                          UBool skipAdjust) const {
+    bool haveShortName = false;
+    if (nameLength == UDISPCTX_LENGTH_SHORT) {
+        regionData.getNoFallback("Countries%short", region, result);
+        haveShortName = !result.isBogus();
+    }
+    if (!haveShortName) {
+        if (substitute == UDISPCTX_SUBSTITUTE) {
+            regionData.get("Countries", region, result);
+        } else {
+            regionData.getNoFallback("Countries", region, result);
+        }
+    }
+    if (skipAdjust) {
+        return result;
+    }
+    return adjustForUsageAndContext(kCapContextUsageTerritory, result);
+}
+
+// Two-argument overload: same as the three-argument form with skipAdjust == FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::regionDisplayName(const char* region,
+ UnicodeString& result) const {
+ return regionDisplayName(region, result, FALSE);
+}
+
+
+// Writes the display name of a variant into result and returns it. Only the
+// "Variants" table is consulted (there is no resource for short variant names).
+// skipAdjust suppresses the call to adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
+                                           UnicodeString& result,
+                                           UBool skipAdjust) const {
+    if (substitute != UDISPCTX_SUBSTITUTE) {
+        langData.getNoFallback("Variants", variant, result);
+    } else {
+        langData.get("Variants", variant, result);
+    }
+    if (skipAdjust) {
+        return result;
+    }
+    return adjustForUsageAndContext(kCapContextUsageVariant, result);
+}
+
+// Two-argument overload: same as the three-argument form with skipAdjust == FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
+ UnicodeString& result) const {
+ return variantDisplayName(variant, result, FALSE);
+}
+
+// Writes the display name of a locale keyword into result and returns it. Only
+// the "Keys" table is consulted (there is no resource for short key names).
+// skipAdjust suppresses the call to adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::keyDisplayName(const char* key,
+                                       UnicodeString& result,
+                                       UBool skipAdjust) const {
+    if (substitute != UDISPCTX_SUBSTITUTE) {
+        langData.getNoFallback("Keys", key, result);
+    } else {
+        langData.get("Keys", key, result);
+    }
+    if (skipAdjust) {
+        return result;
+    }
+    return adjustForUsageAndContext(kCapContextUsageKey, result);
+}
+
+// Two-argument overload: same as the three-argument form with skipAdjust == FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::keyDisplayName(const char* key,
+ UnicodeString& result) const {
+ return keyDisplayName(key, result, FALSE);
+}
+
+// Writes the display name of a keyword value into result and returns it.
+//  - key "currency": the long currency name from ucurr_getName(); if that call
+//    fails, the raw value is returned as is (without capitalization adjustment).
+//  - any other key: "Types%short" when short names are requested and an entry
+//    exists, otherwise "Types".
+// skipAdjust suppresses the call to adjustForUsageAndContext().
+UnicodeString&
+LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
+                                            const char* value,
+                                            UnicodeString& result,
+                                            UBool skipAdjust) const {
+    if (uprv_strcmp(key, "currency") == 0) {
+        // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now.
+        UErrorCode currencyStatus = U_ZERO_ERROR;
+        UnicodeString currencyCode(value, -1, US_INV);
+        int32_t nameLength16;
+        const UChar *name = ucurr_getName(currencyCode.getTerminatedBuffer(),
+                                          locale.getBaseName(), UCURR_LONG_NAME,
+                                          nullptr /* isChoiceFormat */,
+                                          &nameLength16, &currencyStatus);
+        if (U_SUCCESS(currencyStatus)) {
+            result.setTo(name, nameLength16);
+            if (skipAdjust) {
+                return result;
+            }
+            return adjustForUsageAndContext(kCapContextUsageKeyValue, result);
+        }
+        // Return the value as is on failure
+        result = currencyCode;
+        return result;
+    }
+
+    bool haveShortName = false;
+    if (nameLength == UDISPCTX_LENGTH_SHORT) {
+        langData.getNoFallback("Types%short", key, value, result);
+        haveShortName = !result.isBogus();
+    }
+    if (!haveShortName) {
+        if (substitute == UDISPCTX_SUBSTITUTE) {
+            langData.get("Types", key, value, result);
+        } else {
+            langData.getNoFallback("Types", key, value, result);
+        }
+    }
+    if (skipAdjust) {
+        return result;
+    }
+    return adjustForUsageAndContext(kCapContextUsageKeyValue, result);
+}
+
+// Three-argument overload: same as the four-argument form with skipAdjust == FALSE.
+UnicodeString&
+LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
+ const char* value,
+ UnicodeString& result) const {
+ return keyValueDisplayName(key, value, result, FALSE);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Factory: returns a newly allocated LocaleDisplayNamesImpl for the given display
+// locale and dialect handling. The result comes straight from `new`, with no
+// null check here.
+LocaleDisplayNames*
+LocaleDisplayNames::createInstance(const Locale& locale,
+ UDialectHandling dialectHandling) {
+ return new LocaleDisplayNamesImpl(locale, dialectHandling);
+}
+
+// Factory: returns a newly allocated LocaleDisplayNamesImpl configured from an
+// array of display contexts. A NULL array is treated as an empty one, whatever
+// length was passed.
+LocaleDisplayNames*
+LocaleDisplayNames::createInstance(const Locale& locale,
+                                   UDisplayContext *contexts, int32_t length) {
+    const int32_t contextCount = (contexts == NULL) ? 0 : length;
+    return new LocaleDisplayNamesImpl(locale, contexts, contextCount);
+}
+
+U_NAMESPACE_END
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+U_NAMESPACE_USE
+
+// C API: opens a ULocaleDisplayNames for the given display locale.
+//   locale          display locale ID; NULL selects uloc_getDefault().
+//   dialectHandling passed through to LocaleDisplayNames::createInstance().
+//   pErrorCode      in/out status; nothing is done if it already holds a failure.
+// Returns the new object (to be released with uldn_close()), or NULL with
+// *pErrorCode set on failure.
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_open(const char * locale,
+          UDialectHandling dialectHandling,
+          UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (locale == NULL) {
+        locale = uloc_getDefault();
+    }
+    LocaleDisplayNames *ldn =
+        LocaleDisplayNames::createInstance(Locale(locale), dialectHandling);
+    if (ldn == NULL) {
+        // Do not hand back NULL together with a success status.
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (ULocaleDisplayNames *)ldn;
+}
+
+// C API: opens a ULocaleDisplayNames configured from an array of display contexts.
+//   locale      display locale ID; NULL selects uloc_getDefault().
+//   contexts    array of UDisplayContext values (may be NULL, see createInstance()).
+//   length      number of entries in contexts.
+//   pErrorCode  in/out status; nothing is done if it already holds a failure.
+// Returns the new object (to be released with uldn_close()), or NULL with
+// *pErrorCode set on failure.
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_openForContext(const char * locale,
+                    UDisplayContext *contexts, int32_t length,
+                    UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if (locale == NULL) {
+        locale = uloc_getDefault();
+    }
+    LocaleDisplayNames *ldn =
+        LocaleDisplayNames::createInstance(Locale(locale), contexts, length);
+    if (ldn == NULL) {
+        // Do not hand back NULL together with a success status.
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return (ULocaleDisplayNames *)ldn;
+}
+
+
+// C API: destroys an object returned by uldn_open()/uldn_openForContext().
+// The handle is deleted as a LocaleDisplayNames; deleting NULL is a no-op.
+U_CAPI void U_EXPORT2
+uldn_close(ULocaleDisplayNames *ldn) {
+ delete (LocaleDisplayNames *)ldn;
+}
+
+// C API: returns the name of the display locale of ldn, or NULL when ldn is NULL.
+U_CAPI const char * U_EXPORT2
+uldn_getLocale(const ULocaleDisplayNames *ldn) {
+    if (ldn == NULL) {
+        return NULL;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    return names->getLocale().getName();
+}
+
+// C API: returns the dialect handling of ldn, or ULDN_STANDARD_NAMES when ldn is NULL.
+U_CAPI UDialectHandling U_EXPORT2
+uldn_getDialectHandling(const ULocaleDisplayNames *ldn) {
+    if (ldn == NULL) {
+        return ULDN_STANDARD_NAMES;
+    }
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    return names->getDialectHandling();
+}
+
+// C API: returns the display-context value of the given type for ldn.
+//   pErrorCode  in/out status; (UDisplayContext)0 is returned if it already holds
+//               a failure. A NULL ldn sets U_ILLEGAL_ARGUMENT_ERROR and also
+//               returns (UDisplayContext)0 instead of dereferencing the handle.
+U_CAPI UDisplayContext U_EXPORT2
+uldn_getContext(const ULocaleDisplayNames *ldn,
+                UDisplayContextType type,
+                UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return (UDisplayContext)0;
+    }
+    if (ldn == NULL) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return (UDisplayContext)0;
+    }
+    return ((const LocaleDisplayNames *)ldn)->getContext(type);
+}
+
+// C API: writes the display name of a locale ID into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/locale, a negative
+// capacity, a NULL buffer with a positive capacity, or when the computed name is
+// bogus. Otherwise returns what UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
+                       const char *locale,
+                       UChar *result,
+                       int32_t maxResultSize,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || locale == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->localeDisplayName(locale, name);
+    if (!name.isBogus()) {
+        return name.extract(result, maxResultSize, *pErrorCode);
+    }
+    *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+}
+
+// C API: writes the display name of a language code into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/lang, a negative
+// capacity, or a NULL buffer with a positive capacity. Otherwise returns what
+// UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
+                         const char *lang,
+                         UChar *result,
+                         int32_t maxResultSize,
+                         UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || lang == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->languageDisplayName(lang, name);
+    return name.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of a script code string into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/script, a negative
+// capacity, or a NULL buffer with a positive capacity. Otherwise returns what
+// UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
+                       const char *script,
+                       UChar *result,
+                       int32_t maxResultSize,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || script == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->scriptDisplayName(script, name);
+    return name.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: UScriptCode variant of uldn_scriptDisplayName(). The code is converted
+// with uscript_getName() and all argument/status checking is left to
+// uldn_scriptDisplayName().
+U_CAPI int32_t U_EXPORT2
+uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
+ UScriptCode scriptCode,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode) {
+ return uldn_scriptDisplayName(ldn, uscript_getName(scriptCode), result, maxResultSize, pErrorCode);
+}
+
+// C API: writes the display name of a region code into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/region, a negative
+// capacity, or a NULL buffer with a positive capacity. Otherwise returns what
+// UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
+                       const char *region,
+                       UChar *result,
+                       int32_t maxResultSize,
+                       UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || region == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->regionDisplayName(region, name);
+    return name.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of a variant into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/variant, a negative
+// capacity, or a NULL buffer with a positive capacity. Otherwise returns what
+// UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
+                        const char *variant,
+                        UChar *result,
+                        int32_t maxResultSize,
+                        UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || variant == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->variantDisplayName(variant, name);
+    return name.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of a locale keyword into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/key, a negative
+// capacity, or a NULL buffer with a positive capacity. Otherwise returns what
+// UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
+                    const char *key,
+                    UChar *result,
+                    int32_t maxResultSize,
+                    UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || key == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->keyDisplayName(key, name);
+    return name.extract(result, maxResultSize, *pErrorCode);
+}
+
+// C API: writes the display name of a keyword value into result.
+// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a NULL ldn/key/value, a
+// negative capacity, or a NULL buffer with a positive capacity. Otherwise
+// returns what UnicodeString::extract() returns.
+U_CAPI int32_t U_EXPORT2
+uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
+                         const char *key,
+                         const char *value,
+                         UChar *result,
+                         int32_t maxResultSize,
+                         UErrorCode *pErrorCode) {
+    if (U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // A NULL output buffer is acceptable only together with a capacity of zero.
+    const bool bufferOk = maxResultSize == 0 || (maxResultSize > 0 && result != NULL);
+    if (ldn == NULL || key == NULL || value == NULL || !bufferOk) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    // Constructed over the caller's buffer: length 0, capacity maxResultSize.
+    UnicodeString name(result, 0, maxResultSize);
+    const LocaleDisplayNames *names = (const LocaleDisplayNames *)ldn;
+    names->keyValueDisplayName(key, value, name);
+    return name.extract(result, maxResultSize, *pErrorCode);
+}
+
+#endif
diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp
new file mode 100644
index 0000000000..2804e36bf6
--- /dev/null
+++ b/thirdparty/icu4c/common/locid.cpp
@@ -0,0 +1,2536 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1997-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+*
+* File locid.cpp
+*
+* Created by: Richard Gillam
+*
+* Modification History:
+*
+* Date Name Description
+* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
+* methods to get and set it.
+* 04/02/97 aliu Made operator!= inline; fixed return value
+* of getName().
+* 04/15/97 aliu Cleanup for AIX/Win32.
+* 04/24/97 aliu Numerous changes per code review.
+* 08/18/98 stephen Changed getDisplayName()
+* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
+* Added getISOCountries(), getISOLanguages(),
+* getLanguagesForCountry()
+* 03/16/99 bertrand rehaul.
+* 07/21/99 stephen Added U_CFUNC setDefault
+* 11/09/99 weiv Added const char * getName() const;
+* 04/12/00 srl removing unicodestring api's and cached hash code
+* 08/10/01 grhoten Change the static Locales to accessor functions
+******************************************************************************
+*/
+
+#include <utility>
+
+#include "unicode/bytestream.h"
+#include "unicode/locid.h"
+#include "unicode/strenum.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "charstrmap.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "ulocimp.h"
+#include "umutex.h"
+#include "uniquecharstr.h"
+#include "ustr_imp.h"
+#include "uvector.h"
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV locale_cleanup(void);
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// Array of eMAX_LOCALES predefined Locale objects, indexed by ELocalePos.
+// Allocated and filled by locale_init(), released by locale_cleanup().
+static Locale *gLocaleCache = NULL;
+// Init-once guard for gLocaleCache; reset by locale_cleanup().
+static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;
+
+// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
+static UMutex gDefaultLocaleMutex;
+// Maps locale names to the Locale objects created by locale_set_default_internal();
+// the table owns its values (deleteLocale is installed as the value deleter).
+static UHashtable *gDefaultLocalesHashT = NULL;
+// Current default locale; points at a Locale stored in gDefaultLocalesHashT.
+static Locale *gDefaultLocale = NULL;
+
+/**
+ * \def ULOC_STRING_LIMIT
+ * strings beyond this value crash in CharString
+ */
+#define ULOC_STRING_LIMIT 357913941
+
+U_NAMESPACE_END
+
+// Indexes into gLocaleCache, one per predefined locale filled in by locale_init().
+// eMAX_LOCALES is the number of entries and is used as the array size.
+typedef enum ELocalePos {
+ // Language-only locales.
+ eENGLISH,
+ eFRENCH,
+ eGERMAN,
+ eITALIAN,
+ eJAPANESE,
+ eKOREAN,
+ eCHINESE,
+
+ // Language + country locales.
+ eFRANCE,
+ eGERMANY,
+ eITALY,
+ eJAPAN,
+ eKOREA,
+ eCHINA, /* Alias for PRC */
+ eTAIWAN,
+ eUK,
+ eUS,
+ eCANADA,
+ eCANADA_FRENCH,
+ eROOT,
+
+
+ //eDEFAULT,
+ eMAX_LOCALES
+} ELocalePos;
+
+U_CDECL_BEGIN
+//
+// Value deleter for the default-locale hash table: destroys one Locale that
+// the table owns.
+//
+static void U_CALLCONV
+deleteLocale(void *obj) {
+    icu::Locale *ownedLocale = static_cast<icu::Locale *>(obj);
+    delete ownedLocale;
+}
+
+// Library cleanup hook for this file: releases the predefined-locale cache and
+// the default-locale table, and resets the related globals. Always returns TRUE.
+static UBool U_CALLCONV locale_cleanup(void)
+{
+    U_NAMESPACE_USE
+
+    // Drop the predefined locales and re-arm their one-time initializer.
+    delete [] gLocaleCache;
+    gLocaleCache = NULL;
+    gLocaleCacheInitOnce.reset();
+
+    // Closing the table deletes every Locale stored in it through the value
+    // deleter, which includes the object gDefaultLocale points to.
+    if (gDefaultLocalesHashT != NULL) {
+        uhash_close(gDefaultLocalesHashT);
+        gDefaultLocalesHashT = NULL;
+    }
+    gDefaultLocale = NULL;
+    return TRUE;
+}
+
+
+// One-time initializer for gLocaleCache: allocates the array, registers
+// locale_cleanup() and fills in every predefined locale. On allocation failure
+// status is set to U_MEMORY_ALLOCATION_ERROR and nothing else is done.
+static void U_CALLCONV locale_init(UErrorCode &status) {
+    U_NAMESPACE_USE
+
+    U_ASSERT(gLocaleCache == NULL);
+    gLocaleCache = new Locale[(int)eMAX_LOCALES];
+    if (gLocaleCache == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
+
+    // (slot, language, country); a NULL country means a language-only locale.
+    const struct {
+        ELocalePos slot;
+        const char *language;
+        const char *country;
+    } predefined[] = {
+        { eROOT,          "",   NULL },
+        { eENGLISH,       "en", NULL },
+        { eFRENCH,        "fr", NULL },
+        { eGERMAN,        "de", NULL },
+        { eITALIAN,       "it", NULL },
+        { eJAPANESE,      "ja", NULL },
+        { eKOREAN,        "ko", NULL },
+        { eCHINESE,       "zh", NULL },
+        { eFRANCE,        "fr", "FR" },
+        { eGERMANY,       "de", "DE" },
+        { eITALY,         "it", "IT" },
+        { eJAPAN,         "ja", "JP" },
+        { eKOREA,         "ko", "KR" },
+        { eCHINA,         "zh", "CN" },
+        { eTAIWAN,        "zh", "TW" },
+        { eUK,            "en", "GB" },
+        { eUS,            "en", "US" },
+        { eCANADA,        "en", "CA" },
+        { eCANADA_FRENCH, "fr", "CA" },
+    };
+    for (const auto &entry : predefined) {
+        gLocaleCache[entry.slot] = Locale(entry.language, entry.country);
+    }
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// Sets the process-wide default locale to `id` and returns the resulting
+// gDefaultLocale.
+//   id      locale ID; NULL means "use the host's default locale ID", which is
+//           always canonicalized. A non-NULL id is only normalized with
+//           ulocimp_getName().
+//   status  set on failure; in every failure path the previous gDefaultLocale is
+//           returned unchanged.
+// Locale objects are created at most once per name and kept in
+// gDefaultLocalesHashT, so a previously returned pointer is not invalidated by a
+// later call. The whole function runs under gDefaultLocaleMutex.
+Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
+ // Synchronize this entire function.
+ Mutex lock(&gDefaultLocaleMutex);
+
+ UBool canonicalize = FALSE;
+
+ // If given a NULL string for the locale id, grab the default
+ // name from the system.
+ // (Different from most other locale APIs, where a null name means use
+ // the current ICU default locale.)
+ if (id == NULL) {
+ id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
+ canonicalize = TRUE; // always canonicalize host ID
+ }
+
+ // Normalized (or canonicalized) name; used as the lookup key below.
+ CharString localeNameBuf;
+ {
+ CharStringByteSink sink(&localeNameBuf);
+ if (canonicalize) {
+ ulocimp_canonicalize(id, sink, &status);
+ } else {
+ ulocimp_getName(id, sink, &status);
+ }
+ }
+
+ if (U_FAILURE(status)) {
+ return gDefaultLocale;
+ }
+
+ // Lazily create the table; it owns its values via deleteLocale.
+ if (gDefaultLocalesHashT == NULL) {
+ gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
+ if (U_FAILURE(status)) {
+ return gDefaultLocale;
+ }
+ uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
+ ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
+ }
+
+ Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
+ if (newDefault == NULL) {
+ // First time this name is used: start from a bogus Locale (no side effects)
+ // and initialize it from the already-normalized name.
+ newDefault = new Locale(Locale::eBOGUS);
+ if (newDefault == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return gDefaultLocale;
+ }
+ newDefault->init(localeNameBuf.data(), FALSE);
+ // The key is the Locale's own name string, not localeNameBuf, which is a local.
+ uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
+ if (U_FAILURE(status)) {
+ return gDefaultLocale;
+ }
+ }
+ gDefaultLocale = newDefault;
+ return gDefaultLocale;
+}
+
+U_NAMESPACE_END
+
+/* sfb 07/21/99 */
+// C-linkage entry point for setting the default locale. Forwards to
+// locale_set_default_internal() with a local status, so any failure is
+// silently discarded.
+U_CFUNC void
+locale_set_default(const char *id)
+{
+ U_NAMESPACE_USE
+ UErrorCode status = U_ZERO_ERROR;
+ locale_set_default_internal(id, status);
+}
+/* end */
+
+// C-linkage accessor: returns the name of Locale::getDefault().
+U_CFUNC const char *
+locale_get_default(void)
+{
+ U_NAMESPACE_USE
+ return Locale::getDefault().getName();
+}
+
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
+
+/* Character separating the POSIX ID fields ('_'), in the platform codepage. */
+#define SEP_CHAR '_'
+/* String terminator. */
+#define NULL_CHAR '\0'
+
+// Releases the name storage this Locale owns.
+Locale::~Locale()
+{
+    // baseName has its own allocation only when it does not alias fullName;
+    // fullName is on the heap only when it does not point at fullNameBuffer.
+    const bool ownsBaseName = (baseName != fullName);
+    const bool ownsFullName = (fullName != fullNameBuffer);
+
+    if (ownsBaseName) {
+        uprv_free(baseName);
+    }
+    baseName = NULL;
+
+    if (ownsFullName) {
+        uprv_free(fullName);
+        fullName = NULL;
+    }
+}
+
+// Default constructor: starts with the inline name buffer and no base name,
+// then calls init(NULL, FALSE).
+Locale::Locale()
+ : UObject(), fullName(fullNameBuffer), baseName(NULL)
+{
+ init(NULL, FALSE);
+}
+
+/*
+ * Internal constructor to allow construction of a locale object with
+ * NO side effects. (Default constructor tries to get
+ * the default locale.)
+ * The ELocaleType argument is unnamed and unused; the object is always
+ * set to bogus.
+ */
+Locale::Locale(Locale::ELocaleType)
+ : UObject(), fullName(fullNameBuffer), baseName(NULL)
+{
+ setToBogus();
+}
+
+
+// Builds a Locale from separate language, country, variant and keyword strings.
+//
+// If language, country and variant are all NULL, the object is initialized with
+// init(NULL, FALSE) and newKeywords is ignored. Otherwise the parts are joined
+// into one ID string, "language[_country][_variant]" (with "language__variant"
+// when there is a variant but no country), the keywords are appended, and the
+// result is parsed by init().
+//
+// Any part longer than ULOC_STRING_LIMIT, or any failure while assembling the
+// string, leaves the object bogus.
+Locale::Locale( const char * newLanguage,
+ const char * newCountry,
+ const char * newVariant,
+ const char * newKeywords)
+ : UObject(), fullName(fullNameBuffer), baseName(NULL)
+{
+ if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
+ {
+ init(NULL, FALSE); /* shortcut */
+ }
+ else
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ // NOTE(review): size is accumulated below but never read in this constructor.
+ int32_t size = 0;
+ int32_t lsize = 0;
+ int32_t csize = 0;
+ int32_t vsize = 0;
+ int32_t ksize = 0;
+
+ // Calculate the size of the resulting string.
+
+ // Language
+ if ( newLanguage != NULL )
+ {
+ lsize = (int32_t)uprv_strlen(newLanguage);
+ if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
+ setToBogus();
+ return;
+ }
+ size = lsize;
+ }
+
+ CharString togo(newLanguage, lsize, status); // start with newLanguage
+
+ // _Country
+ if ( newCountry != NULL )
+ {
+ csize = (int32_t)uprv_strlen(newCountry);
+ if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
+ setToBogus();
+ return;
+ }
+ size += csize;
+ }
+
+ // _Variant
+ if ( newVariant != NULL )
+ {
+ // remove leading _'s
+ while(newVariant[0] == SEP_CHAR)
+ {
+ newVariant++;
+ }
+
+ // remove trailing _'s
+ vsize = (int32_t)uprv_strlen(newVariant);
+ if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
+ setToBogus();
+ return;
+ }
+ // Stops at one character, so a variant consisting of a single '_' run
+ // that survived the leading-strip is not reduced to length 0 here.
+ while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
+ {
+ vsize--;
+ }
+ }
+
+ if( vsize > 0 )
+ {
+ size += vsize;
+ }
+
+ // Separator rules:
+ if ( vsize > 0 )
+ {
+ size += 2; // at least: __v
+ }
+ else if ( csize > 0 )
+ {
+ size += 1; // at least: _c
+ }
+
+ if ( newKeywords != NULL)
+ {
+ ksize = (int32_t)uprv_strlen(newKeywords);
+ if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
+ setToBogus();
+ return;
+ }
+ size += ksize + 1;
+ }
+
+ // All lengths are known; now assemble the ID string in togo.
+
+ // newLanguage is already copied
+
+ if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
+ { // ^
+ togo.append(SEP_CHAR, status);
+ }
+
+ if ( csize != 0 )
+ {
+ togo.append(newCountry, status);
+ }
+
+ if ( vsize != 0)
+ {
+ // Only the first vsize characters: trailing separators were trimmed above.
+ togo.append(SEP_CHAR, status)
+ .append(newVariant, vsize, status);
+ }
+
+ if ( ksize != 0)
+ {
+ // Text containing '=' is attached as keywords after '@'; anything else
+ // is attached with '_' separators.
+ if (uprv_strchr(newKeywords, '=')) {
+ togo.append('@', status); /* keyword parsing */
+ }
+ else {
+ togo.append('_', status); /* Variant parsing with a script */
+ if ( vsize == 0) {
+ togo.append('_', status); /* No country found */
+ }
+ }
+ togo.append(newKeywords, status);
+ }
+
+ if (U_FAILURE(status)) {
+ // Something went wrong with appending, etc.
+ setToBogus();
+ return;
+ }
+ // Parse it, because for example 'language' might really be a complete
+ // string.
+ init(togo.data(), FALSE);
+ }
+}
+
+// Copy constructor: starts from the inline-buffer state and delegates the
+// actual copying to the copy assignment operator.
+Locale::Locale(const Locale &other)
+ : UObject(other), fullName(fullNameBuffer), baseName(NULL)
+{
+ *this = other;
+}
+
+// Move constructor: points both name pointers at the inline buffer, so the
+// move assignment it delegates to has nothing to free on this side.
+Locale::Locale(Locale&& other) U_NOEXCEPT
+ : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
+ *this = std::move(other);
+}
+
+// Copy assignment. Resets this object with setToBogus() and then deep-copies
+// other's names and parsed fields. If duplicating a heap-allocated name fails,
+// the function returns early, before fIsBogus is copied from other.
+Locale& Locale::operator=(const Locale& other) {
+ if (this == &other) {
+ return *this;
+ }
+
+ setToBogus();
+
+ // fullName: copy the inline buffer, mirror a null pointer, or duplicate the
+ // heap string.
+ if (other.fullName == other.fullNameBuffer) {
+ uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
+ } else if (other.fullName == nullptr) {
+ fullName = nullptr;
+ } else {
+ fullName = uprv_strdup(other.fullName);
+ // NOTE(review): on allocation failure fullName stays nullptr here - confirm
+ // that all readers of fullName tolerate that.
+ if (fullName == nullptr) return *this;
+ }
+
+ // baseName: keep the aliasing with fullName if other had it, otherwise
+ // duplicate other's separate base name (if any).
+ if (other.baseName == other.fullName) {
+ baseName = fullName;
+ } else if (other.baseName != nullptr) {
+ baseName = uprv_strdup(other.baseName);
+ if (baseName == nullptr) return *this;
+ }
+
+ uprv_strcpy(language, other.language);
+ uprv_strcpy(script, other.script);
+ uprv_strcpy(country, other.country);
+
+ variantBegin = other.variantBegin;
+ fIsBogus = other.fIsBogus;
+
+ return *this;
+}
+
+/**
+ * Move assignment. Releases the names this object owns, then takes over other's
+ * heap-allocated names (or copies its inline buffer) together with the parsed
+ * language/script/country fields, variantBegin and the bogus flag. Finally
+ * other's name pointers are reset to its own inline buffer so that its
+ * destructor does not free storage that now belongs to *this.
+ *
+ * Self-move-assignment is a no-op.
+ */
+Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
+    if (this == &other) {
+        // Without this guard the frees below would release our own heap names,
+        // the strcpy calls would copy buffers onto themselves, and the final
+        // reset would leave the object without its locale ID.
+        return *this;
+    }
+
+    if (baseName != fullName) uprv_free(baseName);
+    if (fullName != fullNameBuffer) uprv_free(fullName);
+
+    if (other.fullName == other.fullNameBuffer) {
+        // Inline storage cannot be stolen; copy the characters instead.
+        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
+        fullName = fullNameBuffer;
+    } else {
+        fullName = other.fullName;
+    }
+
+    if (other.baseName == other.fullName) {
+        // Preserve the "baseName aliases fullName" relationship.
+        baseName = fullName;
+    } else {
+        baseName = other.baseName;
+    }
+
+    uprv_strcpy(language, other.language);
+    uprv_strcpy(script, other.script);
+    uprv_strcpy(country, other.country);
+
+    variantBegin = other.variantBegin;
+    fIsBogus = other.fIsBogus;
+
+    // Detach other from the storage that was just transferred.
+    other.baseName = other.fullName = other.fullNameBuffer;
+
+    return *this;
+}
+
+// Returns a copy of this Locale allocated with `new`.
+Locale *
+Locale::clone() const {
+ return new Locale(*this);
+}
+
+// Two Locales are equal when their full name strings compare equal.
+// NOTE(review): the copy assignment operator can leave fullName == nullptr;
+// confirm such objects never reach this comparison.
+UBool
+Locale::operator==( const Locale& other) const
+{
+ return (uprv_strcmp(other.fullName, fullName) == 0);
+}
+
+namespace {
+
+// Init-once guard and storage for the lookup table built from KNOWN_CANONICALIZED
+// by loadKnownCanonicalized().
+UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER;
+UHashtable *gKnownCanonicalized = nullptr;
+
+static const char* const KNOWN_CANONICALIZED[] = {
+ "c",
+ // Commonly used locale IDs that are known to be already canonicalized
+ "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
+ "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
+ "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
+ "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
+ "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
+ "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
+ "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
+ "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
+ "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
+ "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
+ "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
+ "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
+ "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
+ "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
+ "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
+ "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
+ "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
+ "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
+ "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
+};
+
+// Library cleanup hook: re-arms the init-once guard and releases the lookup
+// table of known-canonicalized locale IDs. Always returns TRUE.
+static UBool U_CALLCONV cleanupKnownCanonicalized() {
+    gKnownCanonicalizedInitOnce.reset();
+    if (gKnownCanonicalized) {
+        uhash_close(gKnownCanonicalized);
+        // Clear the global so the closed table can never be reached again, e.g.
+        // when a later loadKnownCanonicalized() fails before installing a new one.
+        gKnownCanonicalized = nullptr;
+    }
+    return TRUE;
+}
+
+// One-time initializer for gKnownCanonicalized: registers the cleanup hook, then
+// builds a hash table that maps every entry of KNOWN_CANONICALIZED to 1. The
+// global is assigned only if the whole table was built successfully.
+static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
+                                cleanupKnownCanonicalized);
+    LocalUHashtablePointer table(
+        uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
+    for (const char* const id : KNOWN_CANONICALIZED) {
+        if (U_FAILURE(status)) {
+            break;
+        }
+        uhash_puti(table.getAlias(), (void*)id, 1, &status);
+    }
+    if (U_SUCCESS(status)) {
+        gKnownCanonicalized = table.orphan();
+    }
+}
+
+class AliasData;
+
+/**
+ * A Builder class to build the alias data.
+ */
+class AliasDataBuilder {
+public:
+    AliasDataBuilder() {
+    }
+
+    // Build the AliasData from resource.
+    AliasData* build(UErrorCode &status);
+
+private:
+    // Shared worker for the read*Alias() functions below.
+    // Reads every record of alias: the record key goes into types, the index
+    // returned by strings->add() for the record's "replacement" string goes
+    // into replacementIndexes. The number of records is stored into length.
+    // checkType / checkReplacement are called once per record with the key and
+    // the replacement.
+    void readAlias(UResourceBundle* alias,
+                   UniqueCharStrings* strings,
+                   LocalMemory<const char*>& types,
+                   LocalMemory<int32_t>& replacementIndexes,
+                   int32_t &length,
+                   void (*checkType)(const char* type),
+                   void (*checkReplacement)(const UnicodeString& replacement),
+                   UErrorCode &status);
+
+    // Read the languageAlias data from alias to
+    // strings+types+replacementIndexes
+    // The number of record will be stored into length.
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement language.
+    void readLanguageAlias(UResourceBundle* alias,
+                           UniqueCharStrings* strings,
+                           LocalMemory<const char*>& types,
+                           LocalMemory<int32_t>& replacementIndexes,
+                           int32_t &length,
+                           UErrorCode &status);
+
+    // Read the scriptAlias data from alias to
+    // strings+types+replacementIndexes
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement script.
+    void readScriptAlias(UResourceBundle* alias,
+                         UniqueCharStrings* strings,
+                         LocalMemory<const char*>& types,
+                         LocalMemory<int32_t>& replacementIndexes,
+                         int32_t &length, UErrorCode &status);
+
+    // Read the territoryAlias data from alias to
+    // strings+types+replacementIndexes
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement territory.
+    void readTerritoryAlias(UResourceBundle* alias,
+                            UniqueCharStrings* strings,
+                            LocalMemory<const char*>& types,
+                            LocalMemory<int32_t>& replacementIndexes,
+                            int32_t &length, UErrorCode &status);
+
+    // Read the variantAlias data from alias to
+    // strings+types+replacementIndexes
+    // Allocate length items for types, to store the type field.
+    // Allocate length items for replacementIndexes,
+    // to store the index in the strings for the replacement variant.
+    void readVariantAlias(UResourceBundle* alias,
+                          UniqueCharStrings* strings,
+                          LocalMemory<const char*>& types,
+                          LocalMemory<int32_t>& replacementIndexes,
+                          int32_t &length, UErrorCode &status);
+};
+
+/**
+ * A class to hold the Alias Data.
+ *
+ * Holds one CharStringMap per alias kind (language, script, territory, variant)
+ * plus the CharString passed to the constructor, which this object deletes in
+ * its destructor. Instances are created through AliasDataBuilder (a friend) and
+ * accessed through singleton().
+ */
+class AliasData : public UMemory {
+public:
+ // Returns the shared instance, running loadData() once via umtx_initOnce().
+ // Returns nullptr without attempting to load if status already holds a failure.
+ static const AliasData* singleton(UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ // Do not get into loadData if the status already has error.
+ return nullptr;
+ }
+ umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
+ return gSingleton;
+ }
+
+ // Read-only access to the per-kind alias maps.
+ const CharStringMap& languageMap() const { return language; }
+ const CharStringMap& scriptMap() const { return script; }
+ const CharStringMap& territoryMap() const { return territory; }
+ const CharStringMap& variantMap() const { return variant; }
+
+ static void U_CALLCONV loadData(UErrorCode &status);
+ static UBool U_CALLCONV cleanup();
+
+ static UInitOnce gInitOnce;
+
+private:
+ // Takes the four maps by value and moves them into the members; takes
+ // ownership of strings (deleted in the destructor).
+ AliasData(CharStringMap languageMap,
+ CharStringMap scriptMap,
+ CharStringMap territoryMap,
+ CharStringMap variantMap,
+ CharString* strings)
+ : language(std::move(languageMap)),
+ script(std::move(scriptMap)),
+ territory(std::move(territoryMap)),
+ variant(std::move(variantMap)),
+ strings(strings) {
+ }
+
+ ~AliasData() {
+ delete strings;
+ }
+
+ static const AliasData* gSingleton;
+
+ CharStringMap language;
+ CharStringMap script;
+ CharStringMap territory;
+ CharStringMap variant;
+ // Owned. Presumably the backing storage for the strings referenced by the
+ // maps above - TODO confirm against AliasDataBuilder::build().
+ CharString* strings;
+
+ friend class AliasDataBuilder;
+};
+
+
+const AliasData* AliasData::gSingleton = nullptr;
+UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER;
+
+// Library cleanup hook: re-arms the init-once guard and destroys the singleton.
+// Always returns TRUE.
+UBool U_CALLCONV
+AliasData::cleanup()
+{
+    gInitOnce.reset();
+    delete gSingleton;
+    // Clear the pointer so singleton() can never return the deleted object if a
+    // later load does not install a new instance.
+    gSingleton = nullptr;
+    return TRUE;
+}
+
+/**
+ * Reads every record of the alias resource into parallel arrays.
+ *
+ * @param alias              resource whose children are the alias records
+ * @param strings            receives each record's "replacement" string
+ * @param types              reallocated to `length` items; item i is the key of record i
+ * @param replacementIndexes reallocated to `length` items; item i is the value
+ *                           returned by strings->add() for record i
+ * @param length             set to ures_getSize(alias)
+ * @param checkType          called with the key of each successfully read record
+ * @param checkReplacement   called with the replacement of each successfully read record
+ * @param status             in/out error code; nothing is done if it already holds a
+ *                           failure, and reading stops at the first failing record
+ */
+void
+AliasDataBuilder::readAlias(
+        UResourceBundle* alias,
+        UniqueCharStrings* strings,
+        LocalMemory<const char*>& types,
+        LocalMemory<int32_t>& replacementIndexes,
+        int32_t &length,
+        void (*checkType)(const char* type),
+        void (*checkReplacement)(const UnicodeString& replacement),
+        UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    length = ures_getSize(alias);
+    const char** rawTypes = types.allocateInsteadAndCopy(length);
+    if (rawTypes == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
+    if (rawIndexes == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    for (int32_t i = 0; U_SUCCESS(status) && ures_hasNext(alias); i++) {
+        LocalUResourceBundlePointer res(
+            ures_getNextResource(alias, nullptr, &status));
+        const char* aliasFrom = ures_getKey(res.getAlias());
+        UnicodeString aliasTo =
+            ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
+        if (U_FAILURE(status)) {
+            // The record could not be read: do not hand a possibly-null key or a
+            // meaningless replacement to the checkers or store them.
+            return;
+        }
+
+        checkType(aliasFrom);
+        checkReplacement(aliasTo);
+
+        rawTypes[i] = aliasFrom;
+        rawIndexes[i] = strings->add(aliasTo, status);
+    }
+}
+
+/**
+ * Read the languageAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement language.
+ */
+void
+AliasDataBuilder::readLanguageAlias(
+ UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory& types,
+ LocalMemory& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status)
+{
+ return readAlias(
+ alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+ [](const char* type) {
+ // Assert the aliasFrom only contains the following possibilties
+ // language_REGION_variant
+ // language_REGION
+ // language_variant
+ // language
+ // und_variant
+ Locale test(type);
+ // Assert no script in aliasFrom
+ U_ASSERT(test.getScript()[0] == '\0');
+ // Assert when language is und, no REGION in aliasFrom.
+ U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0');
+ },
+#else
+ [](const char*) {},
+#endif
+ [](const UnicodeString&) {}, status);
+}
+
+/**
+ * Read the scriptAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement script.
+ */
+void
+AliasDataBuilder::readScriptAlias(
+ UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory& types,
+ LocalMemory& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status)
+{
+ return readAlias(
+ alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+ [](const char* type) {
+ U_ASSERT(uprv_strlen(type) == 4);
+ },
+ [](const UnicodeString& replacement) {
+ U_ASSERT(replacement.length() == 4);
+ },
+#else
+ [](const char*) {},
+ [](const UnicodeString&) { },
+#endif
+ status);
+}
+
+/**
+ * Read the territoryAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement regions.
+ */
+void
+AliasDataBuilder::readTerritoryAlias(
+ UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory& types,
+ LocalMemory& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status)
+{
+ return readAlias(
+ alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+ [](const char* type) {
+ U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
+ },
+#else
+ [](const char*) {},
+#endif
+ [](const UnicodeString&) { },
+ status);
+}
+
+/**
+ * Read the variantAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement variant.
+ */
+void
+AliasDataBuilder::readVariantAlias(
+ UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory& types,
+ LocalMemory& replacementIndexes,
+ int32_t &length,
+ UErrorCode &status)
+{
+ return readAlias(
+ alias, strings, types, replacementIndexes, length,
+#if U_DEBUG
+ [](const char* type) {
+ U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
+ U_ASSERT(uprv_strlen(type) != 4 ||
+ (type[0] >= '0' && type[0] <= '9'));
+ },
+ [](const UnicodeString& replacement) {
+ U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
+ U_ASSERT(replacement.length() != 4 ||
+ (replacement.charAt(0) >= u'0' &&
+ replacement.charAt(0) <= u'9'));
+ },
+#else
+ [](const char*) {},
+ [](const UnicodeString&) { },
+#endif
+ status);
+}
+
+/**
+ * Initializes the alias data from the ICU resource bundles. The alias data
+ * contains alias of language, country, script and variants.
+ *
+ * If the alias data has already loaded, then this method simply returns without
+ * doing anything meaningful.
+ */
+void U_CALLCONV
+AliasData::loadData(UErrorCode &status)
+{
+#ifdef LOCALE_CANONICALIZATION_DEBUG
+ UDate start = uprv_getRawUTCtime();
+#endif // LOCALE_CANONICALIZATION_DEBUG
+ ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
+ AliasDataBuilder builder;
+ gSingleton = builder.build(status);
+#ifdef LOCALE_CANONICALIZATION_DEBUG
+ UDate end = uprv_getRawUTCtime();
+ printf("AliasData::loadData took total %f ms\n", end - start);
+#endif // LOCALE_CANONICALIZATION_DEBUG
+}
+
+/**
+ * Build the alias data from resources.
+ */
+AliasData*
+AliasDataBuilder::build(UErrorCode &status) {
+ LocalUResourceBundlePointer metadata(
+ ures_openDirect(nullptr, "metadata", &status));
+ LocalUResourceBundlePointer metadataAlias(
+ ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
+ LocalUResourceBundlePointer languageAlias(
+ ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
+ LocalUResourceBundlePointer scriptAlias(
+ ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
+ LocalUResourceBundlePointer territoryAlias(
+ ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
+ LocalUResourceBundlePointer variantAlias(
+ ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+ int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
+ variantLength = 0;
+
+ // Read the languageAlias into languageTypes, languageReplacementIndexes
+ // and strings
+ UniqueCharStrings strings(status);
+ LocalMemory languageTypes;
+ LocalMemory languageReplacementIndexes;
+ readLanguageAlias(languageAlias.getAlias(),
+ &strings,
+ languageTypes,
+ languageReplacementIndexes,
+ languagesLength,
+ status);
+
+ // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
+ // and strings
+ LocalMemory scriptTypes;
+ LocalMemory scriptReplacementIndexes;
+ readScriptAlias(scriptAlias.getAlias(),
+ &strings,
+ scriptTypes,
+ scriptReplacementIndexes,
+ scriptLength,
+ status);
+
+ // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
+ // and strings
+ LocalMemory territoryTypes;
+ LocalMemory territoryReplacementIndexes;
+ readTerritoryAlias(territoryAlias.getAlias(),
+ &strings,
+ territoryTypes,
+ territoryReplacementIndexes,
+ territoryLength, status);
+
+ // Read the variantAlias into variantTypes, variantReplacementIndexes
+ // and strings
+ LocalMemory variantTypes;
+ LocalMemory variantReplacementIndexes;
+ readVariantAlias(variantAlias.getAlias(),
+ &strings,
+ variantTypes,
+ variantReplacementIndexes,
+ variantLength, status);
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ // We can only use strings after freeze it.
+ strings.freeze();
+
+ // Build the languageMap from languageTypes & languageReplacementIndexes
+ CharStringMap languageMap(490, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
+ languageMap.put(languageTypes[i],
+ strings.get(languageReplacementIndexes[i]),
+ status);
+ }
+
+ // Build the scriptMap from scriptTypes & scriptReplacementIndexes
+ CharStringMap scriptMap(1, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
+ scriptMap.put(scriptTypes[i],
+ strings.get(scriptReplacementIndexes[i]),
+ status);
+ }
+
+ // Build the territoryMap from territoryTypes & territoryReplacementIndexes
+ CharStringMap territoryMap(650, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
+ territoryMap.put(territoryTypes[i],
+ strings.get(territoryReplacementIndexes[i]),
+ status);
+ }
+
+ // Build the variantMap from variantTypes & variantReplacementIndexes.
+ CharStringMap variantMap(2, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
+ variantMap.put(variantTypes[i],
+ strings.get(variantReplacementIndexes[i]),
+ status);
+ }
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ // copy hashtables
+ auto *data = new AliasData(
+ std::move(languageMap),
+ std::move(scriptMap),
+ std::move(territoryMap),
+ std::move(variantMap),
+ strings.orphanCharStrings());
+
+ if (data == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return data;
+}
+
+/**
+ * A class that find the replacement values of locale fields by using AliasData.
+ */
+class AliasReplacer {
+public:
+ AliasReplacer(UErrorCode status) :
+ language(nullptr), script(nullptr), region(nullptr),
+ extensions(nullptr), variants(status),
+ data(nullptr) {
+ }
+ ~AliasReplacer() {
+ }
+
+ // Check the fields inside locale, if need to replace fields,
+ // place the the replaced locale ID in out and return true.
+ // Otherwise return false for no replacement or error.
+ bool replace(
+ const Locale& locale, CharString& out, UErrorCode status);
+
+private:
+ const char* language;
+ const char* script;
+ const char* region;
+ const char* extensions;
+ UVector variants;
+
+ const AliasData* data;
+
+ inline bool notEmpty(const char* str) {
+ return str && str[0] != NULL_CHAR;
+ }
+
+ /**
+ * If replacement is neither null nor empty and input is either null or empty,
+ * return replacement.
+ * If replacement is neither null nor empty but input is not empty, return input.
+ * If replacement is either null or empty and type is either null or empty,
+ * return input.
+ * Otherwise return null.
+ * replacement input type return
+ * AAA nullptr * AAA
+ * AAA BBB * BBB
+ * nullptr || "" CCC nullptr CCC
+ * nullptr || "" * DDD nullptr
+ */
+ inline const char* deleteOrReplace(
+ const char* input, const char* type, const char* replacement) {
+ return notEmpty(replacement) ?
+ ((input == nullptr) ? replacement : input) :
+ ((type == nullptr) ? input : nullptr);
+ }
+
+ inline bool same(const char* a, const char* b) {
+ if (a == nullptr && b == nullptr) {
+ return true;
+ }
+ if ((a == nullptr && b != nullptr) ||
+ (a != nullptr && b == nullptr)) {
+ return false;
+ }
+ return uprv_strcmp(a, b) == 0;
+ }
+
+ // Gather fields and generate locale ID into out.
+ CharString& outputToString(CharString& out, UErrorCode status);
+
+ // Generate the lookup key.
+ CharString& generateKey(const char* language, const char* region,
+ const char* variant, CharString& out,
+ UErrorCode status);
+
+ void parseLanguageReplacement(const char* replacement,
+ const char*& replaceLanguage,
+ const char*& replaceScript,
+ const char*& replaceRegion,
+ const char*& replaceVariant,
+ const char*& replaceExtensions,
+ UVector& toBeFreed,
+ UErrorCode& status);
+
+ // Replace by using languageAlias.
+ bool replaceLanguage(bool checkLanguage, bool checkRegion,
+ bool checkVariants, UVector& toBeFreed,
+ UErrorCode& status);
+
+ // Replace by using territoryAlias.
+ bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
+
+ // Replace by using scriptAlias.
+ bool replaceScript(UErrorCode& status);
+
+ // Replace by using variantAlias.
+ bool replaceVariant(UErrorCode& status);
+};
+
+ // Appends the languageAlias lookup key "language[_region][_variant]" to out.
+ // A null or empty region / variant is left out together with its separator.
+ // status is a by-value copy, so append failures are not reported back.
+ CharString&
+ AliasReplacer::generateKey(
+         const char* language, const char* region, const char* variant,
+         CharString& out, UErrorCode status)
+ {
+     out.append(language, status);
+
+     const char* const optionalFields[] = { region, variant };
+     for (const char* field : optionalFields) {
+         if (!notEmpty(field)) {
+             continue;
+         }
+         out.append(SEP_CHAR, status);
+         out.append(field, status);
+     }
+     return out;
+ }
+
+ /**
+  * Splits a languageAlias replacement value ('_'-separated) into its fields.
+  *
+  * A replacement without '_' is returned as the language directly, without
+  * copying. Otherwise a private copy is made, registered in toBeFreed, and
+  * cut up in place by overwriting separators with NUL; the output pointers
+  * then point into that copy.
+  *
+  * Field recognition after the language:
+  *   - 4 characters starting with an ASCII letter: script
+  *   - 2 or 3 characters: region
+  *   - 4 or more characters: variant
+  *   - whatever field is reached after that: extensions
+  *
+  * @param replacement         Replacement value from the alias data.
+  * @param replacedLanguage    Receives the language field.
+  * @param replacedScript      Receives the script field or nullptr.
+  * @param replacedRegion      Receives the region field or nullptr.
+  * @param replacedVariant     Receives the variant field or nullptr.
+  * @param replacedExtensions  Receives the extensions field or nullptr.
+  * @param toBeFreed           Receives the private copy, if one was made.
+  * @param status              ICU error code; nothing is done if it already
+  *                            indicates a failure.
+  */
+ void
+ AliasReplacer::parseLanguageReplacement(
+         const char* replacement,
+         const char*& replacedLanguage,
+         const char*& replacedScript,
+         const char*& replacedRegion,
+         const char*& replacedVariant,
+         const char*& replacedExtensions,
+         UVector& toBeFreed,
+         UErrorCode& status)
+ {
+     if (U_FAILURE(status)) {
+         return;
+     }
+     replacedScript = replacedRegion = replacedVariant
+         = replacedExtensions = nullptr;
+     if (uprv_strchr(replacement, '_') == nullptr) {
+         replacedLanguage = replacement;
+         // Single field: nothing to split.
+         return;
+     }
+     // We have multiple fields, so we have to allocate a copy and parse it.
+     CharString* str = new CharString(
+         replacement, (int32_t)uprv_strlen(replacement), status);
+     if (str == nullptr) {
+         status = U_MEMORY_ALLOCATION_ERROR;
+         return;
+     }
+     if (U_FAILURE(status)) {
+         // The copy could not be filled; it has no owner yet, so release it.
+         delete str;
+         return;
+     }
+     toBeFreed.addElement(str, status);
+     char* buffer = str->data();
+     replacedLanguage = (const char*) buffer;
+     // A '_' is guaranteed to exist, see the check above.
+     char* endOfField = uprv_strchr(buffer, '_');
+     *endOfField = '\0';  // null terminate the language.
+     const char* start = endOfField + 1;
+     size_t len = 0;
+
+     // Delimits the field beginning at start: sets endOfField to its
+     // separator (or nullptr for the last field), null terminates it and
+     // stores its length in len.
+     auto delimitField = [&start, &endOfField, &len]() {
+         endOfField = (char*) uprv_strchr(start, '_');
+         if (endOfField == nullptr) {
+             len = uprv_strlen(start);
+         } else {
+             len = endOfField - start;
+             *endOfField = '\0';
+         }
+     };
+
+     delimitField();
+     if (len == 4 && uprv_isASCIILetter(*start)) {
+         // Got a script
+         replacedScript = start;
+         if (endOfField == nullptr) {
+             return;
+         }
+         // Step past the terminator just written. (A post-increment here
+         // would leave start on the NUL and make every later field empty.)
+         start = endOfField + 1;
+         delimitField();
+     }
+     if (len >= 2 && len <= 3) {
+         // Got a region
+         replacedRegion = start;
+         if (endOfField == nullptr) {
+             return;
+         }
+         start = endOfField + 1;
+         delimitField();
+     }
+     if (len >= 4) {
+         // Got a variant
+         replacedVariant = start;
+         if (endOfField == nullptr) {
+             return;
+         }
+         start = endOfField + 1;
+     }
+     replacedExtensions = start;
+ }
+
+ /**
+  * Tries one shape of languageAlias key against the current fields.
+  *
+  * The key is built from the language (or "und" when checkLanguage is
+  * false), the region (when checkRegion is true) and one variant (when
+  * checkVariants is true; each stored variant is tried in turn). On the
+  * first rule that changes anything, the fields are updated and true is
+  * returned.
+  *
+  * @param toBeFreed  Receives strings allocated while parsing a replacement.
+  * @param status     ICU error code.
+  * @return true if a field was changed, false otherwise or on failure.
+  */
+ bool
+ AliasReplacer::replaceLanguage(
+         bool checkLanguage, bool checkRegion,
+         bool checkVariants, UVector& toBeFreed, UErrorCode& status)
+ {
+     if (U_FAILURE(status)) {
+         return false;
+     }
+     const bool regionMissing = checkRegion && region == nullptr;
+     const bool variantsMissing = checkVariants && variants.size() == 0;
+     if (regionMissing || variantsMissing) {
+         // A field required by this key shape is absent: nothing to search.
+         return false;
+     }
+
+     const char* const keyLanguage = checkLanguage ? language : "und";
+     const char* const keyRegion = checkRegion ? region : nullptr;
+     // Without variants in the key there is exactly one key to try.
+     const int32_t candidateCount = checkVariants ? variants.size() : 1;
+
+     for (int32_t idx = 0; idx < candidateCount; idx++) {
+         const char* keyVariant = nullptr;
+         if (checkVariants) {
+             U_ASSERT(idx < candidateCount);
+             keyVariant = (const char*)(variants.elementAt(idx));
+         }
+         if (keyVariant != nullptr && uprv_strlen(keyVariant) < 4) {
+             // Do not consider an ill-formed variant subtag.
+             keyVariant = nullptr;
+         }
+
+         CharString typeKey;
+         generateKey(keyLanguage, keyRegion, keyVariant, typeKey, status);
+         if (U_FAILURE(status)) {
+             return false;
+         }
+         const char *replacement = data->languageMap().get(typeKey.data());
+         if (replacement == nullptr) {
+             // No rule for this key.
+             continue;
+         }
+
+         const char* newLanguage = nullptr;
+         const char* newScript = nullptr;
+         const char* newRegion = nullptr;
+         const char* newVariant = nullptr;
+         const char* newExtensions = nullptr;
+         parseLanguageReplacement(replacement,
+                                  newLanguage,
+                                  newScript,
+                                  newRegion,
+                                  newVariant,
+                                  newExtensions,
+                                  toBeFreed,
+                                  status);
+         // "und" in the replacement means: keep the current language.
+         if (newLanguage != nullptr && uprv_strcmp(newLanguage, "und") == 0) {
+             newLanguage = language;
+         }
+         newScript = deleteOrReplace(script, nullptr, newScript);
+         newRegion = deleteOrReplace(region, keyRegion, newRegion);
+         newVariant = deleteOrReplace(keyVariant, keyVariant, newVariant);
+
+         const bool unchanged =
+             same(language, newLanguage) &&
+             same(script, newScript) &&
+             same(region, newRegion) &&
+             same(keyVariant, newVariant) &&
+             newExtensions == nullptr;
+         if (unchanged) {
+             // The rule produces no change; try the next candidate.
+             continue;
+         }
+
+         language = newLanguage;
+         region = newRegion;
+         script = newScript;
+         if (keyVariant != nullptr) {
+             if (notEmpty(newVariant)) {
+                 variants.setElementAt((void*)newVariant, idx);
+             } else {
+                 variants.removeElementAt(idx);
+             }
+         }
+         if (newExtensions != nullptr) {
+             // TODO(ICU-21292)
+             // DO NOTHING
+             // UTS35 does not specify what to do when the replacement has
+             // extensions. Currently only the following 4 "BCP47 LegacyRules"
+             // in languageAlias have extensions in them:
+             //   i_default => en_x_i_default
+             //   i_enochian => und_x_i_enochian
+             //   i_mingo => see_x_i_mingo
+             //   zh_min => nan_x_zh_min
+             // But all of them are already changed by code inside
+             // ultag_parse() before hitting this code.
+         }
+
+         // Something was changed by the language alias data.
+         return true;
+     }
+     // Nothing was changed by the language alias data.
+     return false;
+ }
+
+ /**
+  * Replaces the region by using territoryAlias.
+  *
+  * When the replacement lists several regions (space-separated), the region
+  * that addLikelySubtags() yields for the current language and script is
+  * chosen if it occurs in the list; otherwise the first listed region is
+  * used. The chosen region is copied into a new string that is registered
+  * in toBeFreed.
+  *
+  * @param toBeFreed  Receives the string allocated for the chosen region.
+  * @param status     ICU error code.
+  * @return true if the region was changed, false otherwise or on failure.
+  */
+ bool
+ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
+ {
+     if (U_FAILURE(status)) {
+         return false;
+     }
+     if (region == nullptr) {
+         // No region to search.
+         return false;
+     }
+     const char *replacement = data->territoryMap().get(region);
+     if (replacement == nullptr) {
+         // Found no replacement data for this region.
+         return false;
+     }
+     const char* replacedRegion = replacement;
+     const char* firstSpace = uprv_strchr(replacement, ' ');
+     if (firstSpace != nullptr) {
+         // There is more than one region in the replacement.
+         // We need to check which one matches based on the language.
+         // Cannot use nullptr for language because that will construct
+         // the default locale; in that case, use "und" to get the correct
+         // locale.
+         Locale l(language == nullptr ? "und" : language, nullptr, script);
+         l.addLikelySubtags(status);
+         const char* likelyRegion = l.getCountry();
+         CharString* item = nullptr;
+         if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
+             size_t len = uprv_strlen(likelyRegion);
+             const char* foundInReplacement = uprv_strstr(replacement,
+                                                          likelyRegion);
+             if (foundInReplacement != nullptr) {
+                 // Assuming there is no three letter region code in
+                 // the replacement of territoryAlias, a substring match
+                 // is a whole-item match.
+                 U_ASSERT(foundInReplacement == replacement ||
+                          *(foundInReplacement-1) == ' ');
+                 U_ASSERT(foundInReplacement[len] == ' ' ||
+                          foundInReplacement[len] == '\0');
+                 item = new CharString(foundInReplacement, (int32_t)len, status);
+             }
+         }
+         if (item == nullptr) {
+             // Fall back to the first region of the list.
+             item = new CharString(replacement,
+                                   (int32_t)(firstSpace - replacement), status);
+         }
+         if (U_FAILURE(status)) {
+             // item has no owner yet; release it instead of leaking it.
+             delete item;
+             return false;
+         }
+         if (item == nullptr) {
+             status = U_MEMORY_ALLOCATION_ERROR;
+             return false;
+         }
+         replacedRegion = item->data();
+         toBeFreed.addElement(item, status);
+     }
+     U_ASSERT(!same(region, replacedRegion));
+     region = replacedRegion;
+     // The region is changed by data in territory alias.
+     return true;
+ }
+
+ // Replaces the script by using scriptAlias.
+ // Returns true if the script was changed; false if there is no script, no
+ // alias rule for it, or status already indicates a failure.
+ bool
+ AliasReplacer::replaceScript(UErrorCode& status)
+ {
+     if (U_FAILURE(status) || script == nullptr) {
+         // Error, or no script to search.
+         return false;
+     }
+     const char *aliasTarget = data->scriptMap().get(script);
+     if (aliasTarget != nullptr) {
+         U_ASSERT(!same(script, aliasTarget));
+         // The script is changed by data in script alias.
+         script = aliasTarget;
+         return true;
+     }
+     // Found no replacement data for this script.
+     return false;
+ }
+
+ /**
+  * Replaces one variant by using variantAlias.
+  *
+  * The stored variants are tried in order; the first one whose alias differs
+  * from it is replaced in place. When "heploc" is replaced, every "hepburn"
+  * variant is removed as well.
+  *
+  * @param status ICU error code.
+  * @return true if a variant was changed, false otherwise or on failure.
+  */
+ bool
+ AliasReplacer::replaceVariant(UErrorCode& status)
+ {
+     if (U_FAILURE(status)) {
+         return false;
+     }
+     // Since we may have more than one variant, we need to loop through them.
+     for (int32_t i = 0; i < variants.size(); i++) {
+         const char *variant = (const char*)(variants.elementAt(i));
+         const char *replacement = data->variantMap().get(variant);
+         if (replacement == nullptr) {
+             // Found no replacement data for this variant.
+             continue;
+         }
+         U_ASSERT((uprv_strlen(replacement) >= 5  &&
+                   uprv_strlen(replacement) <= 8) ||
+                  (uprv_strlen(replacement) == 4 &&
+                   replacement[0] >= '0' &&
+                   replacement[0] <= '9'));
+         if (!same(variant, replacement)) {
+             variants.setElementAt((void*)replacement, i);
+             // Special hack to handle hepburn-heploc => alalc97
+             if (uprv_strcmp(variant, "heploc") == 0) {
+                 // Walk backwards: removing an element shifts the following
+                 // ones down, which a forward walk would step over.
+                 for (int32_t j = variants.size() - 1; j >= 0; j--) {
+                     if (uprv_strcmp((const char*)(variants.elementAt(j)),
+                                     "hepburn") == 0) {
+                         variants.removeElementAt(j);
+                     }
+                 }
+             }
+             return true;
+         }
+     }
+     return false;
+ }
+
+ // Assembles the locale ID from the current fields and appends it to out:
+ // language, then script and region when present, then the variants (sorted
+ // and upper-cased), then the extensions. When there are variants but
+ // neither script nor region, an extra separator is emitted before them.
+ // status is a by-value copy, so failures are not reported back.
+ CharString&
+ AliasReplacer::outputToString(
+         CharString& out, UErrorCode status)
+ {
+     const bool hasScript = notEmpty(script);
+     const bool hasRegion = notEmpty(region);
+
+     out.append(language, status);
+     if (hasScript) {
+         out.append(SEP_CHAR, status);
+         out.append(script, status);
+     }
+     if (hasRegion) {
+         out.append(SEP_CHAR, status);
+         out.append(region, status);
+     }
+
+     if (variants.size() > 0) {
+         if (!hasScript && !hasRegion) {
+             out.append(SEP_CHAR, status);
+         }
+         variants.sort([](UElement e1, UElement e2) -> int8_t {
+             return uprv_strcmp(
+                 (const char*)e1.pointer, (const char*)e2.pointer);
+         }, status);
+         // Everything appended from here on is the variant part.
+         const int32_t variantsStart = out.length();
+         for (int32_t i = 0; i < variants.size(); i++) {
+             const char* variant = (const char*)variants.elementAt(i);
+             out.append(SEP_CHAR, status);
+             out.append(variant, status);
+         }
+         T_CString_toUpperCase(out.data() + variantsStart);
+     }
+
+     if (notEmpty(extensions)) {
+         CharString tmp("und_", status);
+         tmp.append(extensions, status);
+         Locale tmpLocale(tmp.data());
+         // only support x extension inside CLDR for now.
+         U_ASSERT(extensions[0] == 'x');
+         out.append(tmpLocale.getName() + 1, status);
+     }
+     return out;
+ }
+
+ /**
+  * Canonicalizes the fields of locale by applying the alias rules until
+  * nothing changes any more.
+  *
+  * @param locale  Locale to examine.
+  * @param out     Receives the replaced locale ID when true is returned;
+  *                cleared otherwise.
+  * @param status  ICU error code. It is a by-value copy, so failures
+  *                detected here only show up as a false return value.
+  * @return true if the resulting ID differs from locale.getName().
+  */
+ bool
+ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
+ {
+     data = AliasData::singleton(status);
+     if (U_FAILURE(status)) {
+         return false;
+     }
+     U_ASSERT(data != nullptr);
+     out.clear();
+
+     // Empty fields are represented as nullptr from here on.
+     auto nullIfEmpty = [](const char* field) -> const char* {
+         return (field && field[0] != NULL_CHAR) ? field : nullptr;
+     };
+     language = nullIfEmpty(locale.getLanguage());
+     script = nullIfEmpty(locale.getScript());
+     region = nullIfEmpty(locale.getCountry());
+
+     const char* variantsStr = locale.getVariant();
+     const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+
+     // Split a lower-cased copy of the variant string in place; the entries
+     // of variants point into this buffer.
+     CharString variantsBuff(variantsStr, -1, status);
+     if (!variantsBuff.isEmpty()) {
+         if (U_FAILURE(status)) { return false; }
+         char* cursor = variantsBuff.data();
+         T_CString_toLowerCase(cursor);
+         for (;;) {
+             char* sep = uprv_strchr(cursor, SEP_CHAR);
+             if (sep == nullptr || U_FAILURE(status)) {
+                 break;
+             }
+             *sep = NULL_CHAR;  // null terminate inside variantsBuff
+             variants.addElement(cursor, status);
+             cursor = sep + 1;
+         }
+         variants.addElement(cursor, status);
+     }
+     if (U_FAILURE(status)) { return false; }
+
+     // Sort the variants
+     variants.sort([](UElement e1, UElement e2) -> int8_t {
+         return uprv_strcmp(
+             (const char*)e1.pointer, (const char*)e2.pointer);
+     }, status);
+
+     // Number of rounds in which something changed.
+     int changed = 0;
+     // Holds the CharString objects allocated by the replace* methods; they
+     // are freed when this vector goes out of scope.
+     UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); },
+                              nullptr, 10, status);
+     while (U_SUCCESS(status)) {
+         // Five or more rounds of changes indicate that something is wrong
+         // with the data.
+         U_ASSERT(changed < 5);
+         // From observation of the keys in data/misc/metadata.txt we know
+         // that currently only the following combinations of fields occur
+         // as type in languageAlias:
+         // * lang_region_variant
+         // * lang_region
+         // * lang_variant
+         // * lang
+         // * und_variant
+         // This assumption is ensured by the U_ASSERT in readLanguageAlias.
+         //
+         // Arguments of replaceLanguage: lang, REGION, variant.
+         const bool anyChange =
+             replaceLanguage(true, true, true, stringsToBeFreed, status) ||
+             replaceLanguage(true, true, false, stringsToBeFreed, status) ||
+             replaceLanguage(true, false, true, stringsToBeFreed, status) ||
+             replaceLanguage(true, false, false, stringsToBeFreed, status) ||
+             replaceLanguage(false,false, true, stringsToBeFreed, status) ||
+             replaceTerritory(stringsToBeFreed, status) ||
+             replaceScript(status) ||
+             replaceVariant(status);
+         if (!anyChange) {
+             // Nothing changed. Break out.
+             break;
+         }
+         // Some value was changed; try to match from the beginning again.
+         changed++;
+     }
+
+     if (U_FAILURE(status)) { return false; }
+     // Nothing changed, and the order of the variants cannot have changed
+     // either because there is at most one of them.
+     if (changed == 0 && variants.size() <= 1) {
+         return false;
+     }
+     outputToString(out, status);
+     if (extensionsStr != nullptr) {
+         out.append(extensionsStr, status);
+     }
+     if (U_FAILURE(status)) {
+         return false;
+     }
+     if (uprv_strcmp(out.data(), locale.getName()) != 0) {
+         return true;
+     }
+     // The tag is not changed.
+     U_ASSERT(changed == 0);
+     U_ASSERT(variants.size() > 1);
+     out.clear();
+     return false;
+ }
+
+ // Runs alias replacement on locale.
+ // Returns true if the locale is changed during canonicalization; the
+ // replaced value is then placed in out.
+ bool
+ canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
+ {
+     AliasReplacer aliasReplacer(status);
+     const bool wasChanged = aliasReplacer.replace(locale, out, status);
+     return wasChanged;
+ }
+
+ // Shortcut for locale IDs that are known to be canonical already, so that
+ // loading of resources at startup can be skipped until it is really needed.
+ // Returns false when the table of known IDs cannot be initialized.
+ bool
+ isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
+ {
+     // The most frequent IDs are answered without touching the table.
+     const char* const alwaysCanonical[] = { "c", "en", "en_US" };
+     for (const char* id : alwaysCanonical) {
+         if (uprv_strcmp(locale, id) == 0) {
+             return true;
+         }
+     }
+
+     // common well-known Canonicalized.
+     umtx_initOnce(gKnownCanonicalizedInitOnce,
+                   &loadKnownCanonicalized, status);
+     if (U_SUCCESS(status)) {
+         U_ASSERT(gKnownCanonicalized != nullptr);
+         return uhash_geti(gKnownCanonicalized, locale) != 0;
+     }
+     return false;
+ }
+
+} // namespace
+
+ // Function for testing: exposes the KNOWN_CANONICALIZED array.
+ // Stores the number of entries in *length and returns the array.
+ U_CAPI const char* const*
+ ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
+ {
+     const int32_t entryCount = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+     *length = entryCount;
+     return KNOWN_CANONICALIZED;
+ }
+
+ // Function for testing: returns true if alias replacement leaves the given
+ // locale name unchanged and no error was reported.
+ U_CAPI bool
+ ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
+ {
+     UErrorCode status = U_ZERO_ERROR;
+     Locale l(localeName);
+     CharString temp;
+     if (canonicalizeLocale(l, temp, status)) {
+         return false;
+     }
+     return U_SUCCESS(status);
+ }
+
+ /*
+  * This function initializes a Locale from a C locale ID.
+  *
+  * Any storage held for the previous value is released first. A NULL
+  * localeID assigns the default locale. Otherwise the ID is normalized into
+  * fullName (by uloc_canonicalize() or uloc_getName(), depending on the
+  * canonicalize flag), split into fields to fill language, script, country
+  * and variantBegin, and the base name is set up. With canonicalize set and
+  * an ID that is not known to be canonical, alias replacement runs and, if it
+  * changed the ID, the object is initialized again from the replaced ID.
+  *
+  * On any error the object is set to bogus; there is no UErrorCode here.
+  * Returns *this.
+  */
+ Locale& Locale::init(const char* localeID, UBool canonicalize)
+ {
+ fIsBogus = FALSE;
+ /* Free our current storage */
+ if (baseName != fullName) { // baseName is a separate allocation only when it differs from fullName
+ uprv_free(baseName);
+ }
+ baseName = NULL;
+ if(fullName != fullNameBuffer) { // fullName was moved to the heap earlier
+ uprv_free(fullName);
+ fullName = fullNameBuffer;
+ }
+
+ // not a loop:
+ // just an easy way to have a common error-exit
+ // without goto and without another function
+ do {
+ char *separator;
+ char *field[5] = {0}; // start of each '_'-separated field inside fullName
+ int32_t fieldLen[5] = {0}; // length of the corresponding field
+ int32_t fieldIdx;
+ int32_t variantField;
+ int32_t length;
+ UErrorCode err;
+
+ if(localeID == NULL) {
+ // not an error, just set the default locale
+ return *this = getDefault();
+ }
+
+ /* preset all fields to empty */
+ language[0] = script[0] = country[0] = 0;
+
+ // "canonicalize" the locale ID to ICU/Java format
+ err = U_ZERO_ERROR;
+ length = canonicalize ?
+ uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
+ uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
+
+ if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
+ /*Go to heap for the fullName if necessary*/
+ fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
+ if(fullName == 0) {
+ fullName = fullNameBuffer;
+ break; // error: out of memory
+ }
+ err = U_ZERO_ERROR;
+ length = canonicalize ?
+ uloc_canonicalize(localeID, fullName, length+1, &err) :
+ uloc_getName(localeID, fullName, length+1, &err);
+ }
+ if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
+ /* should never occur */
+ break;
+ }
+
+ variantBegin = length; // default: no variant, so it starts at the end of the name
+
+ /* after uloc_getName/canonicalize() we know that only '_' are separators */
+ /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
+ separator = field[0] = fullName;
+ fieldIdx = 1;
+ char* at = uprv_strchr(fullName, '@');
+ while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
+ fieldIdx < UPRV_LENGTHOF(field)-1 &&
+ (at == nullptr || separator < at)) { // only separators before the '@' split fields
+ field[fieldIdx] = separator + 1;
+ fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
+ fieldIdx++;
+ }
+ // variant may contain @foo or .foo POSIX cruft; remove it
+ separator = uprv_strchr(field[fieldIdx-1], '@');
+ char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
+ if (separator!=NULL || sep2!=NULL) {
+ if (separator==NULL || (sep2!=NULL && separator > sep2)) { // use whichever of '@' and '.' comes first
+ separator = sep2;
+ }
+ fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
+ } else {
+ fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
+ }
+
+ if (fieldLen[0] >= (int32_t)(sizeof(language)))
+ {
+ break; // error: the language field is too long
+ }
+
+ variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
+ if (fieldLen[0] > 0) {
+ /* We have a language */
+ uprv_memcpy(language, fullName, fieldLen[0]);
+ language[fieldLen[0]] = 0;
+ }
+ if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
+ uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
+ uprv_isASCIILetter(field[1][3])) {
+ /* We have at least a script */
+ uprv_memcpy(script, field[1], fieldLen[1]);
+ script[fieldLen[1]] = 0;
+ variantField++;
+ }
+
+ if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
+ /* We have a country */
+ uprv_memcpy(country, field[variantField], fieldLen[variantField]);
+ country[fieldLen[variantField]] = 0;
+ variantField++;
+ } else if (fieldLen[variantField] == 0) {
+ variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
+ }
+
+ if (fieldLen[variantField] > 0) {
+ /* We have a variant */
+ variantBegin = (int32_t)(field[variantField] - fullName);
+ }
+
+ err = U_ZERO_ERROR;
+ initBaseName(err);
+ if (U_FAILURE(err)) {
+ break;
+ }
+
+ if (canonicalize) {
+ if (!isKnownCanonicalizedLocale(fullName, err)) {
+ CharString replaced;
+ // Not sure it is already canonicalized
+ if (canonicalizeLocale(*this, replaced, err)) {
+ U_ASSERT(U_SUCCESS(err));
+ // If need replacement, call init again.
+ init(replaced.data(), false); // false: the replaced ID is not canonicalized a second time
+ }
+ if (U_FAILURE(err)) {
+ break;
+ }
+ }
+ } // if (canonicalize) {
+
+ // successful end of init()
+ return *this;
+ } while(0); /*loop doesn't iterate*/
+
+ // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
+ setToBogus();
+
+ return *this;
+ }
+
+/*
+ * Set up the base name.
+ * If there are no key words, it's exactly the full name.
+ * If key words exist, it's the full name truncated at the '@' character.
+ * Need to set up both at init() and after setting a keyword.
+ */
+void
+Locale::initBaseName(UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ U_ASSERT(baseName==NULL || baseName==fullName);
+ const char *atPtr = uprv_strchr(fullName, '@');
+ const char *eqPtr = uprv_strchr(fullName, '=');
+ if (atPtr && eqPtr && atPtr < eqPtr) {
+ // Key words exist.
+ int32_t baseNameLength = (int32_t)(atPtr - fullName);
+ baseName = (char *)uprv_malloc(baseNameLength + 1);
+ if (baseName == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_strncpy(baseName, fullName, baseNameLength);
+ baseName[baseNameLength] = 0;
+
+ // The original computation of variantBegin leaves it equal to the length
+ // of fullName if there is no variant. It should instead be
+ // the length of the baseName.
+ if (variantBegin > baseNameLength) {
+ variantBegin = baseNameLength;
+ }
+ } else {
+ baseName = fullName;
+ }
+}
+
+
+ // Returns a hash code computed over the characters of the full locale name.
+ int32_t
+ Locale::hashCode() const
+ {
+     // uprv_strlen() yields a size_t; narrow it explicitly to the function's
+     // own result type for the length argument.
+     return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
+ }
+
+ // Puts this object into the "bogus" state: releases heap storage, empties
+ // the name and all fields, and raises the bogus flag.
+ void
+ Locale::setToBogus() {
+     /* Free our current storage */
+     if (baseName != fullName) {
+         uprv_free(baseName);
+     }
+     baseName = NULL;
+     if (fullName != fullNameBuffer) {
+         uprv_free(fullName);
+         fullName = fullNameBuffer;
+     }
+
+     /* Empty every string field */
+     fullNameBuffer[0] = 0;
+     language[0] = 0;
+     script[0] = 0;
+     country[0] = 0;
+
+     variantBegin = 0;
+     fIsBogus = TRUE;
+ }
+
+ // Returns the default locale. The cached pointer is read under
+ // gDefaultLocaleMutex; when it is not set yet, the default is established
+ // through locale_set_default_internal().
+ const Locale& U_EXPORT2
+ Locale::getDefault()
+ {
+     const Locale* cached = NULL;
+     {
+         Mutex lock(&gDefaultLocaleMutex);
+         cached = gDefaultLocale;
+     }
+     if (cached != NULL) {
+         return *cached;
+     }
+     UErrorCode status = U_ZERO_ERROR;
+     return *locale_set_default_internal(NULL, status);
+ }
+
+
+
+void U_EXPORT2
+Locale::setDefault( const Locale& newLocale,
+ UErrorCode& status)
+{
+ if (U_FAILURE(status)) { // pre-existing failure: leave the default untouched
+ return;
+ }
+
+ /* Set the default from the full name string of the supplied locale.
+ * This is a convenient way to access the default locale caching mechanisms.
+ */
+ const char *localeID = newLocale.getName();
+ locale_set_default_internal(localeID, status); // returned pointer is not needed here
+}
+
+void
+Locale::addLikelySubtags(UErrorCode& status) { // Re-initializes this locale in place from ulocimp_addLikelySubtags' output.
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ CharString maximizedLocaleID;
+ {
+ CharStringByteSink sink(&maximizedLocaleID); // sink is confined to this block, which ends before the string is read
+ ulocimp_addLikelySubtags(fullName, sink, &status);
+ }
+
+ if (U_FAILURE(status)) { // on failure this object is left unchanged
+ return;
+ }
+
+ init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
+ if (isBogus()) { // init() has no UErrorCode; it signals failure by leaving the object bogus
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+void
+Locale::minimizeSubtags(UErrorCode& status) { // Re-initializes this locale in place from ulocimp_minimizeSubtags' output.
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ CharString minimizedLocaleID;
+ {
+ CharStringByteSink sink(&minimizedLocaleID); // sink is confined to this block, which ends before the string is read
+ ulocimp_minimizeSubtags(fullName, sink, &status);
+ }
+
+ if (U_FAILURE(status)) { // on failure this object is left unchanged
+ return;
+ }
+
+ init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
+ if (isBogus()) { // init() has no UErrorCode; it signals failure by leaving the object bogus
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+void
+Locale::canonicalize(UErrorCode& status) { // Re-runs init() on this locale's own name with canonicalize=TRUE.
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (isBogus()) { // a bogus locale cannot be canonicalized
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ CharString uncanonicalized(fullName, status); // private copy; presumably needed because init() rewrites fullName - confirm
+ if (U_FAILURE(status)) {
+ return;
+ }
+ init(uncanonicalized.data(), /*canonicalize=*/TRUE);
+ if (isBogus()) { // init() signals failure by leaving the object bogus
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+}
+
+Locale U_EXPORT2
+Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
+{
+ Locale result(Locale::eBOGUS); // every early return below hands back this bogus locale
+
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ // If a BCP 47 language tag is passed as the language parameter to the
+ // normal Locale constructor, it will actually fall back to invoking
+ // uloc_forLanguageTag() to parse it if it somehow is able to detect that
+ // the string actually is BCP 47. This works well for things like strings
+ // using BCP 47 extensions, but it does not at all work for things like
+ // legacy language tags (marked as “Type: grandfathered” in BCP 47,
+ // e.g., "en-GB-oed") which are possible to also
+ // interpret as ICU locale IDs and because of that won't trigger the BCP 47
+ // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
+ // and then Locale::init(), instead of just calling the normal constructor.
+
+ CharString localeID;
+ int32_t parsedLength; // passed by address to ulocimp_forLanguageTag
+ {
+ CharStringByteSink sink(&localeID); // sink is confined to this block, which ends before localeID is read
+ ulocimp_forLanguageTag(
+ tag.data(),
+ tag.length(),
+ sink,
+ &parsedLength,
+ &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ if (parsedLength != tag.size()) { // reject a tag that was only partially parsed
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return result;
+ }
+
+ result.init(localeID.data(), /*canonicalize=*/FALSE);
+ if (result.isBogus()) { // init() signals failure by leaving the object bogus
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return result;
+}
+
+void
+Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const // Writes ulocimp_toLanguageTag's output for fullName to sink.
+{
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (fIsBogus) { // a bogus locale has no language tag
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
+}
+
+Locale U_EXPORT2
+Locale::createFromName (const char *name) // Builds a Locale from name without canonicalization; NULL yields a copy of the default.
+{
+ if (name) {
+ Locale l(""); // start from an empty-name locale, then re-initialize it
+ l.init(name, FALSE);
+ return l;
+ }
+ else {
+ return getDefault(); // returned by value, so this is a copy
+ }
+}
+
+Locale U_EXPORT2
+Locale::createCanonical(const char* name) { // Builds a Locale from name with canonicalize=TRUE.
+ Locale loc(""); // start from an empty-name locale, then re-initialize it
+ loc.init(name, TRUE); // NOTE(review): name is not NULL-checked here, unlike createFromName - confirm init() accepts NULL
+ return loc;
+}
+
+const char *
+Locale::getISO3Language() const // Thin wrapper: forwards this locale's fullName to the C API.
+{
+ return uloc_getISO3Language(fullName);
+}
+
+
+const char *
+Locale::getISO3Country() const // Thin wrapper: forwards this locale's fullName to the C API.
+{
+ return uloc_getISO3Country(fullName);
+}
+
+/**
+ * Return the LCID value as specified in the "LocaleID" resource for this
+ * locale. The LocaleID must be expressed as a hexadecimal number, from
+ * one to four digits. If the LocaleID resource is not present, or is
+ * in an incorrect format, 0 is returned. The LocaleID is for use in
+ * Windows (it is an LCID), but is available on all platforms.
+ */
+uint32_t
+Locale::getLCID() const // The lookup itself is delegated to uloc_getLCID(fullName).
+{
+ return uloc_getLCID(fullName);
+}
+
+const char* const* U_EXPORT2 Locale::getISOCountries() // Thin wrapper over the C API; no locale state is used.
+{
+ return uloc_getISOCountries();
+}
+
+const char* const* U_EXPORT2 Locale::getISOLanguages() // Thin wrapper over the C API; no locale state is used.
+{
+ return uloc_getISOLanguages();
+}
+
+// Set the locale's data based on a POSIX id, by re-running init() with canonicalize=TRUE.
+void Locale::setFromPOSIXID(const char *posixID)
+{
+ init(posixID, TRUE); // failure is visible only through isBogus(); there is no UErrorCode here
+}
+
+const Locale & U_EXPORT2
+Locale::getRoot(void) // Each getter below returns one fixed entry of the shared locale cache via getLocale().
+{
+ return getLocale(eROOT);
+}
+
+const Locale & U_EXPORT2
+Locale::getEnglish(void)
+{
+ return getLocale(eENGLISH);
+}
+
+const Locale & U_EXPORT2
+Locale::getFrench(void)
+{
+ return getLocale(eFRENCH);
+}
+
+const Locale & U_EXPORT2
+Locale::getGerman(void)
+{
+ return getLocale(eGERMAN);
+}
+
+const Locale & U_EXPORT2
+Locale::getItalian(void)
+{
+ return getLocale(eITALIAN);
+}
+
+const Locale & U_EXPORT2
+Locale::getJapanese(void)
+{
+ return getLocale(eJAPANESE);
+}
+
+const Locale & U_EXPORT2
+Locale::getKorean(void)
+{
+ return getLocale(eKOREAN);
+}
+
+const Locale & U_EXPORT2
+Locale::getChinese(void)
+{
+ return getLocale(eCHINESE);
+}
+
+const Locale & U_EXPORT2
+Locale::getSimplifiedChinese(void)
+{
+ return getLocale(eCHINA); // same cache entry as getChina() and getPRC()
+}
+
+const Locale & U_EXPORT2
+Locale::getTraditionalChinese(void)
+{
+ return getLocale(eTAIWAN); // same cache entry as getTaiwan()
+}
+
+
+const Locale & U_EXPORT2
+Locale::getFrance(void)
+{
+ return getLocale(eFRANCE);
+}
+
+const Locale & U_EXPORT2
+Locale::getGermany(void)
+{
+ return getLocale(eGERMANY);
+}
+
+const Locale & U_EXPORT2
+Locale::getItaly(void)
+{
+ return getLocale(eITALY);
+}
+
+const Locale & U_EXPORT2
+Locale::getJapan(void)
+{
+ return getLocale(eJAPAN);
+}
+
+const Locale & U_EXPORT2
+Locale::getKorea(void)
+{
+ return getLocale(eKOREA);
+}
+
+const Locale & U_EXPORT2
+Locale::getChina(void)
+{
+ return getLocale(eCHINA);
+}
+
+const Locale & U_EXPORT2
+Locale::getPRC(void)
+{
+ return getLocale(eCHINA); // alias of getChina()
+}
+
+const Locale & U_EXPORT2
+Locale::getTaiwan(void)
+{
+ return getLocale(eTAIWAN);
+}
+
+const Locale & U_EXPORT2
+Locale::getUK(void)
+{
+ return getLocale(eUK);
+}
+
+const Locale & U_EXPORT2
+Locale::getUS(void)
+{
+ return getLocale(eUS);
+}
+
+const Locale & U_EXPORT2
+Locale::getCanada(void)
+{
+ return getLocale(eCANADA);
+}
+
+const Locale & U_EXPORT2
+Locale::getCanadaFrench(void)
+{
+ return getLocale(eCANADA_FRENCH);
+}
+
+const Locale &
+Locale::getLocale(int locid) // Returns entry locid of the lazily created locale cache.
+{
+ Locale *localeCache = getLocaleCache();
+ U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0)); // range is enforced only where U_ASSERT is active
+ if (localeCache == NULL) {
+ // Failure allocating the locale cache.
+ // The best we can do is return a NULL reference.
+ locid = 0;
+ }
+ return localeCache[locid]; /*operating on NULL*/ // NOTE(review): with a NULL cache this binds a reference through a null pointer (undefined behavior)
+}
+
+/*
+This function is defined this way in order to get around static
+initialization and static destruction: the cache is built on first call via umtx_initOnce.
+ */
+Locale *
+Locale::getLocaleCache(void)
+{
+ UErrorCode status = U_ZERO_ERROR; // local only; callers detect failure through a NULL return
+ umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
+ return gLocaleCache;
+}
+
+class KeywordEnumeration : public StringEnumeration { // Enumerates a private copy of a list of NUL-separated keyword strings.
+private:
+ char *keywords; // start of the copied list, or &fgClassID when nothing was copied
+ char *current; // next string that next() will return
+ int32_t length; // number of bytes copied from keys (excludes the extra NUL appended by the constructor)
+ UnicodeString currUSKey;
+ static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */ // its '\0' value doubles as an empty list
+
+public:
+ static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
+ virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
+public:
+ KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
+ : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) { // default: empty list backed by fgClassID
+ if(U_SUCCESS(status) && keywordLen != 0) {
+ if(keys == NULL || keywordLen < 0) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ } else {
+ keywords = (char *)uprv_malloc(keywordLen+1);
+ if (keywords == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ else {
+ uprv_memcpy(keywords, keys, keywordLen);
+ keywords[keywordLen] = 0; // extra NUL: an empty string marks the end of the list
+ current = keywords + currentIndex;
+ length = keywordLen;
+ }
+ }
+ }
+ }
+
+ virtual ~KeywordEnumeration();
+
+ virtual StringEnumeration * clone() const // NOTE(review): not overridden by UnicodeKeywordEnumeration, so its clone is a plain KeywordEnumeration - confirm intended
+ {
+ UErrorCode status = U_ZERO_ERROR; // NOTE(review): never checked; a failed copy is still returned
+ return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
+ }
+
+ virtual int32_t count(UErrorCode &/*status*/) const { // counts all strings in the list, independent of the current position
+ char *kw = keywords;
+ int32_t result = 0;
+ while(*kw) {
+ result++;
+ kw += uprv_strlen(kw)+1; // skip this string and its NUL
+ }
+ return result;
+ }
+
+ virtual const char* next(int32_t* resultLength, UErrorCode& status) { // returns the current string and advances, or NULL at the end or on failure
+ const char* result;
+ int32_t len;
+ if(U_SUCCESS(status) && *current != 0) {
+ result = current;
+ len = (int32_t)uprv_strlen(current);
+ current += len+1;
+ if(resultLength != NULL) {
+ *resultLength = len;
+ }
+ } else {
+ if(resultLength != NULL) {
+ *resultLength = 0;
+ }
+ result = NULL;
+ }
+ return result;
+ }
+
+ virtual const UnicodeString* snext(UErrorCode& status) { // same as next(), with the result passed through setChars()
+ int32_t resultLength = 0;
+ const char *s = next(&resultLength, status);
+ return setChars(s, resultLength, status);
+ }
+
+ virtual void reset(UErrorCode& /*status*/) { // rewind to the first string
+ current = keywords;
+ }
+};
+
+const char KeywordEnumeration::fgClassID = '\0'; // class ID; the constructor also uses its address as the "no list copied" sentinel
+
+KeywordEnumeration::~KeywordEnumeration() { // Releases the copied keyword list, if one was allocated.
+ if (keywords != &fgClassID) { uprv_free(keywords); } // the sentinel is static storage and must never be freed
+}
+
+// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
+// the next() method for each keyword before returning it.
+class UnicodeKeywordEnumeration : public KeywordEnumeration {
+public:
+ using KeywordEnumeration::KeywordEnumeration;
+ virtual ~UnicodeKeywordEnumeration();
+
+ virtual const char* next(int32_t* resultLength, UErrorCode& status) { // returns the converted key, or nullptr at the end or on error
+ const char* legacy_key = KeywordEnumeration::next(nullptr, status); // legacy length is not needed; the converted key is measured below
+ if (U_SUCCESS(status) && legacy_key != nullptr) {
+ const char* key = uloc_toUnicodeLocaleKey(legacy_key);
+ if (key == nullptr) { // no Unicode key for this legacy key
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ } else {
+ if (resultLength != nullptr) {
+ *resultLength = static_cast<int32_t>(uprv_strlen(key));
+ }
+ return key;
+ }
+ }
+ if (resultLength != nullptr) *resultLength = 0;
+ return nullptr;
+ }
+};
+
+// Out-of-line virtual destructor to serve as the "key function".
+UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
+
+StringEnumeration *
+Locale::createKeywords(UErrorCode &status) const // Returns a new enumeration of this locale's keywords, or NULL if there are none or on error.
+{
+ StringEnumeration *result = NULL;
+
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ const char* variantStart = uprv_strchr(fullName, '@');
+ const char* assignment = uprv_strchr(fullName, '=');
+ if(variantStart) {
+ if(assignment > variantStart) { // an '=' after the '@'; NOTE(review): assignment may be NULL here and is compared relationally
+ CharString keywords;
+ CharStringByteSink sink(&keywords);
+ ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
+ if (U_SUCCESS(status) && !keywords.isEmpty()) {
+ result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status); // NOTE(review): if the constructor sets a failure status, the object is still returned
+ if (!result) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ } else {
+ status = U_INVALID_FORMAT_ERROR; // '@' present but no '=' after it
+ }
+ }
+ return result;
+}
+
+StringEnumeration *
+Locale::createUnicodeKeywords(UErrorCode &status) const // Like createKeywords(), but the enumeration converts each key with uloc_toUnicodeLocaleKey().
+{
+ StringEnumeration *result = NULL;
+
+ if (U_FAILURE(status)) {
+ return result;
+ }
+
+ const char* variantStart = uprv_strchr(fullName, '@');
+ const char* assignment = uprv_strchr(fullName, '=');
+ if(variantStart) {
+ if(assignment > variantStart) { // an '=' after the '@'; NOTE(review): assignment may be NULL here and is compared relationally
+ CharString keywords;
+ CharStringByteSink sink(&keywords);
+ ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status);
+ if (U_SUCCESS(status) && !keywords.isEmpty()) {
+ result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status); // NOTE(review): if the constructor sets a failure status, the object is still returned
+ if (!result) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ } else {
+ status = U_INVALID_FORMAT_ERROR; // '@' present but no '=' after it
+ }
+ }
+ return result;
+}
+
+int32_t
+Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const // Thin wrapper over the C API for fullName.
+{
+ return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
+}
+
+void
+Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const { // ByteSink overload: the value is written to sink.
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ if (fIsBogus) { // a bogus locale has no keywords to query
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ // TODO: Remove the need for a const char* to a NUL terminated buffer.
+ const CharString keywordName_nul(keywordName, status); // NUL-terminated copy of the StringPiece
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status);
+}
+
+void
+Locale::getUnicodeKeywordValue(StringPiece keywordName, // Unicode key in, Unicode type value out (appended to sink)
+ ByteSink& sink,
+ UErrorCode& status) const {
+ // TODO: Remove the need for a const char* to a NUL terminated buffer.
+ const CharString keywordName_nul(keywordName, status);
+ if (U_FAILURE(status)) { // also covers a failure status passed in by the caller
+ return;
+ }
+
+ const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
+
+ if (legacy_key == nullptr) { // no legacy key for this Unicode key
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ CharString legacy_value;
+ {
+ CharStringByteSink sink(&legacy_value); // deliberately shadows the parameter inside this block only
+ getKeywordValue(legacy_key, sink, status);
+ }
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ const char* unicode_value = uloc_toUnicodeLocaleType(
+ keywordName_nul.data(), legacy_value.data());
+
+ if (unicode_value == nullptr) { // the legacy value has no Unicode type equivalent
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
+}
+
+void
+Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
+{ // Applies uloc_setKeywordValue to fullName in place, growing the buffer on overflow, then refreshes baseName.
+ if (U_FAILURE(status)) {
+ return;
+ }
+ int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
+ int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
+ bufferLength, &status) + 1; // +1: room for the terminating NUL
+ /* Handle the case the current buffer is not enough to hold the new id */
+ if (status == U_BUFFER_OVERFLOW_ERROR) {
+ U_ASSERT(newLength > bufferLength);
+ char* newFullName = (char *)uprv_malloc(newLength);
+ if (newFullName == nullptr) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ uprv_strcpy(newFullName, fullName);
+ // If baseName aliased the old buffer, re-point it before that buffer goes away;
+ // otherwise it would dangle and the initBaseName() call below would be skipped.
+ if (baseName == fullName) { baseName = newFullName; }
+ if (fullName != fullNameBuffer) { uprv_free(fullName); } // the old name was on the heap
+ fullName = newFullName;
+ status = U_ZERO_ERROR; // clear the overflow before retrying into the larger buffer
+ uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
+ } else {
+ U_ASSERT(newLength <= bufferLength);
+ }
+ if (U_SUCCESS(status) && baseName == fullName) {
+ // May have added the first keyword, meaning that the fullName is no longer also the baseName.
+ initBaseName(status);
+ }
+}
+
+void
+Locale::setKeywordValue(StringPiece keywordName, // StringPiece overload: copies both arguments, then calls the const char* overload
+ StringPiece keywordValue,
+ UErrorCode& status) {
+ // TODO: Remove the need for a const char* to a NUL terminated buffer.
+ const CharString keywordName_nul(keywordName, status);
+ const CharString keywordValue_nul(keywordValue, status);
+ setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status); // a failed status is rejected inside the callee
+}
+
+void
+Locale::setUnicodeKeywordValue(StringPiece keywordName, // maps the Unicode key/type to legacy form, then calls setKeywordValue()
+ StringPiece keywordValue,
+ UErrorCode& status) {
+ // TODO: Remove the need for a const char* to a NUL terminated buffer.
+ const CharString keywordName_nul(keywordName, status);
+ const CharString keywordValue_nul(keywordValue, status);
+
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
+
+ if (legacy_key == nullptr) { // no legacy key for this Unicode key
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+
+ const char* legacy_value = nullptr; // stays nullptr for an empty value; presumably that removes the keyword - confirm
+
+ if (!keywordValue_nul.isEmpty()) {
+ legacy_value =
+ uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
+
+ if (legacy_value == nullptr) { // no legacy type for this value
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ }
+
+ setKeywordValue(legacy_key, legacy_value, status);
+}
+
+const char *
+Locale::getBaseName() const { // Returns the stored pointer as is; setToBogus() leaves it NULL.
+ return baseName;
+}
+
+Locale::Iterator::~Iterator() = default; // defaulted destructor, defined out of line in this file
+
+//eof
+U_NAMESPACE_END
diff --git a/thirdparty/icu4c/common/loclikely.cpp b/thirdparty/icu4c/common/loclikely.cpp
new file mode 100644
index 0000000000..94a60aba3e
--- /dev/null
+++ b/thirdparty/icu4c/common/loclikely.cpp
@@ -0,0 +1,1410 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: loclikely.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010feb25
+* created by: Markus W. Scherer
+*
+* Code for likely and minimized locale subtags, separated out from other .cpp files
+* that then do not depend on resource bundle code and likely-subtags data.
+*/
+
+#include "unicode/bytestream.h"
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/uloc.h"
+#include "unicode/ures.h"
+#include "unicode/uscript.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "ustr_imp.h"
+
+/**
+ * These are the canonical strings for unknown languages, scripts and regions.
+ **/
+static const char* const unknownLanguage = "und"; // looked up / stripped in findLikelySubtags
+static const char* const unknownScript = "Zzzz"; // compared case-insensitively in parseTagString
+static const char* const unknownRegion = "ZZ"; // compared case-insensitively in parseTagString
+
+/**
+ * Looks up localeID in the "likelySubtags" resource; a leading "und" is stripped from the result.
+ * An empty localeID is looked up as "und"; one starting with '_' is looked up with "und" prepended.
+ * @param localeID The tag to find.
+ * @param buffer A buffer to hold the matching entry
+ * @param bufferLength The length of the output buffer; err is the in/out error code
+ * @return A pointer to "buffer" if found, or a null pointer if not.
+ */
+static const char* U_CALLCONV
+findLikelySubtags(const char* localeID,
+ char* buffer,
+ int32_t bufferLength,
+ UErrorCode* err) {
+ const char* result = NULL;
+
+ if (!U_FAILURE(*err)) {
+ int32_t resLen = 0;
+ const UChar* s = NULL;
+ UErrorCode tmpErr = U_ZERO_ERROR; // kept separate so a missing resource can be filtered out below
+ icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
+ if (U_SUCCESS(tmpErr)) {
+ icu::CharString und; // backing storage for the "und"-prefixed key; must outlive the lookup
+ if (localeID != NULL) {
+ if (*localeID == '\0') {
+ localeID = unknownLanguage;
+ } else if (*localeID == '_') {
+ und.append(unknownLanguage, *err);
+ und.append(localeID, *err);
+ if (U_FAILURE(*err)) {
+ return NULL;
+ }
+ localeID = und.data();
+ }
+ }
+ s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
+
+ if (U_FAILURE(tmpErr)) {
+ /*
+ * If a resource is missing, it's not really an error, it's
+ * just that we don't have any data for that particular locale ID.
+ */
+ if (tmpErr != U_MISSING_RESOURCE_ERROR) {
+ *err = tmpErr;
+ }
+ }
+ else if (resLen >= bufferLength) {
+ /* The buffer should never overflow. */
+ *err = U_INTERNAL_PROGRAM_ERROR;
+ }
+ else {
+ u_UCharsToChars(s, buffer, resLen + 1); // +1 copies the terminating NUL as well
+ if (resLen >= 3 &&
+ uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
+ (resLen == 3 || buffer[3] == '_')) {
+ uprv_memmove(buffer, buffer + 3, resLen - 3 + 1); // drop the "und" prefix, keeping the NUL
+ }
+ result = buffer;
+ }
+ } else {
+ *err = tmpErr; // the resource bundle itself could not be opened
+ }
+ }
+
+ return result;
+}
+
+/**
+ * Append a tag to a buffer, writing a '_' separator first when withSeparator is set.
+ * No bounds check is done: the buffer must be large enough to contain the resulting
+ * tag plus any separator. The tag must not be a zero-length string.
+ *
+ * @param tag The tag to add.
+ * @param tagLength The length of the tag.
+ * @param buffer The output buffer.
+ * @param bufferLength In/out: the number of bytes already used in buffer; advanced past the appended data.
+ **/
+static void U_CALLCONV
+appendTag(
+ const char* tag,
+ int32_t tagLength,
+ char* buffer,
+ int32_t* bufferLength,
+ UBool withSeparator) {
+
+ if (withSeparator) {
+ buffer[*bufferLength] = '_';
+ ++(*bufferLength);
+ }
+
+ uprv_memmove(
+ &buffer[*bufferLength],
+ tag,
+ tagLength); // exactly tagLength bytes; no NUL terminator is written
+
+ *bufferLength += tagLength;
+}
+
+/**
+ * Create a tag string from the supplied parameters. The lang, script and region
+ * parameters may be NULL pointers. If they are, their corresponding length parameters
+ * must be less than or equal to 0.
+ *
+ * If any of the language, script or region parameters are empty, and the alternateTags
+ * parameter is not NULL, it will be parsed for potential language, script and region tags
+ * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
+ * it contains no language tag, the language part of the new tag is left empty.
+ *
+ * The tag is written to a ByteSink, so there is no output-buffer overflow to report.
+ * A U_BUFFER_OVERFLOW_ERROR from parsing alternateTags is converted to
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * If a subtag length reaches its ULOC_*_CAPACITY limit, the function sets the error
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * If trailing data is present and does not start with '@', it is preceded by '_', and by
+ * a second '_' when no region was appended.
+ *
+ * @param lang The language tag to use.
+ * @param langLength The length of the language tag.
+ * @param script The script tag to use.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag to use.
+ * @param regionLength The length of the region tag.
+ * @param trailing Any trailing data to append to the new tag.
+ * @param trailingLength The length of the trailing data.
+ * @param alternateTags A string containing any alternate tags.
+ * @param sink The output sink receiving the tag string.
+ * @param err A pointer to a UErrorCode for error reporting.
+ **/
+static void U_CALLCONV
+createTagStringWithAlternates(
+ const char* lang,
+ int32_t langLength,
+ const char* script,
+ int32_t scriptLength,
+ const char* region,
+ int32_t regionLength,
+ const char* trailing,
+ int32_t trailingLength,
+ const char* alternateTags,
+ icu::ByteSink& sink,
+ UErrorCode* err) {
+
+ if (U_FAILURE(*err)) {
+ goto error;
+ }
+ else if (langLength >= ULOC_LANG_CAPACITY ||
+ scriptLength >= ULOC_SCRIPT_CAPACITY ||
+ regionLength >= ULOC_COUNTRY_CAPACITY) {
+ goto error; // oversized subtag: reported as U_ILLEGAL_ARGUMENT_ERROR at the label
+ }
+ else {
+ /**
+ * ULOC_FULLNAME_CAPACITY will provide enough capacity
+ * that we can build a string that contains the language,
+ * script and region code without worrying about overrunning
+ * the local tagBuffer (subtag lengths were bounded above).
+ **/
+ char tagBuffer[ULOC_FULLNAME_CAPACITY];
+ int32_t tagLength = 0; // bytes used so far in tagBuffer
+ UBool regionAppended = FALSE;
+
+ if (langLength > 0) {
+ appendTag(
+ lang,
+ langLength,
+ tagBuffer,
+ &tagLength,
+ /*withSeparator=*/FALSE);
+ }
+ else if (alternateTags == NULL) {
+ /*
+ * Use the empty string for an unknown language, if
+ * we found no language.
+ */
+ }
+ else {
+ /*
+ * Parse the alternateTags string for the language.
+ */
+ char alternateLang[ULOC_LANG_CAPACITY];
+ int32_t alternateLangLength = sizeof(alternateLang); // capacity on input, actual length after the call
+
+ alternateLangLength =
+ uloc_getLanguage(
+ alternateTags,
+ alternateLang,
+ alternateLangLength,
+ err);
+ if(U_FAILURE(*err) ||
+ alternateLangLength >= ULOC_LANG_CAPACITY) {
+ goto error;
+ }
+ else if (alternateLangLength == 0) {
+ /*
+ * Use the empty string for an unknown language, if
+ * we found no language.
+ */
+ }
+ else {
+ appendTag(
+ alternateLang,
+ alternateLangLength,
+ tagBuffer,
+ &tagLength,
+ /*withSeparator=*/FALSE);
+ }
+ }
+
+ if (scriptLength > 0) {
+ appendTag(
+ script,
+ scriptLength,
+ tagBuffer,
+ &tagLength,
+ /*withSeparator=*/TRUE);
+ }
+ else if (alternateTags != NULL) {
+ /*
+ * Parse the alternateTags string for the script.
+ */
+ char alternateScript[ULOC_SCRIPT_CAPACITY];
+
+ const int32_t alternateScriptLength =
+ uloc_getScript(
+ alternateTags,
+ alternateScript,
+ sizeof(alternateScript),
+ err);
+
+ if (U_FAILURE(*err) ||
+ alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
+ goto error;
+ }
+ else if (alternateScriptLength > 0) {
+ appendTag(
+ alternateScript,
+ alternateScriptLength,
+ tagBuffer,
+ &tagLength,
+ /*withSeparator=*/TRUE);
+ }
+ }
+
+ if (regionLength > 0) {
+ appendTag(
+ region,
+ regionLength,
+ tagBuffer,
+ &tagLength,
+ /*withSeparator=*/TRUE);
+
+ regionAppended = TRUE;
+ }
+ else if (alternateTags != NULL) {
+ /*
+ * Parse the alternateTags string for the region.
+ */
+ char alternateRegion[ULOC_COUNTRY_CAPACITY];
+
+ const int32_t alternateRegionLength =
+ uloc_getCountry(
+ alternateTags,
+ alternateRegion,
+ sizeof(alternateRegion),
+ err);
+ if (U_FAILURE(*err) ||
+ alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
+ goto error;
+ }
+ else if (alternateRegionLength > 0) {
+ appendTag(
+ alternateRegion,
+ alternateRegionLength,
+ tagBuffer,
+ &tagLength,
+ /*withSeparator=*/TRUE);
+
+ regionAppended = TRUE;
+ }
+ }
+
+ /**
+ * Copy the partial tag from our internal buffer to the supplied
+ * sink.
+ **/
+ sink.Append(tagBuffer, tagLength);
+
+ if (trailingLength > 0) {
+ if (*trailing != '@') { // keywords ('@...') attach directly; anything else needs a separator
+ sink.Append("_", 1);
+ if (!regionAppended) {
+ /* extra separator is required */
+ sink.Append("_", 1);
+ }
+ }
+
+ /*
+ * Copy the trailing data into the supplied sink.
+ */
+ sink.Append(trailing, trailingLength);
+ }
+
+ return;
+ }
+
+error:
+
+ /**
+ * An overflow indicates the locale ID passed in
+ * is ill-formed. If we got here, and there was
+ * no previous error, it's an implicit overflow.
+ **/
+ if (*err == U_BUFFER_OVERFLOW_ERROR ||
+ U_SUCCESS(*err)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR; // any other failure code is passed through unchanged
+ }
+}
+
+/**
+ * Create a tag string from the supplied parameters. The lang, script and region
+ * parameters may be NULL pointers. If they are, their corresponding length parameters
+ * must be less than or equal to 0. If the lang parameter is an empty string, the
+ * language part of the output is left empty.
+ *
+ * This is createTagStringWithAlternates() called with a NULL alternateTags
+ * argument; the tag is appended to the supplied ByteSink, so there is no
+ * output-buffer overflow to report.
+ *
+ * If an illegal argument is provided, the function returns the error
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param lang The language tag to use.
+ * @param langLength The length of the language tag.
+ * @param script The script tag to use.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag to use.
+ * @param regionLength The length of the region tag.
+ * @param trailing Any trailing data to append to the new tag.
+ * @param trailingLength The length of the trailing data.
+ * @param sink The output sink receiving the tag string.
+ * @param err A pointer to a UErrorCode for error reporting.
+ **/
+static void U_CALLCONV
+createTagString(
+ const char* lang,
+ int32_t langLength,
+ const char* script,
+ int32_t scriptLength,
+ const char* region,
+ int32_t regionLength,
+ const char* trailing,
+ int32_t trailingLength,
+ icu::ByteSink& sink,
+ UErrorCode* err)
+{
+ createTagStringWithAlternates(
+ lang,
+ langLength,
+ script,
+ scriptLength,
+ region,
+ regionLength,
+ trailing,
+ trailingLength,
+ NULL, // alternateTags: none
+ sink,
+ err);
+}
+
+/**
+ * Parse the language, script, and region subtags from a tag string, and copy the
+ * results into the corresponding output parameters. The buffers are null-terminated,
+ * unless overflow occurs. An "unknown" script (Zzzz) or region (ZZ) is reported as length 0.
+ *
+ * The langLength, scriptLength, and regionLength parameters are input/output
+ * parameters, and must contain the capacity of their corresponding buffers on
+ * input. On output, they will contain the actual length of the buffers, not
+ * including the null terminator.
+ *
+ * If the length of any of the output subtags exceeds the capacity of the corresponding
+ * buffer, the function copies as many bytes to the output buffer as it can, and returns
+ * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
+ * occurs.
+ *
+ * If an illegal argument is provided, the function returns the error
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param localeID The locale ID to parse.
+ * @param lang The language tag buffer.
+ * @param langLength The length of the language tag.
+ * @param script The script tag buffer.
+ * @param scriptLength The length of the script tag.
+ * @param region The region tag buffer.
+ * @param regionLength The length of the region tag.
+ * @param err A pointer to a UErrorCode for error reporting.
+ * @return The number of chars of the localeID parameter consumed.
+ **/
+static int32_t U_CALLCONV
+parseTagString(
+ const char* localeID,
+ char* lang,
+ int32_t* langLength,
+ char* script,
+ int32_t* scriptLength,
+ char* region,
+ int32_t* regionLength,
+ UErrorCode* err)
+{
+ const char* position = localeID; // parse cursor; advanced by each ulocimp_get* call
+ int32_t subtagLength = 0;
+
+ if(U_FAILURE(*err) ||
+ localeID == NULL ||
+ lang == NULL ||
+ langLength == NULL ||
+ script == NULL ||
+ scriptLength == NULL ||
+ region == NULL ||
+ regionLength == NULL) {
+ goto error;
+ }
+
+ subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
+
+ /*
+ * The original intent was to treat U_STRING_NOT_TERMINATED_WARNING as an
+ * error (an ill-formed tag). NOTE(review): the test below is U_FAILURE();
+ * confirm whether a warning code can actually reach the error path.
+ */
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+
+ *langLength = subtagLength;
+
+ /*
+ * If no language was present, use the empty string instead.
+ * Otherwise, move past any separator.
+ */
+ if (_isIDSeparator(*position)) {
+ ++position;
+ }
+
+ subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+
+ *scriptLength = subtagLength;
+
+ if (*scriptLength > 0) {
+ if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
+ /**
+ * If the script part is the "unknown" script, then don't return it.
+ **/
+ *scriptLength = 0;
+ }
+
+ /*
+ * Move past any separator.
+ */
+ if (_isIDSeparator(*position)) {
+ ++position;
+ }
+ }
+
+ subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
+
+ if(U_FAILURE(*err)) {
+ goto error;
+ }
+
+ *regionLength = subtagLength;
+
+ if (*regionLength > 0) {
+ if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
+ /**
+ * If the region part is the "unknown" region, then don't return it.
+ **/
+ *regionLength = 0;
+ }
+ } else if (*position != 0 && *position != '@') {
+ /* back up over consumed trailing separator */
+ --position;
+ }
+
+exit:
+
+ return (int32_t)(position - localeID); // also reached from the error path, with whatever was consumed so far
+
+error:
+
+ /**
+ * If we get here, we have no explicit error, it's the result of an
+ * illegal argument.
+ **/
+ if (!U_FAILURE(*err)) {
+ *err = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+
+ goto exit;
+}
+
+/**
+ * Looks up likely subtags for (lang, script, region), trying the most specific
+ * key first: lang+script+region, lang+script, lang+region, then lang alone.
+ * On the first match the tag is written through createTagStringWithAlternates()
+ * to sink, along with variants and the subtags that were not part of the key.
+ *
+ * @return TRUE if a lookup matched; FALSE if none did or an error was detected.
+ */
+static UBool U_CALLCONV
+createLikelySubtagsString(
+    const char* lang, int32_t langLength,
+    const char* script, int32_t scriptLength,
+    const char* region, int32_t regionLength,
+    const char* variants, int32_t variantsLength,
+    icu::ByteSink& sink,
+    UErrorCode* err) {
+    /* ULOC_FULLNAME_CAPACITY provides enough capacity for a string that
+       contains the language, script and region code. */
+    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
+
+    if(U_FAILURE(*err)) {
+        goto error;
+    }
+
+    /**
+     * Try the language with the script and region first.
+     **/
+    if (scriptLength > 0 && regionLength > 0) {
+
+        const char* likelySubtags = NULL;
+
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                region,
+                regionLength,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        likelySubtags =
+            findLikelySubtags(
+                tagBuffer.data(),
+                likelySubtagsBuffer,
+                sizeof(likelySubtagsBuffer),
+                err);
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        if (likelySubtags != NULL) {
+            /* Always use the language tag from the
+               maximal string, since it may be more
+               specific than the one provided. */
+            createTagStringWithAlternates(
+                NULL,
+                0,
+                NULL,
+                0,
+                NULL,
+                0,
+                variants,
+                variantsLength,
+                likelySubtags,
+                sink,
+                err);
+            return TRUE;
+        }
+    }
+
+    /**
+     * Try the language with just the script.
+     **/
+    if (scriptLength > 0) {
+
+        const char* likelySubtags = NULL;
+
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        likelySubtags =
+            findLikelySubtags(
+                tagBuffer.data(),
+                likelySubtagsBuffer,
+                sizeof(likelySubtagsBuffer),
+                err);
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        if (likelySubtags != NULL) {
+            /* Always use the language tag from the
+               maximal string, since it may be more
+               specific than the one provided. */
+            createTagStringWithAlternates(
+                NULL,
+                0,
+                NULL,
+                0,
+                region,
+                regionLength,
+                variants,
+                variantsLength,
+                likelySubtags,
+                sink,
+                err);
+            return TRUE;
+        }
+    }
+
+    /**
+     * Try the language with just the region.
+     **/
+    if (regionLength > 0) {
+
+        const char* likelySubtags = NULL;
+
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                region,
+                regionLength,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        likelySubtags =
+            findLikelySubtags(
+                tagBuffer.data(),
+                likelySubtagsBuffer,
+                sizeof(likelySubtagsBuffer),
+                err);
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        if (likelySubtags != NULL) {
+            /* Always use the language tag from the
+               maximal string, since it may be more
+               specific than the one provided. */
+            createTagStringWithAlternates(
+                NULL,
+                0,
+                script,
+                scriptLength,
+                NULL,
+                0,
+                variants,
+                variantsLength,
+                likelySubtags,
+                sink,
+                err);
+            return TRUE;
+        }
+    }
+
+    /**
+     * Finally, try just the language.
+     **/
+    {
+        const char* likelySubtags = NULL;
+
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        likelySubtags =
+            findLikelySubtags(
+                tagBuffer.data(),
+                likelySubtagsBuffer,
+                sizeof(likelySubtagsBuffer),
+                err);
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+
+        if (likelySubtags != NULL) {
+            /* Always use the language tag from the
+               maximal string, since it may be more
+               specific than the one provided. */
+            createTagStringWithAlternates(
+                NULL,
+                0,
+                script,
+                scriptLength,
+                region,
+                regionLength,
+                variants,
+                variantsLength,
+                likelySubtags,
+                sink,
+                err);
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+
+error:
+
+    if (!U_FAILURE(*err)) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    return FALSE;
+}
+
+#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t count = 0; \
+ int32_t i; \
+ for (i = 0; i < trailingLength; i++) { \
+ if (trailing[i] == '-' || trailing[i] == '_') { \
+ count = 0; \
+ if (count > 8) { \
+ goto error; \
+ } \
+ } else if (trailing[i] == '@') { \
+ break; \
+ } else if (count > 8) { \
+ goto error; \
+ } else { \
+ count++; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Maximizes localeID (callers in this file pass an already canonicalized ID):
+ * parses it into language, script and region, looks up likely subtags and
+ * appends the result to sink, or localeID itself when no lookup matches.
+ *
+ * @return TRUE if the value appended to sink came from a likely-subtags match.
+ */
+static UBool
+_uloc_addLikelySubtags(const char* localeID,
+                       icu::ByteSink& sink,
+                       UErrorCode* err) {
+    char lang[ULOC_LANG_CAPACITY];
+    int32_t langLength = sizeof(lang);
+    char script[ULOC_SCRIPT_CAPACITY];
+    int32_t scriptLength = sizeof(script);
+    char region[ULOC_COUNTRY_CAPACITY];
+    int32_t regionLength = sizeof(region);
+    const char* trailing = "";
+    int32_t trailingLength = 0;
+    int32_t trailingIndex = 0;
+    UBool success = FALSE;
+
+    if (U_FAILURE(*err) || localeID == NULL) {
+        goto error;
+    }
+
+    trailingIndex = parseTagString(
+        localeID,
+        lang,
+        &langLength,
+        script,
+        &scriptLength,
+        region,
+        &regionLength,
+        err);
+    if(U_FAILURE(*err)) {
+        /* Overflow indicates an illegal argument error */
+        if (*err == U_BUFFER_OVERFLOW_ERROR) {
+            *err = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        goto error;
+    }
+
+    /* Find the length of the trailing portion. */
+    while (_isIDSeparator(localeID[trailingIndex])) {
+        trailingIndex++;
+    }
+    trailing = &localeID[trailingIndex];
+    trailingLength = (int32_t)uprv_strlen(trailing);
+
+    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
+
+    success =
+        createLikelySubtagsString(
+            lang,
+            langLength,
+            script,
+            scriptLength,
+            region,
+            regionLength,
+            trailing,
+            trailingLength,
+            sink,
+            err);
+
+    if (!success) {
+        /* No lookup matched (or it failed): hand back localeID unchanged. */
+        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
+        sink.Append(localeID, localIDLength);
+    }
+
+    return success;
+
+error:
+    /* Reaching this label without an error code set means an illegal argument. */
+    if (!U_FAILURE(*err)) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    return FALSE;
+}
+
+// Adds likely subtags to the sink.
+// Returns true if the value in the sink was produced by a match during the lookup.
+// Returns false if the value in the sink is the same as the input because
+// there was no match after the lookup.
+static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
+
+/**
+ * Minimizes localeID: maximizes its language/script/region first, then tries
+ * language, language+region and language+script in that order and appends to
+ * sink the first one whose own maximization matches, followed by the trailing
+ * part. If none matches, the maximal tag plus trailing is appended; if the
+ * maximization found no match at all, localeID is appended unchanged.
+ */
+static void
+_uloc_minimizeSubtags(const char* localeID,
+                      icu::ByteSink& sink,
+                      UErrorCode* err) {
+    icu::CharString maximizedTagBuffer;
+
+    char lang[ULOC_LANG_CAPACITY];
+    int32_t langLength = sizeof(lang);
+    char script[ULOC_SCRIPT_CAPACITY];
+    int32_t scriptLength = sizeof(script);
+    char region[ULOC_COUNTRY_CAPACITY];
+    int32_t regionLength = sizeof(region);
+    const char* trailing = "";
+    int32_t trailingLength = 0;
+    int32_t trailingIndex = 0;
+    UBool successGetMax = FALSE;
+
+    if (U_FAILURE(*err) || localeID == NULL) {
+        goto error;
+    }
+
+    trailingIndex =
+        parseTagString(
+            localeID,
+            lang,
+            &langLength,
+            script,
+            &scriptLength,
+            region,
+            &regionLength,
+            err);
+    if(U_FAILURE(*err)) {
+        /* Overflow indicates an illegal argument error */
+        if (*err == U_BUFFER_OVERFLOW_ERROR) {
+            *err = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+
+        goto error;
+    }
+
+    /* Find the spot where the variants or the keywords begin, if any. */
+    while (_isIDSeparator(localeID[trailingIndex])) {
+        trailingIndex++;
+    }
+    trailing = &localeID[trailingIndex];
+    trailingLength = (int32_t)uprv_strlen(trailing);
+
+    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
+
+    {
+        icu::CharString base;
+        {
+            icu::CharStringByteSink baseSink(&base);
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                region,
+                regionLength,
+                NULL,
+                0,
+                baseSink,
+                err);
+        }
+
+        /* First get the maximization of the base tag from AddLikelySubtags. */
+        {
+            icu::CharStringByteSink maxSink(&maximizedTagBuffer);
+            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
+        }
+    }
+
+    if(U_FAILURE(*err)) {
+        goto error;
+    }
+
+    if (!successGetMax) {
+        /**
+         * If we got here, return the locale ID parameter unchanged.
+         **/
+        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
+        sink.Append(localeID, localeIDLength);
+        return;
+    }
+
+    // In the following, the lang, script, region are referring to those in
+    // the maximizedTagBuffer, not the one in the localeID.
+    langLength = sizeof(lang);
+    scriptLength = sizeof(script);
+    regionLength = sizeof(region);
+    parseTagString(
+        maximizedTagBuffer.data(),
+        lang,
+        &langLength,
+        script,
+        &scriptLength,
+        region,
+        &regionLength,
+        err);
+    if(U_FAILURE(*err)) {
+        goto error;
+    }
+
+    /**
+     * Start first with just the language.
+     **/
+    {
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createLikelySubtagsString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+        else if (!tagBuffer.isEmpty() &&
+                 uprv_strnicmp(
+                     maximizedTagBuffer.data(),
+                     tagBuffer.data(),
+                     tagBuffer.length()) == 0) {
+
+            createTagString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                trailing,
+                trailingLength,
+                sink,
+                err);
+            return;
+        }
+    }
+
+    /**
+     * Next, try the language and region.
+     **/
+    if (regionLength > 0) {
+
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createLikelySubtagsString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                region,
+                regionLength,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+        else if (!tagBuffer.isEmpty() &&
+                 uprv_strnicmp(
+                     maximizedTagBuffer.data(),
+                     tagBuffer.data(),
+                     tagBuffer.length()) == 0) {
+
+            createTagString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                region,
+                regionLength,
+                trailing,
+                trailingLength,
+                sink,
+                err);
+            return;
+        }
+    }
+
+    /**
+     * Finally, try the language and script.  This is our last chance,
+     * since trying with all three subtags would only yield the
+     * maximal version that we already have.
+     **/
+    if (scriptLength > 0) {
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink tagSink(&tagBuffer);
+            createLikelySubtagsString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                tagSink,
+                err);
+        }
+
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+        else if (!tagBuffer.isEmpty() &&
+                 uprv_strnicmp(
+                     maximizedTagBuffer.data(),
+                     tagBuffer.data(),
+                     tagBuffer.length()) == 0) {
+
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                NULL,
+                0,
+                trailing,
+                trailingLength,
+                sink,
+                err);
+            return;
+        }
+    }
+
+    {
+        /**
+         * If we got here, return the max + trail.
+         **/
+        createTagString(
+            lang,
+            langLength,
+            script,
+            scriptLength,
+            region,
+            regionLength,
+            trailing,
+            trailingLength,
+            sink,
+            err);
+        return;
+    }
+
+error:
+
+    if (!U_FAILURE(*err)) {
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/**
+ * Canonicalizes localeID into buffer via uloc_canonicalize().
+ *
+ * Both U_STRING_NOT_TERMINATED_WARNING and U_BUFFER_OVERFLOW_ERROR coming out
+ * of that call are converted to U_ILLEGAL_ARGUMENT_ERROR, so the buffer is
+ * only accepted when it holds a terminated result.
+ *
+ * @return TRUE if *err holds no failure afterwards, FALSE otherwise.
+ */
+static UBool
+do_canonicalize(const char* localeID,
+        char* buffer,
+        int32_t bufferCapacity,
+        UErrorCode* err)
+{
+    uloc_canonicalize(localeID, buffer, bufferCapacity, err);
+
+    switch (*err) {
+    case U_STRING_NOT_TERMINATED_WARNING:
+    case U_BUFFER_OVERFLOW_ERROR:
+        *err = U_ILLEGAL_ARGUMENT_ERROR;
+        return FALSE;
+    default:
+        return U_FAILURE(*err) ? FALSE : TRUE;
+    }
+}
+
+// C API wrapper: runs ulocimp_addLikelySubtags() against a sink over the
+// caller's buffer and normally returns the byte count reported by that sink.
+U_CAPI int32_t U_EXPORT2
+uloc_addLikelySubtags(const char* localeID,
+                      char* maximizedLocaleID,
+                      int32_t maximizedLocaleIDCapacity,
+                      UErrorCode* status) {
+    if (U_FAILURE(*status)) { return 0; }
+
+    icu::CheckedArrayByteSink sink(maximizedLocaleID, maximizedLocaleIDCapacity);
+    ulocimp_addLikelySubtags(localeID, sink, status);
+    const int32_t appended = sink.NumberOfBytesAppended();
+    const UBool overflowed = sink.Overflowed();
+
+    if (U_FAILURE(*status)) {
+        if (overflowed) { return appended; }
+        return -1;
+    }
+
+    if (!overflowed) {
+        u_terminateChars(maximizedLocaleID, maximizedLocaleIDCapacity, appended, status);
+        return appended;
+    }
+
+    // The output did not fit: flag it, but still return the sink's count.
+    *status = U_BUFFER_OVERFLOW_ERROR;
+    return appended;
+}
+
+// Canonicalizes localeID, then maximizes the canonical form into sink.
+static UBool
+_ulocimp_addLikelySubtags(const char* localeID, icu::ByteSink& sink, UErrorCode* status) {
+    char canonical[ULOC_FULLNAME_CAPACITY];
+    const UBool canonicalized =
+        do_canonicalize(localeID, canonical, sizeof(canonical), status);
+    if (!canonicalized) {
+        return FALSE;
+    }
+    return _uloc_addLikelySubtags(canonical, sink, status);
+}
+
+// Sink-based entry point; the match flag of the internal helper is dropped.
+U_CAPI void U_EXPORT2
+ulocimp_addLikelySubtags(const char* localeID, icu::ByteSink& sink, UErrorCode* status) {
+    const UBool matched = _ulocimp_addLikelySubtags(localeID, sink, status);
+    (void)matched;
+}
+
+// C API wrapper: runs ulocimp_minimizeSubtags() against a sink over the
+// caller's buffer and normally returns the byte count reported by that sink.
+U_CAPI int32_t U_EXPORT2
+uloc_minimizeSubtags(const char* localeID,
+                     char* minimizedLocaleID,
+                     int32_t minimizedLocaleIDCapacity,
+                     UErrorCode* status) {
+    if (U_FAILURE(*status)) { return 0; }
+
+    icu::CheckedArrayByteSink sink(minimizedLocaleID, minimizedLocaleIDCapacity);
+    ulocimp_minimizeSubtags(localeID, sink, status);
+    const int32_t appended = sink.NumberOfBytesAppended();
+    const UBool overflowed = sink.Overflowed();
+
+    if (U_FAILURE(*status)) {
+        if (overflowed) { return appended; }
+        return -1;
+    }
+
+    if (!overflowed) {
+        u_terminateChars(minimizedLocaleID, minimizedLocaleIDCapacity, appended, status);
+        return appended;
+    }
+
+    // The output did not fit: flag it, but still return the sink's count.
+    *status = U_BUFFER_OVERFLOW_ERROR;
+    return appended;
+}
+
+// Canonicalizes localeID and, if that succeeds, passes the canonical form and
+// sink on to _uloc_minimizeSubtags(); otherwise it returns without calling it.
+U_CAPI void U_EXPORT2
+ulocimp_minimizeSubtags(const char* localeID, icu::ByteSink& sink, UErrorCode* status) {
+    char canonical[ULOC_FULLNAME_CAPACITY];
+    if (!do_canonicalize(localeID, canonical, sizeof(canonical), status)) {
+        return;
+    }
+    _uloc_minimizeSubtags(canonical, sink, status);
+}
+
+// Concatenated (language subtag, direction marker) pairs for a fast check of
+// common languages: '-' after the subtag means LTR, '+' after it means RTL.
+static const char LANG_DIR_STRING[] =
+ "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
+
+// Implemented here because this calls ulocimp_addLikelySubtags().
+// Uses the locale's explicit script if there is one, else a table of common
+// languages, else the script from likely subtags; FALSE on any lookup error.
+U_CAPI UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale) {
+    UErrorCode status = U_ZERO_ERROR;
+    char script[8];
+    int32_t scriptLen = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &status);
+    const UBool haveScript = !U_FAILURE(status) &&
+        status != U_STRING_NOT_TERMINATED_WARNING && scriptLen != 0;
+    if (!haveScript) {
+        // Fastpath: the writing direction of some common languages is known.
+        status = U_ZERO_ERROR;
+        char lang[8];
+        const int32_t langLen = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &status);
+        if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+            return FALSE;
+        }
+        const char *hit = (langLen > 0) ? uprv_strstr(LANG_DIR_STRING, lang) : NULL;
+        if (hit != NULL) {
+            const char marker = hit[langLen];
+            if (marker == '-') { return FALSE; }
+            if (marker == '+') { return TRUE; }
+            // Anything else: partial match of a longer code, keep going.
+        }
+        // Otherwise, find the likely script.
+        status = U_ZERO_ERROR;
+        icu::CharString maximized;
+        {
+            icu::CharStringByteSink maxSink(&maximized);
+            ulocimp_addLikelySubtags(locale, maxSink, &status);
+        }
+        if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+            return FALSE;
+        }
+        scriptLen = uloc_getScript(maximized.data(), script, UPRV_LENGTHOF(script), &status);
+        if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING ||
+                scriptLen == 0) {
+            return FALSE;
+        }
+    }
+    // At this point script holds a non-empty script code from one of the lookups.
+    const int32_t propertyValue = u_getPropertyValueEnum(UCHAR_SCRIPT, script);
+    return uscript_isRightToLeft((UScriptCode)propertyValue);
+}
+
+U_NAMESPACE_BEGIN
+
+UBool Locale::isRightToLeft() const {
+    const char *baseName = getBaseName();  // the check runs on the base name
+    return uloc_isRightToLeft(baseName);
+}
+
+U_NAMESPACE_END
+
+// Size of the rg buffer: must at least allow for the rg key value (6) plus terminator (1).
+#define ULOC_RG_BUFLEN 8
+
+// Picks the region for localeID: an "rg" keyword value of the form RRZZZZ
+// wins, then the locale's country, then (only if inferRegion) the country of
+// the likely-subtags result. The region is copied to region/regionCapacity.
+U_CAPI int32_t U_EXPORT2
+ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
+                                     char *region, int32_t regionCapacity, UErrorCode* status) {
+    if (U_FAILURE(*status)) { return 0; }
+
+    char rgBuf[ULOC_RG_BUFLEN];
+    UErrorCode localStatus = U_ZERO_ERROR;
+    int32_t rgLen = 0;
+
+    // First check for rg keyword value
+    const int32_t keywordLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &localStatus);
+    if (!U_FAILURE(localStatus) && keywordLen == 6) {
+        // rgBuf guaranteed to be zero terminated here, with text len 6
+        for (int32_t i = 0; rgBuf[i] != 0; ++i) {
+            rgBuf[i] = uprv_toupper(rgBuf[i]);
+        }
+        if (uprv_strcmp(rgBuf + 2, "ZZZZ") == 0) {
+            rgLen = 2;
+        }
+    }
+
+    if (rgLen == 0) {
+        // No valid rg keyword value, try for unicode_region_subtag
+        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
+        if (U_FAILURE(*status)) {
+            rgLen = 0;
+        } else if (inferRegion && rgLen == 0) {
+            // no unicode_region_subtag but inferRegion TRUE, try likely subtags
+            localStatus = U_ZERO_ERROR;
+            icu::CharString maximized;
+            {
+                icu::CharStringByteSink maxSink(&maximized);
+                ulocimp_addLikelySubtags(localeID, maxSink, &localStatus);
+            }
+            if (U_SUCCESS(localStatus)) {
+                rgLen = uloc_getCountry(maximized.data(), rgBuf, ULOC_RG_BUFLEN, status);
+                if (U_FAILURE(*status)) { rgLen = 0; }
+            }
+        }
+    }
+
+    rgBuf[rgLen] = 0;
+    uprv_strncpy(region, rgBuf, regionCapacity);
+    return u_terminateChars(region, regionCapacity, rgLen, status);
+}
+
diff --git a/thirdparty/icu4c/common/loclikelysubtags.cpp b/thirdparty/icu4c/common/loclikelysubtags.cpp
new file mode 100644
index 0000000000..a031bfa587
--- /dev/null
+++ b/thirdparty/icu4c/common/loclikelysubtags.cpp
@@ -0,0 +1,682 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// loclikelysubtags.cpp
+// created: 2019may08 Markus W. Scherer
+
+#include <utility>
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "loclikelysubtags.h"
+#include "lsr.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "umutex.h"
+#include "uniquecharstr.h"
+#include "uresdata.h"
+#include "uresimp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+// File-local one-character prefixes; NOTE(review): their use lies outside this chunk.
+constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT
+constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI
+constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK
+
+} // namespace
+
+// Move constructor: copies every member from data, then nulls the two
+// pointers that ~LocaleDistanceData() releases so data no longer owns them.
+LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data)
+        : distanceTrieBytes(data.distanceTrieBytes), regionToPartitions(data.regionToPartitions),
+          partitions(data.partitions), paradigms(data.paradigms),
+          paradigmsLength(data.paradigmsLength), distances(data.distances) {
+    data.paradigms = nullptr;
+    data.partitions = nullptr;
+}
+
+LocaleDistanceData::~LocaleDistanceData() {
+    delete[] paradigms;     // array form, matching delete[]
+    uprv_free(partitions);  // released through uprv_free(), not delete
+}
+
+// TODO(ICU-20777): Rename to just LikelySubtagsData.
+struct XLikelySubtagsData {
+ UResourceBundle *langInfoBundle = nullptr;
+ UniqueCharStrings strings;
+ CharStringMap languageAliases;
+ CharStringMap regionAliases;
+ const uint8_t *trieBytes = nullptr;
+ LSR *lsrs = nullptr;
+ int32_t lsrsLength = 0;
+
+ LocaleDistanceData distanceData;
+
+ XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {} // errorCode is only forwarded to strings
+
+ ~XLikelySubtagsData() {
+     delete[] lsrs;               // array allocated by load()
+     ures_close(langInfoBundle);  // bundle opened by load()
+ }
+
+ /**
+  * Opens the "langInfo" bundle and fills this object from its "likely" table
+  * (aliases, LSRs, trie bytes) and, if present, its "match" table (distance
+  * data). Sets errorCode and returns early on missing or malformed data.
+  */
+ void load(UErrorCode &errorCode) {
+     langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
+     if (U_FAILURE(errorCode)) { return; }
+     StackUResourceBundle stackTempBundle;
+     ResourceDataValue value;
+     ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(), value, errorCode);
+     ResourceTable likelyTable = value.getTable(errorCode);
+     if (U_FAILURE(errorCode)) { return; }
+
+     // Read all strings in the resource bundle and convert them to invariant char *.
+     LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
+     int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
+     if (!readStrings(likelyTable, "languageAliases", value,
+                      languageIndexes, languagesLength, errorCode) ||
+             !readStrings(likelyTable, "regionAliases", value,
+                          regionIndexes, regionsLength, errorCode) ||
+             !readStrings(likelyTable, "lsrs", value,
+                          lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
+         return;
+     }
+     if ((languagesLength & 1) != 0 || (regionsLength & 1) != 0 || (lsrSubtagsLength % 3) != 0) {
+         errorCode = U_INVALID_FORMAT_ERROR;
+         return;
+     }
+     if (lsrSubtagsLength == 0) {
+         errorCode = U_MISSING_RESOURCE_ERROR;
+         return;
+     }
+
+     if (!likelyTable.findValue("trie", value)) {
+         errorCode = U_MISSING_RESOURCE_ERROR;
+         return;
+     }
+     int32_t length;
+     trieBytes = value.getBinary(length, errorCode);
+     if (U_FAILURE(errorCode)) { return; }
+
+     // Also read distance/matcher data if available,
+     // to open & keep only one resource bundle pointer
+     // and to use one single UniqueCharStrings.
+     UErrorCode matchErrorCode = U_ZERO_ERROR;
+     ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(), value, matchErrorCode);
+     LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes;
+     int32_t partitionsLength = 0, paradigmSubtagsLength = 0;
+     if (U_SUCCESS(matchErrorCode)) {
+         ResourceTable matchTable = value.getTable(errorCode);
+         if (U_FAILURE(errorCode)) { return; }
+
+         if (matchTable.findValue("trie", value)) {
+             distanceData.distanceTrieBytes = value.getBinary(length, errorCode);
+             if (U_FAILURE(errorCode)) { return; }
+         }
+
+         if (matchTable.findValue("regionToPartitions", value)) {
+             distanceData.regionToPartitions = value.getBinary(length, errorCode);
+             if (U_FAILURE(errorCode)) { return; }
+             if (length < LSR::REGION_INDEX_LIMIT) {
+                 errorCode = U_INVALID_FORMAT_ERROR;
+                 return;
+             }
+         }
+
+         if (!readStrings(matchTable, "partitions", value,
+                          partitionIndexes, partitionsLength, errorCode) ||
+                 !readStrings(matchTable, "paradigms", value,
+                              paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
+             return;
+         }
+         if ((paradigmSubtagsLength % 3) != 0) {
+             errorCode = U_INVALID_FORMAT_ERROR;
+             return;
+         }
+
+         if (matchTable.findValue("distances", value)) {
+             distanceData.distances = value.getIntVector(length, errorCode);
+             if (U_FAILURE(errorCode)) { return; }
+             if (length < 4) {  // LocaleDistance IX_LIMIT
+                 errorCode = U_INVALID_FORMAT_ERROR;
+                 return;
+             }
+         }
+     } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) {
+         // ok for likely subtags
+     } else {  // error other than missing resource
+         errorCode = matchErrorCode;
+         return;
+     }
+
+     // Fetch & store invariant-character versions of strings
+     // only after we have collected and de-duplicated all of them.
+     strings.freeze();
+
+     languageAliases = CharStringMap(languagesLength / 2, errorCode);
+     for (int32_t i = 0; i < languagesLength; i += 2) {
+         languageAliases.put(strings.get(languageIndexes[i]),
+                             strings.get(languageIndexes[i + 1]), errorCode);
+     }
+
+     regionAliases = CharStringMap(regionsLength / 2, errorCode);
+     for (int32_t i = 0; i < regionsLength; i += 2) {
+         regionAliases.put(strings.get(regionIndexes[i]),
+                           strings.get(regionIndexes[i + 1]), errorCode);
+     }
+     if (U_FAILURE(errorCode)) { return; }
+
+     lsrsLength = lsrSubtagsLength / 3;
+     lsrs = new LSR[lsrsLength];
+     if (lsrs == nullptr) {
+         errorCode = U_MEMORY_ALLOCATION_ERROR;
+         return;
+     }
+     for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
+         lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
+                       strings.get(lsrSubtagIndexes[i + 1]),
+                       strings.get(lsrSubtagIndexes[i + 2]),
+                       LSR::IMPLICIT_LSR);
+     }
+
+     if (partitionsLength > 0) {
+         distanceData.partitions = static_cast<const char **>(uprv_malloc(partitionsLength * sizeof(const char *)));
+         if (distanceData.partitions == nullptr) {
+             errorCode = U_MEMORY_ALLOCATION_ERROR;
+             return;
+         }
+         for (int32_t i = 0; i < partitionsLength; ++i) {
+             distanceData.partitions[i] = strings.get(partitionIndexes[i]);
+         }
+     }
+
+     if (paradigmSubtagsLength > 0) {
+         distanceData.paradigmsLength = paradigmSubtagsLength / 3;
+         LSR *paradigms = new LSR[distanceData.paradigmsLength];
+         if (paradigms == nullptr) {
+             errorCode = U_MEMORY_ALLOCATION_ERROR;
+             return;
+         }
+         for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
+             paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
+                                strings.get(paradigmSubtagIndexes[i + 1]),
+                                strings.get(paradigmSubtagIndexes[i + 2]),
+                                LSR::DONT_CARE_FLAGS);
+         }
+         distanceData.paradigms = paradigms;
+     }
+ }
+
+private:
+ bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
+ LocalMemory