summaryrefslogtreecommitdiff
path: root/thirdparty/icu4c/common/locid.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/icu4c/common/locid.cpp')
-rw-r--r--thirdparty/icu4c/common/locid.cpp277
1 files changed, 253 insertions, 24 deletions
diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp
index 2804e36bf6..0d506293a9 100644
--- a/thirdparty/icu4c/common/locid.cpp
+++ b/thirdparty/icu4c/common/locid.cpp
@@ -35,6 +35,7 @@
#include "unicode/bytestream.h"
#include "unicode/locid.h"
+#include "unicode/localebuilder.h"
#include "unicode/strenum.h"
#include "unicode/stringpiece.h"
#include "unicode/uloc.h"
@@ -253,7 +254,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
Locale::~Locale()
{
- if (baseName != fullName) {
+ if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@@ -465,7 +466,7 @@ Locale& Locale::operator=(const Locale& other) {
}
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
- if (baseName != fullName) uprv_free(baseName);
+ if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
if (fullName != fullNameBuffer) uprv_free(fullName);
if (other.fullName == other.fullNameBuffer) {
@@ -523,7 +524,7 @@ static const char* const KNOWN_CANONICALIZED[] = {
"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
- "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
+ "nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
@@ -626,6 +627,17 @@ private:
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
+
+ // Read the subdivisionAlias data from alias to
+ // strings+types+replacementIndexes
+ // Allocate length items for types, to store the type field.
+ // Allocate length items for replacementIndexes,
+ // to store the index in the strings for the replacement subdivision.
+ void readSubdivisionAlias(UResourceBundle* alias,
+ UniqueCharStrings* strings,
+ LocalMemory<const char*>& types,
+ LocalMemory<int32_t>& replacementIndexes,
+ int32_t &length, UErrorCode &status);
};
/**
@@ -646,6 +658,7 @@ public:
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
+ const CharStringMap& subdivisionMap() const { return subdivision; }
static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
@@ -657,11 +670,13 @@ private:
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
+ CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
+ subdivision(std::move(subdivisionMap)),
strings(strings) {
}
@@ -675,6 +690,7 @@ private:
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
+ CharStringMap subdivision;
CharString* strings;
friend class AliasDataBuilder;
@@ -866,6 +882,34 @@ AliasDataBuilder::readVariantAlias(
}
/**
+ * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
+ * Allocate length items for types, to store the type field. Allocate length
+ * items for replacementIndexes, to store the index in the strings for the
+ * replacement subdivisions.
+ */
+void
+AliasDataBuilder::readSubdivisionAlias(
+    UResourceBundle* alias,
+    UniqueCharStrings* strings,
+    LocalMemory<const char*>& types,
+    LocalMemory<int32_t>& replacementIndexes,
+    int32_t &length,
+    UErrorCode &status)
+{
+    // Validator for each alias type: debug builds assert that its length is
+    // between 3 and 8 characters; other builds accept every type unchecked.
+#if U_DEBUG
+    auto checkType = [](const char* type) {
+        U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
+    };
+#else
+    auto checkType = [](const char*) {};
+#endif
+    // Replacement strings are accepted without any validation.
+    auto checkReplacement = [](const UnicodeString&) { };
+    // All of the actual reading is delegated to the shared readAlias() helper.
+    readAlias(
+        alias, strings, types, replacementIndexes, length,
+        checkType,
+        checkReplacement,
+        status);
+}
+
+/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains alias of language, country, script and variants.
*
@@ -904,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
+ LocalUResourceBundlePointer subdivisionAlias(
+ ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
- variantLength = 0;
+ variantLength = 0, subdivisionLength = 0;
// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
@@ -954,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
variantReplacementIndexes,
variantLength, status);
+ // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
+ // and strings
+ LocalMemory<const char*> subdivisionTypes;
+ LocalMemory<int32_t> subdivisionReplacementIndexes;
+ readSubdivisionAlias(subdivisionAlias.getAlias(),
+ &strings,
+ subdivisionTypes,
+ subdivisionReplacementIndexes,
+ subdivisionLength, status);
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -993,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
status);
}
+ // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
+ CharStringMap subdivisionMap(2, status);
+ for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
+ subdivisionMap.put(subdivisionTypes[i],
+ strings.get(subdivisionReplacementIndexes[i]),
+ status);
+ }
+
if (U_FAILURE(status)) {
return nullptr;
}
@@ -1003,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
+ std::move(subdivisionMap),
strings.orphanCharStrings());
if (data == nullptr) {
@@ -1028,7 +1093,7 @@ public:
// place the the replaced locale ID in out and return true.
// Otherwise return false for no replacement or error.
bool replace(
- const Locale& locale, CharString& out, UErrorCode status);
+ const Locale& locale, CharString& out, UErrorCode& status);
private:
const char* language;
@@ -1104,6 +1169,14 @@ private:
// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);
+
+ // Replace by using subdivisionAlias.
+ bool replaceSubdivision(StringPiece subdivision,
+ CharString& output, UErrorCode& status);
+
+ // Replace transformed extensions.
+ bool replaceTransformedExtensions(
+ CharString& transformedExtensions, CharString& output, UErrorCode& status);
};
CharString&
@@ -1293,7 +1366,6 @@ AliasReplacer::replaceLanguage(
}
}
if (replacedExtensions != nullptr) {
- // TODO(ICU-21292)
// DO NOTHING
// UTS35 does not specifiy what should we do if we have extensions in the
// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
@@ -1336,10 +1408,13 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
// Cannot use nullptr for language because that will construct
// the default locale, in that case, use "und" to get the correct
// locale.
- Locale l(language == nullptr ? "und" : language, nullptr, script);
+ Locale l = LocaleBuilder()
+ .setLanguage(language == nullptr ? "und" : language)
+ .setScript(script)
+ .build(status);
l.addLikelySubtags(status);
const char* likelyRegion = l.getCountry();
- CharString* item = nullptr;
+ LocalPointer<CharString> item;
if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
size_t len = uprv_strlen(likelyRegion);
const char* foundInReplacement = uprv_strstr(replacement,
@@ -1351,20 +1426,22 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
*(foundInReplacement-1) == ' ');
U_ASSERT(foundInReplacement[len] == ' ' ||
foundInReplacement[len] == '\0');
- item = new CharString(foundInReplacement, (int32_t)len, status);
+ item.adoptInsteadAndCheckErrorCode(
+ new CharString(foundInReplacement, (int32_t)len, status), status);
}
}
- if (item == nullptr) {
- item = new CharString(replacement,
- (int32_t)(firstSpace - replacement), status);
+ if (item.isNull() && U_SUCCESS(status)) {
+ item.adoptInsteadAndCheckErrorCode(
+ new CharString(replacement,
+ (int32_t)(firstSpace - replacement), status), status);
}
if (U_FAILURE(status)) { return false; }
- if (item == nullptr) {
+ if (item.isNull()) {
status = U_MEMORY_ALLOCATION_ERROR;
return false;
}
replacedRegion = item->data();
- toBeFreed.addElement(item, status);
+ toBeFreed.addElement(item.orphan(), status);
}
U_ASSERT(!same(region, replacedRegion));
region = replacedRegion;
@@ -1429,6 +1506,106 @@ AliasReplacer::replaceVariant(UErrorCode& status)
return false;
}
+// Looks up `subdivision` in the subdivision alias map. When an entry exists,
+// the text of the entry up to its first space (or the whole entry when it
+// contains no space) is appended to `output`, provided that text is 2 to 8
+// characters long; a 2-character result additionally gets "zzzz" appended.
+// Returns true whenever an entry exists, even if nothing was appended, and
+// false when there is no entry or `status` already holds a failure.
+bool
+AliasReplacer::replaceSubdivision(
+    StringPiece subdivision, CharString& output, UErrorCode& status)
+{
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    const char *mapped = data->subdivisionMap().get(subdivision.data());
+    if (mapped == nullptr) {
+        // No alias data for this subdivision.
+        return false;
+    }
+    // Only the first space-separated item of the entry is used.
+    const char* spacePos = uprv_strchr(mapped, ' ');
+    size_t firstLen;
+    if (spacePos == nullptr) {
+        firstLen = uprv_strlen(mapped);
+    } else {
+        firstLen = spacePos - mapped;
+    }
+    if (firstLen >= 2 && firstLen <= 8) {
+        output.append(mapped, (int32_t)firstLen, status);
+        if (firstLen == 2) {
+            // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
+            output.append("zzzz", 4, status);
+        }
+    }
+    return true;
+}
+
+// Rebuilds the value of a "t" extension into `output`.
+//
+// The leading part of `transformedExtensions` (everything before the first
+// tkey reported by ultag_getTKeyStart()) is parsed as a language tag,
+// canonicalized, written back as a language tag and lower-cased. The
+// remaining "tkey-tvalue" fields are sorted by string comparison and appended,
+// separated by '-', with each tvalue replaced by the result of
+// ulocimp_toBcpType() when that call returns a non-null value.
+//
+// `transformedExtensions` is modified in place: separators inside its buffer
+// are overwritten with NUL bytes while the fields are split.
+//
+// Returns false when `status` holds a failure on entry or on exit (including
+// U_ILLEGAL_ARGUMENT_ERROR for a tkey that has no '-' after it); otherwise
+// returns true.
+bool
+AliasReplacer::replaceTransformedExtensions(
+    CharString& transformedExtensions, CharString& output, UErrorCode& status)
+{
+    // The content of the transformedExtensions will be modified in this
+    // function to NULL-terminating (tkey-tvalue) pairs.
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    int32_t len = transformedExtensions.length();
+    const char* str = transformedExtensions.data();
+    const char* tkey = ultag_getTKeyStart(str);
+    // Length of the tlang prefix: zero when the value starts with a tkey, the
+    // whole value when there is no tkey, otherwise everything before the
+    // separator that precedes the first tkey.
+    int32_t tlangLen = (tkey == str) ? 0 :
+        ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
+    CharStringByteSink sink(&output);
+    if (tlangLen > 0) {
+        Locale tlang = LocaleBuilder()
+            .setLanguageTag(StringPiece(str, tlangLen))
+            .build(status);
+        tlang.canonicalize(status);
+        tlang.toLanguageTag(sink, status);
+        if (U_FAILURE(status)) {
+            return false;
+        }
+        T_CString_toLowerCase(output.data());
+    }
+    if (tkey != nullptr) {
+        // We need to sort the tfields by tkey
+        UVector tfields(status);
+        if (U_FAILURE(status)) {
+            return false;
+        }
+        do {
+            const char* tvalue = uprv_strchr(tkey, '-');
+            if (tvalue == nullptr) {
+                // A tkey without a tvalue is malformed. Stop here: carrying on
+                // would hand a null pointer to ultag_getTKeyStart() below.
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return false;
+            }
+            const char* nextTKey = ultag_getTKeyStart(tvalue);
+            if (nextTKey != nullptr) {
+                *((char*)(nextTKey-1)) = '\0';  // NULL terminate tvalue
+            }
+            tfields.insertElementAt((void*)tkey, tfields.size(), status);
+            if (U_FAILURE(status)) {
+                return false;
+            }
+            tkey = nextTKey;
+        } while (tkey != nullptr);
+        tfields.sort([](UElement e1, UElement e2) -> int8_t {
+            // uprv_strcmp returns an int, and on some platforms, such as
+            // arm64-v8a, it may return positive values > 127, which become
+            // negative when the result is cast to int8_t.
+            int res = uprv_strcmp(
+                (const char*)e1.pointer, (const char*)e2.pointer);
+            return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
+        }, status);
+        for (int32_t i = 0; i < tfields.size(); i++) {
+            if (output.length() > 0) {
+                output.append('-', status);
+            }
+            const char* tfield = (const char*) tfields.elementAt(i);
+            const char* tvalue = uprv_strchr(tfield, '-');
+            // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
+            U_ASSERT(tvalue != nullptr);
+            *((char*)tvalue++) = '\0'; // NULL terminate tkey
+            output.append(tfield, status).append('-', status);
+            const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
+            output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
+        }
+    }
+    if (U_FAILURE(status)) {
+        return false;
+    }
+    return true;
+}
+
CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
@@ -1447,13 +1624,17 @@ AliasReplacer::outputToString(
out.append(SEP_CHAR, status);
}
variants.sort([](UElement e1, UElement e2) -> int8_t {
- return uprv_strcmp(
+ // uprv_strcmp returns an int, and on some platforms, such as arm64-v8a,
+ // it may return positive values > 127, which become negative when the
+ // result is cast to int8_t.
+ int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
+ return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
int32_t variantsStart = out.length();
for (int32_t i = 0; i < variants.size(); i++) {
out.append(SEP_CHAR, status)
- .append((const char*)((UVector*)variants.elementAt(i)),
+ .append((const char*)(variants.elementAt(i)),
status);
}
T_CString_toUpperCase(out.data() + variantsStart);
@@ -1470,7 +1651,7 @@ AliasReplacer::outputToString(
}
bool
-AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
+AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
{
data = AliasData::singleton(status);
if (U_FAILURE(status)) {
@@ -1491,7 +1672,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
region = nullptr;
}
const char* variantsStr = locale.getVariant();
- const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
@@ -1510,8 +1690,12 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
// Sort the variants
variants.sort([](UElement e1, UElement e2) -> int8_t {
- return uprv_strcmp(
+ // uprv_strcmp returns an int, and on some platforms, such as arm64-v8a,
+ // it may return positive values > 127, which become negative when the
+ // result is cast to int8_t.
+ int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
+ return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
// A changed count to assert when loop too many times.
@@ -1555,11 +1739,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
if (U_FAILURE(status)) { return false; }
// Nothing changed and we know the order of the vaiants are not change
// because we have no variant or only one.
- if (changed == 0 && variants.size() <= 1) {
+ const char* extensionsStr = locale_getKeywordsStart(locale.getName());
+ if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
+ if (U_FAILURE(status)) {
+ return false;
+ }
if (extensionsStr != nullptr) {
+ changed = 0;
+ Locale temp(locale);
+ LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
+ if (U_SUCCESS(status) && !iter.isNull()) {
+ const char* key;
+ while ((key = iter->next(nullptr, status)) != nullptr) {
+ if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
+ uprv_strcmp("t", key) == 0) {
+ CharString value;
+ CharStringByteSink valueSink(&value);
+ locale.getKeywordValue(key, valueSink, status);
+ if (U_FAILURE(status)) {
+ status = U_ZERO_ERROR;
+ continue;
+ }
+ CharString replacement;
+ if (uprv_strlen(key) == 2) {
+ if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
+ changed++;
+ temp.setKeywordValue(key, replacement.data(), status);
+ }
+ } else {
+ U_ASSERT(uprv_strcmp(key, "t") == 0);
+ if (replaceTransformedExtensions(value, replacement, status)) {
+ changed++;
+ temp.setKeywordValue(key, replacement.data(), status);
+ }
+ }
+ if (U_FAILURE(status)) {
+ return false;
+ }
+ }
+ }
+ }
+ if (changed != 0) {
+ extensionsStr = locale_getKeywordsStart(temp.getName());
+ }
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
@@ -1567,8 +1792,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
}
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
- U_ASSERT(changed == 0);
- U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
@@ -1630,7 +1853,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
{
fIsBogus = FALSE;
/* Free our current storage */
- if (baseName != fullName) {
+ if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@@ -1666,6 +1889,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
+ U_ASSERT(baseName == nullptr);
/*Go to heap for the fullName if necessary*/
fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
if(fullName == 0) {
@@ -1819,7 +2043,7 @@ Locale::hashCode() const
void
Locale::setToBogus() {
/* Free our current storage */
- if(baseName != fullName) {
+ if((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@@ -2453,9 +2677,13 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
if (U_FAILURE(status)) {
return;
}
+ if (status == U_STRING_NOT_TERMINATED_WARNING) {
+ status = U_ZERO_ERROR;
+ }
int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
bufferLength, &status) + 1;
+ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
/* Handle the case the current buffer is not enough to hold the new id */
if (status == U_BUFFER_OVERFLOW_ERROR) {
U_ASSERT(newLength > bufferLength);
@@ -2472,6 +2700,7 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
fullName = newFullName;
status = U_ZERO_ERROR;
uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
+ U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
} else {
U_ASSERT(newLength <= bufferLength);
}