diff options
Diffstat (limited to 'core/string')
| -rw-r--r-- | core/string/translation.cpp | 19 | ||||
| -rw-r--r-- | core/string/ustring.cpp | 62 |
2 files changed, 62 insertions, 19 deletions
diff --git a/core/string/translation.cpp b/core/string/translation.cpp index 0901944360..9cee218735 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -39,13 +39,14 @@ #include "main/main.h" #endif -// ISO 639-1 language codes, with the addition of glibc locales with their -// regional identifiers. This list must match the language names (in English) -// of locale_names. +// ISO 639-1 language codes (and a couple of three-letter ISO 639-2 codes), +// with the addition of glibc locales with their regional identifiers. +// This list must match the language names (in English) of locale_names. // // References: // - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes // - https://lh.2xlibre.net/locales/ +// - https://iso639-3.sil.org/ static const char *locale_list[] = { "aa", // Afar @@ -100,6 +101,7 @@ static const char *locale_list[] = { "bo", // Tibetan "bo_CN", // Tibetan (China) "bo_IN", // Tibetan (India) + "br", // Breton "br_FR", // Breton (France) "brx_IN", // Bodo (India) "bs_BA", // Bosnian (Bosnia and Herzegovina) @@ -201,6 +203,7 @@ static const char *locale_list[] = { "gd_GB", // Scottish Gaelic (United Kingdom) "gez_ER", // Geez (Eritrea) "gez_ET", // Geez (Ethiopia) + "gl", // Galician "gl_ES", // Galician (Spain) "gu_IN", // Gujarati (India) "gv_GB", // Manx (United Kingdom) @@ -273,6 +276,7 @@ static const char *locale_list[] = { "ml_IN", // Malayalam (India) "mni_IN", // Manipuri (India) "mn_MN", // Mongolian (Mongolia) + "mr", // Marathi "mr_IN", // Marathi (India) "ms", // Malay "ms_MY", // Malay (Malaysia) @@ -302,6 +306,7 @@ static const char *locale_list[] = { "om", // Oromo "om_ET", // Oromo (Ethiopia) "om_KE", // Oromo (Kenya) + "or", // Oriya "or_IN", // Oriya (India) "os_RU", // Ossetian (Russia) "pa_IN", // Panjabi (India) @@ -386,6 +391,8 @@ static const char *locale_list[] = { "tr_TR", // Turkish (Turkey) "ts_ZA", // Tsonga (South Africa) "tt_RU", // Tatar (Russia) + "tzm", // Central Atlas Tamazight + 
"tzm_MA", // Central Atlas Tamazight (Morocco) "ug_CN", // Uighur (China) "uk", // Ukrainian "uk_UA", // Ukrainian (Ukraine) @@ -468,6 +475,7 @@ static const char *locale_names[] = { "Tibetan", "Tibetan (China)", "Tibetan (India)", + "Breton", "Breton (France)", "Bodo (India)", "Bosnian (Bosnia and Herzegovina)", @@ -569,6 +577,7 @@ static const char *locale_names[] = { "Scottish Gaelic (United Kingdom)", "Geez (Eritrea)", "Geez (Ethiopia)", + "Galician", "Galician (Spain)", "Gujarati (India)", "Manx (United Kingdom)", @@ -641,6 +650,7 @@ static const char *locale_names[] = { "Malayalam (India)", "Manipuri (India)", "Mongolian (Mongolia)", + "Marathi", "Marathi (India)", "Malay", "Malay (Malaysia)", @@ -670,6 +680,7 @@ static const char *locale_names[] = { "Oromo", "Oromo (Ethiopia)", "Oromo (Kenya)", + "Oriya", "Oriya (India)", "Ossetian (Russia)", "Panjabi (India)", @@ -754,6 +765,8 @@ static const char *locale_names[] = { "Turkish (Turkey)", "Tsonga (South Africa)", "Tatar (Russia)", + "Central Atlas Tamazight", + "Central Atlas Tamazight (Marrocos)", "Uighur (China)", "Ukrainian", "Ukrainian (Ukraine)", diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 59fda65d43..a57c7b2504 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -3888,25 +3888,55 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch if (p_src_len >= 4 && p_src[1] == '#') { char32_t c = 0; - - for (int i = 2; i < p_src_len; i++) { - eat = i + 1; - char32_t ct = p_src[i]; - if (ct == ';') { - break; - } else if (ct >= '0' && ct <= '9') { - ct = ct - '0'; - } else if (ct >= 'a' && ct <= 'f') { - ct = (ct - 'a') + 10; - } else if (ct >= 'A' && ct <= 'F') { - ct = (ct - 'A') + 10; - } else { - continue; + bool overflow = false; + if (p_src[2] == 'x') { + // Hex entity &#x<num>; + for (int i = 3; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';') { + break; + } else if (ct >= '0' && ct <= '9') { + ct = ct - 
'0'; + } else if (ct >= 'a' && ct <= 'f') { + ct = (ct - 'a') + 10; + } else if (ct >= 'A' && ct <= 'F') { + ct = (ct - 'A') + 10; + } else { + break; + } + if (c > (UINT32_MAX >> 4)) { + overflow = true; + break; + } + c <<= 4; + c |= ct; + } + } else { + // Decimal entity &#<num>; + for (int i = 2; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';' || ct < '0' || ct > '9') { + break; + } + } + if (p_src[eat - 1] == ';') { + int64_t val = String::to_int(p_src + 2, eat - 3); + if (val > 0 && val <= UINT32_MAX) { + c = (char32_t)val; + } else { + overflow = true; + } } - c <<= 4; - c |= ct; } + // Value must be non-zero, in the range of char32_t, + // and actually end with ';'. If invalid, leave the entity as-is. + if (c == '\0' || overflow || p_src[eat - 1] != ';') { + eat = 1; + c = *p_src; + } if (p_dst) { *p_dst = c; } |