summaryrefslogtreecommitdiff
path: root/core/string
diff options
context:
space:
mode:
Diffstat (limited to 'core/string')
-rw-r--r-- core/string/translation.cpp 19
-rw-r--r-- core/string/ustring.cpp 62
2 files changed, 62 insertions, 19 deletions
diff --git a/core/string/translation.cpp b/core/string/translation.cpp
index 0901944360..9cee218735 100644
--- a/core/string/translation.cpp
+++ b/core/string/translation.cpp
@@ -39,13 +39,14 @@
#include "main/main.h"
#endif
-// ISO 639-1 language codes, with the addition of glibc locales with their
-// regional identifiers. This list must match the language names (in English)
-// of locale_names.
+// ISO 639-1 language codes (and a couple of three-letter ISO 639-2/639-3 codes),
+// with the addition of glibc locales with their regional identifiers.
+// This list must match the language names (in English) of locale_names.
//
// References:
// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
// - https://lh.2xlibre.net/locales/
+// - https://iso639-3.sil.org/
static const char *locale_list[] = {
"aa", // Afar
@@ -100,6 +101,7 @@ static const char *locale_list[] = {
"bo", // Tibetan
"bo_CN", // Tibetan (China)
"bo_IN", // Tibetan (India)
+ "br", // Breton
"br_FR", // Breton (France)
"brx_IN", // Bodo (India)
"bs_BA", // Bosnian (Bosnia and Herzegovina)
@@ -201,6 +203,7 @@ static const char *locale_list[] = {
"gd_GB", // Scottish Gaelic (United Kingdom)
"gez_ER", // Geez (Eritrea)
"gez_ET", // Geez (Ethiopia)
+ "gl", // Galician
"gl_ES", // Galician (Spain)
"gu_IN", // Gujarati (India)
"gv_GB", // Manx (United Kingdom)
@@ -273,6 +276,7 @@ static const char *locale_list[] = {
"ml_IN", // Malayalam (India)
"mni_IN", // Manipuri (India)
"mn_MN", // Mongolian (Mongolia)
+ "mr", // Marathi
"mr_IN", // Marathi (India)
"ms", // Malay
"ms_MY", // Malay (Malaysia)
@@ -302,6 +306,7 @@ static const char *locale_list[] = {
"om", // Oromo
"om_ET", // Oromo (Ethiopia)
"om_KE", // Oromo (Kenya)
+ "or", // Oriya
"or_IN", // Oriya (India)
"os_RU", // Ossetian (Russia)
"pa_IN", // Panjabi (India)
@@ -386,6 +391,8 @@ static const char *locale_list[] = {
"tr_TR", // Turkish (Turkey)
"ts_ZA", // Tsonga (South Africa)
"tt_RU", // Tatar (Russia)
+ "tzm", // Central Atlas Tamazight
+ "tzm_MA", // Central Atlas Tamazight (Morocco)
"ug_CN", // Uighur (China)
"uk", // Ukrainian
"uk_UA", // Ukrainian (Ukraine)
@@ -468,6 +475,7 @@ static const char *locale_names[] = {
"Tibetan",
"Tibetan (China)",
"Tibetan (India)",
+ "Breton",
"Breton (France)",
"Bodo (India)",
"Bosnian (Bosnia and Herzegovina)",
@@ -569,6 +577,7 @@ static const char *locale_names[] = {
"Scottish Gaelic (United Kingdom)",
"Geez (Eritrea)",
"Geez (Ethiopia)",
+ "Galician",
"Galician (Spain)",
"Gujarati (India)",
"Manx (United Kingdom)",
@@ -641,6 +650,7 @@ static const char *locale_names[] = {
"Malayalam (India)",
"Manipuri (India)",
"Mongolian (Mongolia)",
+ "Marathi",
"Marathi (India)",
"Malay",
"Malay (Malaysia)",
@@ -670,6 +680,7 @@ static const char *locale_names[] = {
"Oromo",
"Oromo (Ethiopia)",
"Oromo (Kenya)",
+ "Oriya",
"Oriya (India)",
"Ossetian (Russia)",
"Panjabi (India)",
@@ -754,6 +765,8 @@ static const char *locale_names[] = {
"Turkish (Turkey)",
"Tsonga (South Africa)",
"Tatar (Russia)",
+ "Central Atlas Tamazight",
+ "Central Atlas Tamazight (Marrocos)",
"Uighur (China)",
"Ukrainian",
"Ukrainian (Ukraine)",
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 59fda65d43..a57c7b2504 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -3888,25 +3888,55 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
if (p_src_len >= 4 && p_src[1] == '#') {
char32_t c = 0;
-
- for (int i = 2; i < p_src_len; i++) {
- eat = i + 1;
- char32_t ct = p_src[i];
- if (ct == ';') {
- break;
- } else if (ct >= '0' && ct <= '9') {
- ct = ct - '0';
- } else if (ct >= 'a' && ct <= 'f') {
- ct = (ct - 'a') + 10;
- } else if (ct >= 'A' && ct <= 'F') {
- ct = (ct - 'A') + 10;
- } else {
- continue;
+ bool overflow = false;
+ if (p_src[2] == 'x') {
+ // Hex entity &#x<num>;
+ for (int i = 3; i < p_src_len; i++) {
+ eat = i + 1;
+ char32_t ct = p_src[i];
+ if (ct == ';') {
+ break;
+ } else if (ct >= '0' && ct <= '9') {
+ ct = ct - '0';
+ } else if (ct >= 'a' && ct <= 'f') {
+ ct = (ct - 'a') + 10;
+ } else if (ct >= 'A' && ct <= 'F') {
+ ct = (ct - 'A') + 10;
+ } else {
+ break;
+ }
+ if (c > (UINT32_MAX >> 4)) {
+ overflow = true;
+ break;
+ }
+ c <<= 4;
+ c |= ct;
+ }
+ } else {
+ // Decimal entity &#<num>;
+ for (int i = 2; i < p_src_len; i++) {
+ eat = i + 1;
+ char32_t ct = p_src[i];
+ if (ct == ';' || ct < '0' || ct > '9') {
+ break;
+ }
+ }
+ if (p_src[eat - 1] == ';') {
+ int64_t val = String::to_int(p_src + 2, eat - 3);
+ if (val > 0 && val <= UINT32_MAX) {
+ c = (char32_t)val;
+ } else {
+ overflow = true;
+ }
}
- c <<= 4;
- c |= ct;
}
+ // The value must be non-zero and fit in the range of char32_t, and the
+ // entity must actually end with ';'. If invalid, leave the entity as-is.
+ if (c == '\0' || overflow || p_src[eat - 1] != ';') {
+ eat = 1;
+ c = *p_src;
+ }
if (p_dst) {
*p_dst = c;
}