3 files changed, 120 insertions, 126 deletions
diff --git a/core/string/translation.cpp b/core/string/translation.cpp
index 5337d46aa3..9cee218735 100644
--- a/core/string/translation.cpp
+++ b/core/string/translation.cpp
@@ -39,13 +39,14 @@
 #include "main/main.h"
 #endif
 
-// ISO 639-1 language codes, with the addition of glibc locales with their
-// regional identifiers. This list must match the language names (in English)
-// of locale_names.
+// ISO 639-1 language codes (and a couple of three-letter ISO 639-2 codes),
+// with the addition of glibc locales with their regional identifiers.
+// This list must match the language names (in English) of locale_names.
 //
 // References:
 // - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
 // - https://lh.2xlibre.net/locales/
+// - https://iso639-3.sil.org/
 
 static const char *locale_list[] = {
 	"aa", //  Afar
@@ -100,6 +101,7 @@ static const char *locale_list[] = {
 	"bo", //  Tibetan
 	"bo_CN", //  Tibetan (China)
 	"bo_IN", //  Tibetan (India)
+	"br", //  Breton
 	"br_FR", //  Breton (France)
 	"brx_IN", //  Bodo (India)
 	"bs_BA", //  Bosnian (Bosnia and Herzegovina)
@@ -201,6 +203,7 @@ static const char *locale_list[] = {
 	"gd_GB", //  Scottish Gaelic (United Kingdom)
 	"gez_ER", //  Geez (Eritrea)
 	"gez_ET", //  Geez (Ethiopia)
+	"gl", //  Galician
 	"gl_ES", //  Galician (Spain)
 	"gu_IN", //  Gujarati (India)
 	"gv_GB", //  Manx (United Kingdom)
@@ -273,6 +276,7 @@ static const char *locale_list[] = {
 	"ml_IN", //  Malayalam (India)
 	"mni_IN", //  Manipuri (India)
 	"mn_MN", //  Mongolian (Mongolia)
+	"mr", //  Marathi
 	"mr_IN", //  Marathi (India)
 	"ms", //  Malay
 	"ms_MY", //  Malay (Malaysia)
@@ -302,6 +306,7 @@ static const char *locale_list[] = {
 	"om", //  Oromo
 	"om_ET", //  Oromo (Ethiopia)
 	"om_KE", //  Oromo (Kenya)
+	"or", //  Oriya
 	"or_IN", //  Oriya (India)
 	"os_RU", //  Ossetian (Russia)
 	"pa_IN", //  Panjabi (India)
@@ -386,6 +391,8 @@ static const char *locale_list[] = {
 	"tr_TR", //  Turkish (Turkey)
 	"ts_ZA", //  Tsonga (South Africa)
 	"tt_RU", //  Tatar (Russia)
+	"tzm", // Central Atlas Tamazight
+	"tzm_MA", // Central Atlas Tamazight (Marrocos)
 	"ug_CN", //  Uighur (China)
 	"uk", //  Ukrainian
 	"uk_UA", //  Ukrainian (Ukraine)
@@ -468,6 +475,7 @@ static const char *locale_names[] = {
 	"Tibetan",
 	"Tibetan (China)",
 	"Tibetan (India)",
+	"Breton",
 	"Breton (France)",
 	"Bodo (India)",
 	"Bosnian (Bosnia and Herzegovina)",
@@ -569,6 +577,7 @@ static const char *locale_names[] = {
 	"Scottish Gaelic (United Kingdom)",
 	"Geez (Eritrea)",
 	"Geez (Ethiopia)",
+	"Galician",
 	"Galician (Spain)",
 	"Gujarati (India)",
 	"Manx (United Kingdom)",
@@ -641,6 +650,7 @@ static const char *locale_names[] = {
 	"Malayalam (India)",
 	"Manipuri (India)",
 	"Mongolian (Mongolia)",
+	"Marathi",
 	"Marathi (India)",
 	"Malay",
 	"Malay (Malaysia)",
@@ -670,6 +680,7 @@ static const char *locale_names[] = {
 	"Oromo",
 	"Oromo (Ethiopia)",
 	"Oromo (Kenya)",
+	"Oriya",
 	"Oriya (India)",
 	"Ossetian (Russia)",
 	"Panjabi (India)",
@@ -754,6 +765,8 @@ static const char *locale_names[] = {
 	"Turkish (Turkey)",
 	"Tsonga (South Africa)",
 	"Tatar (Russia)",
+	"Central Atlas Tamazight",
+	"Central Atlas Tamazight (Marrocos)",
 	"Uighur (China)",
 	"Ukrainian",
 	"Ukrainian (Ukraine)",
@@ -1191,14 +1204,14 @@ bool TranslationServer::_load_translations(const String &p_from) {
 }
 
 void TranslationServer::setup() {
-	String test = GLOBAL_DEF("locale/test", "");
+	String test = GLOBAL_DEF("internationalization/locale/test", "");
 	test = test.strip_edges();
 	if (test != "") {
 		set_locale(test);
 	} else {
 		set_locale(OS::get_singleton()->get_locale());
 	}
-	fallback = GLOBAL_DEF("locale/fallback", "en");
+	fallback = GLOBAL_DEF("internationalization/locale/fallback", "en");
 #ifdef TOOLS_ENABLED
 	{
 		String options = "";
@@ -1210,7 +1223,7 @@ void TranslationServer::setup() {
 			options += locale_list[idx];
 			idx++;
 		}
-		ProjectSettings::get_singleton()->set_custom_property_info("locale/fallback", PropertyInfo(Variant::STRING, "locale/fallback", PROPERTY_HINT_ENUM, options));
+		ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_ENUM, options));
 	}
 #endif
 }
@@ -1307,11 +1320,11 @@ void TranslationServer::_bind_methods() {
 
 void TranslationServer::load_translations() {
 	String locale = get_locale();
-	_load_translations("locale/translations"); //all
-	_load_translations("locale/translations_" + locale.substr(0, 2));
+	_load_translations("internationalization/locale/translations"); //all
+	_load_translations("internationalization/locale/translations_" + locale.substr(0, 2));
 
 	if (locale.substr(0, 2) != locale) {
-		_load_translations("locale/translations_" + locale);
+		_load_translations("internationalization/locale/translations_" + locale);
 	}
 }
 
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 21336a99ec..a57c7b2504 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -1409,8 +1409,9 @@ String String::num(double p_num, int p_decimals) {
 				if (digit == MAX_DIGITS) //no point in going to infinite
 					break;
 
-				if ((dec - (double)((int)dec)) < 1e-6)
+				if (dec - (double)((int)dec) < 1e-6) {
 					break;
+				}
 			}
 
 			if (digit == p_decimals)
@@ -2096,8 +2097,9 @@ String::String(const StrRange &p_range) {
 	copy_from(p_range.c_str, p_range.len);
 }
 
-int64_t String::hex_to_int(bool p_with_prefix) const {
-	if (p_with_prefix && length() < 3) {
+int64_t String::hex_to_int() const {
+	int len = length();
+	if (len == 0) {
 		return 0;
 	}
 
@@ -2109,10 +2111,7 @@ int64_t String::hex_to_int(bool p_with_prefix) const {
 		s++;
 	}
 
-	if (p_with_prefix) {
-		if (s[0] != '0' || s[1] != 'x') {
-			return 0;
-		}
+	if (len > 2 && s[0] == '0' && s[1] == 'x') {
 		s += 2;
 	}
 
@@ -2139,8 +2138,9 @@ int64_t String::hex_to_int(bool p_with_prefix) const {
 	return hex * sign;
 }
 
-int64_t String::bin_to_int(bool p_with_prefix) const {
-	if (p_with_prefix && length() < 3) {
+int64_t String::bin_to_int() const {
+	int len = length();
+	if (len == 0) {
 		return 0;
 	}
 
@@ -2152,10 +2152,7 @@ int64_t String::bin_to_int(bool p_with_prefix) const {
 		s++;
 	}
 
-	if (p_with_prefix) {
-		if (s[0] != '0' || s[1] != 'b') {
-			return 0;
-		}
+	if (len > 2 && s[0] == '0' && s[1] == 'b') {
 		s += 2;
 	}
 
@@ -3074,35 +3071,47 @@ int String::rfindn(const String &p_str, int p_from) const {
 }
 
 bool String::ends_with(const String &p_string) const {
-	int pos = rfind(p_string);
-	if (pos == -1) {
+	int l = p_string.length();
+	if (l > length()) {
 		return false;
 	}
-	return pos + p_string.length() == length();
+
+	if (l == 0) {
+		return true;
+	}
+
+	const char32_t *p = &p_string[0];
+	const char32_t *s = &operator[](length() - l);
+
+	for (int i = 0; i < l; i++) {
+		if (p[i] != s[i]) {
+			return false;
+		}
+	}
+
+	return true;
 }
 
 bool String::begins_with(const String &p_string) const {
-	if (p_string.length() > length()) {
+	int l = p_string.length();
+	if (l > length()) {
 		return false;
 	}
 
-	int l = p_string.length();
 	if (l == 0) {
 		return true;
 	}
 
-	const char32_t *src = &p_string[0];
-	const char32_t *str = &operator[](0);
+	const char32_t *p = &p_string[0];
+	const char32_t *s = &operator[](0);
 
-	int i = 0;
-	for (; i < l; i++) {
-		if (src[i] != str[i]) {
+	for (int i = 0; i < l; i++) {
+		if (p[i] != s[i]) {
 			return false;
 		}
 	}
 
-	// only if i == l the p_string matches the beginning
-	return i == l;
+	return true;
 }
 
 bool String::begins_with(const char *p_string) const {
@@ -3250,8 +3259,8 @@ float String::similarity(const String &p_string) const {
 	int src_size = src_bigrams.size();
 	int tgt_size = tgt_bigrams.size();
 
-	double sum = src_size + tgt_size;
-	double inter = 0;
+	int sum = src_size + tgt_size;
+	int inter = 0;
 	for (int i = 0; i < src_size; i++) {
 		for (int j = 0; j < tgt_size; j++) {
 			if (src_bigrams[i] == tgt_bigrams[j]) {
@@ -3478,7 +3487,7 @@ String String::right(int p_pos) const {
 	return substr(p_pos, (length() - p_pos));
 }
 
-char32_t String::ord_at(int p_idx) const {
+char32_t String::unicode_at(int p_idx) const {
 	ERR_FAIL_INDEX_V(p_idx, length(), 0);
 	return operator[](p_idx);
 }
@@ -3750,7 +3759,7 @@ bool String::is_valid_string() const {
 	return valid;
 }
 
-String String::http_escape() const {
+String String::uri_encode() const {
 	const CharString temp = utf8();
 	String res;
 	for (int i = 0; i < temp.length(); ++i) {
@@ -3774,23 +3783,25 @@ String String::http_escape() const {
 	return res;
 }
 
-String String::http_unescape() const {
+String String::uri_decode() const {
 	String res;
 	for (int i = 0; i < length(); ++i) {
-		if (ord_at(i) == '%' && i + 2 < length()) {
-			char32_t ord1 = ord_at(i + 1);
+		if (unicode_at(i) == '%' && i + 2 < length()) {
+			char32_t ord1 = unicode_at(i + 1);
 			if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) {
-				char32_t ord2 = ord_at(i + 2);
+				char32_t ord2 = unicode_at(i + 2);
 				if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) {
 					char bytes[3] = { (char)ord1, (char)ord2, 0 };
 					res += (char)strtol(bytes, nullptr, 16);
 					i += 2;
 				}
 			} else {
-				res += ord_at(i);
+				res += unicode_at(i);
 			}
+		} else if (unicode_at(i) == '+') {
+			res += ' ';
 		} else {
-			res += ord_at(i);
+			res += unicode_at(i);
 		}
 	}
 	return String::utf8(res.ascii());
@@ -3877,25 +3888,55 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
 
 			if (p_src_len >= 4 && p_src[1] == '#') {
 				char32_t c = 0;
-
-				for (int i = 2; i < p_src_len; i++) {
-					eat = i + 1;
-					char32_t ct = p_src[i];
-					if (ct == ';') {
-						break;
-					} else if (ct >= '0' && ct <= '9') {
-						ct = ct - '0';
-					} else if (ct >= 'a' && ct <= 'f') {
-						ct = (ct - 'a') + 10;
-					} else if (ct >= 'A' && ct <= 'F') {
-						ct = (ct - 'A') + 10;
-					} else {
-						continue;
+				bool overflow = false;
+				if (p_src[2] == 'x') {
+					// Hex entity &#x<num>;
+					for (int i = 3; i < p_src_len; i++) {
+						eat = i + 1;
+						char32_t ct = p_src[i];
+						if (ct == ';') {
+							break;
+						} else if (ct >= '0' && ct <= '9') {
+							ct = ct - '0';
+						} else if (ct >= 'a' && ct <= 'f') {
+							ct = (ct - 'a') + 10;
+						} else if (ct >= 'A' && ct <= 'F') {
+							ct = (ct - 'A') + 10;
+						} else {
+							break;
+						}
+						if (c > (UINT32_MAX >> 4)) {
+							overflow = true;
+							break;
+						}
+						c <<= 4;
+						c |= ct;
+					}
+				} else {
+					// Decimal entity &#<num>;
+					for (int i = 2; i < p_src_len; i++) {
+						eat = i + 1;
+						char32_t ct = p_src[i];
+						if (ct == ';' || ct < '0' || ct > '9') {
+							break;
+						}
+					}
+					if (p_src[eat - 1] == ';') {
+						int64_t val = String::to_int(p_src + 2, eat - 3);
+						if (val > 0 && val <= UINT32_MAX) {
+							c = (char32_t)val;
+						} else {
+							overflow = true;
+						}
 					}
-					c <<= 4;
-					c |= ct;
 				}
 
+				// Value must be non-zero, in the range of char32_t,
+				// actually end with ';'. If invalid, leave the entity as-is
+				if (c == '\0' || overflow || p_src[eat - 1] != ';') {
+					eat = 1;
+					c = *p_src;
+				}
 				if (p_dst) {
 					*p_dst = c;
 				}
@@ -4245,7 +4286,7 @@ bool String::is_valid_ip_address() const {
 				continue;
 			}
 			if (n.is_valid_hex_number(false)) {
-				int64_t nint = n.hex_to_int(false);
+				int64_t nint = n.hex_to_int();
 				if (nint < 0 || nint > 0xffff) {
 					return false;
 				}
@@ -4340,63 +4381,6 @@ String String::plus_file(const String &p_file) const {
 	return *this + "/" + p_file;
 }
 
-String String::percent_encode() const {
-	CharString cs = utf8();
-	String encoded;
-	for (int i = 0; i < cs.length(); i++) {
-		uint8_t c = cs[i];
-		if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '~' || c == '.') {
-			char p[2] = { (char)c, 0 };
-			encoded += p;
-		} else {
-			char p[4] = { '%', 0, 0, 0 };
-			static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
-
-			p[1] = hex[c >> 4];
-			p[2] = hex[c & 0xF];
-			encoded += p;
-		}
-	}
-
-	return encoded;
-}
-
-String String::percent_decode() const {
-	CharString pe;
-
-	CharString cs = utf8();
-	for (int i = 0; i < cs.length(); i++) {
-		uint8_t c = cs[i];
-		if (c == '%' && i < length() - 2) {
-			uint8_t a = LOWERCASE(cs[i + 1]);
-			uint8_t b = LOWERCASE(cs[i + 2]);
-
-			if (a >= '0' && a <= '9') {
-				c = (a - '0') << 4;
-			} else if (a >= 'a' && a <= 'f') {
-				c = (a - 'a' + 10) << 4;
-			} else {
-				continue;
-			}
-
-			uint8_t d = 0;
-
-			if (b >= '0' && b <= '9') {
-				d = (b - '0');
-			} else if (b >= 'a' && b <= 'f') {
-				d = (b - 'a' + 10);
-			} else {
-				continue;
-			}
-			c += d;
-			i += 2;
-		}
-		pe += c;
-	}
-
-	return String::utf8(pe.ptr());
-}
-
 String String::property_name_encode() const {
 	// Escape and quote strings with extended ASCII or further Unicode characters
 	// as well as '"', '=' or ' ' (32)
diff --git a/core/string/ustring.h b/core/string/ustring.h
index 654e327320..821941036f 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -318,8 +318,8 @@ public:
 	bool is_numeric() const;
 
 	double to_float() const;
-	int64_t hex_to_int(bool p_with_prefix = true) const;
-	int64_t bin_to_int(bool p_with_prefix = true) const;
+	int64_t hex_to_int() const;
+	int64_t bin_to_int() const;
 	int64_t to_int() const;
 
 	static int64_t to_int(const char *p_str, int p_len = -1);
@@ -366,7 +366,7 @@ public:
 	String get_extension() const;
 	String get_basename() const;
 	String plus_file(const String &p_file) const;
-	char32_t ord_at(int p_idx) const;
+	char32_t unicode_at(int p_idx) const;
 
 	void erase(int p_pos, int p_chars);
 
@@ -409,17 +409,14 @@ public:
 
 	String xml_escape(bool p_escape_quotes = false) const;
 	String xml_unescape() const;
-	String http_escape() const;
-	String http_unescape() const;
+	String uri_encode() const;
+	String uri_decode() const;
 	String c_escape() const;
 	String c_escape_multiline() const;
 	String c_unescape() const;
 	String json_escape() const;
 	String word_wrap(int p_chars_per_line) const;
 
-	String percent_encode() const;
-	String percent_decode() const;
-
 	String property_name_encode() const;
 
 	bool is_valid_identifier() const;