1 files changed, 589 insertions, 470 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 397743fb6e..c02be9e5b7 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -35,9 +35,11 @@
 #include "core/math/math_funcs.h"
 #include "core/os/memory.h"
 #include "core/string/print_string.h"
+#include "core/string/string_name.h"
 #include "core/string/translation.h"
 #include "core/string/ucaps.h"
 #include "core/variant/variant.h"
+#include "core/version_generated.gen.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -53,34 +55,14 @@
 
 static const int MAX_DECIMALS = 32;
 
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
-	return (c >= '0' && c <= '9');
-}
-
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
-	return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
-	return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
-	return (c >= 'a' && c <= 'z');
-}
-
 static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
-	return (is_upper_case(c) ? (c + ('a' - 'A')) : c);
+	return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
 }
 
 const char CharString::_null = 0;
 const char16_t Char16String::_null = 0;
 const char32_t String::_null = 0;
 
-bool is_symbol(char32_t c) {
-	return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
-}
-
 bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
 	const String &s = p_s;
 	int beg = CLAMP(p_col, 0, s.length());
@@ -122,16 +104,18 @@ bool Char16String::operator<(const Char16String &p_right) const {
 }
 
 Char16String &Char16String::operator+=(char16_t p_char) {
-	resize(size() ? size() + 1 : 2);
-	set(length(), 0);
-	set(length() - 1, p_char);
+	const int lhs_len = length();
+	resize(lhs_len + 2);
+
+	char16_t *dst = ptrw();
+	dst[lhs_len] = p_char;
+	dst[lhs_len + 1] = 0;
 
 	return *this;
 }
 
-Char16String &Char16String::operator=(const char16_t *p_cstr) {
+void Char16String::operator=(const char16_t *p_cstr) {
 	copy_from(p_cstr);
-	return *this;
 }
 
 const char16_t *Char16String::get_data() const {
@@ -178,16 +162,18 @@ bool CharString::operator<(const CharString &p_right) const {
 }
 
 CharString &CharString::operator+=(char p_char) {
-	resize(size() ? size() + 1 : 2);
-	set(length(), 0);
-	set(length() - 1, p_char);
+	const int lhs_len = length();
+	resize(lhs_len + 2);
+
+	char *dst = ptrw();
+	dst[lhs_len] = p_char;
+	dst[lhs_len + 1] = 0;
 
 	return *this;
 }
 
-CharString &CharString::operator=(const char *p_cstr) {
+void CharString::operator=(const char *p_cstr) {
 	copy_from(p_cstr);
-	return *this;
 }
 
 const char *CharString::get_data() const {
@@ -325,11 +311,7 @@ void String::copy_from(const char *p_cstr) {
 		return;
 	}
 
-	int len = 0;
-	const char *ptr = p_cstr;
-	while (*(ptr++) != 0) {
-		len++;
-	}
+	const size_t len = strlen(p_cstr);
 
 	if (len == 0) {
 		resize(0);
@@ -340,8 +322,14 @@ void String::copy_from(const char *p_cstr) {
 
 	char32_t *dst = this->ptrw();
 
-	for (int i = 0; i < len + 1; i++) {
-		dst[i] = p_cstr[i];
+	for (size_t i = 0; i <= len; i++) {
+		uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+		if (c == 0 && i < len) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+		} else {
+			dst[i] = c;
+		}
 	}
 }
 
@@ -368,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
 	char32_t *dst = this->ptrw();
 
 	for (int i = 0; i < len; i++) {
-		dst[i] = p_cstr[i];
+		uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+		if (c == 0) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+		} else {
+			dst[i] = c;
+		}
 	}
 	dst[len] = 0;
 }
@@ -394,14 +388,22 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
 }
 
 void String::copy_from(const char32_t &p_char) {
-	resize(2);
-	if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
-		print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
-		set(0, 0xfffd);
-	} else {
-		set(0, p_char);
+	if (p_char == 0) {
+		print_unicode_error("NUL character", true);
+		return;
+	}
+	if ((p_char & 0xfffff800) == 0xd800) {
+		print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+	}
+	if (p_char > 0x10ffff) {
+		print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
 	}
-	set(1, 0);
+
+	resize(2);
+
+	char32_t *dst = ptrw();
+	dst[0] = p_char;
+	dst[1] = 0;
 }
 
 void String::copy_from(const char32_t *p_cstr) {
@@ -450,17 +452,22 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
 // p_length <= p_char strlen
 void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
 	resize(p_length + 1);
-	set(p_length, 0);
-
 	char32_t *dst = ptrw();
+	dst[p_length] = 0;
 
 	for (int i = 0; i < p_length; i++) {
-		if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
-			dst[i] = 0xfffd;
-		} else {
-			dst[i] = p_char[i];
+		if (p_char[i] == 0) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+			continue;
+		}
+		if ((p_char[i] & 0xfffff800) == 0xd800) {
+			print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
 		}
+		if (p_char[i] > 0x10ffff) {
+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
+		}
+		dst[i] = p_char[i];
 	}
 }
 
@@ -482,6 +489,12 @@ String String::operator+(const String &p_str) const {
 	return res;
 }
 
+String String::operator+(char32_t p_char) const {
+	String res = *this;
+	res += p_char;
+	return res;
+}
+
 String operator+(const char *p_chr, const String &p_str) {
 	String tmp = p_chr;
 	tmp += p_str;
@@ -493,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) {
 	// wchar_t is 16-bit
 	String tmp = String::utf16((const char16_t *)p_chr);
 #else
-	// wchar_t is 32-bi
+	// wchar_t is 32-bit
 	String tmp = (const char32_t *)p_chr;
 #endif
 	tmp += p_str;
@@ -505,27 +518,25 @@ String operator+(char32_t p_chr, const String &p_str) {
 }
 
 String &String::operator+=(const String &p_str) {
-	if (is_empty()) {
+	const int lhs_len = length();
+	if (lhs_len == 0) {
 		*this = p_str;
 		return *this;
 	}
 
-	if (p_str.is_empty()) {
+	const int rhs_len = p_str.length();
+	if (rhs_len == 0) {
 		return *this;
 	}
 
-	int from = length();
-
-	resize(length() + p_str.size());
+	resize(lhs_len + rhs_len + 1);
 
-	const char32_t *src = p_str.get_data();
-	char32_t *dst = ptrw();
+	const char32_t *src = p_str.ptr();
+	char32_t *dst = ptrw() + lhs_len;
 
-	set(length(), 0);
-
-	for (int i = 0; i < p_str.length(); i++) {
-		dst[from + i] = src[i];
-	}
+	// Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination).
+	memcpy(dst, src, rhs_len * sizeof(char32_t));
+	*(dst + rhs_len) = _null;
 
 	return *this;
 }
@@ -535,22 +546,21 @@ String &String::operator+=(const char *p_str) {
 		return *this;
 	}
 
-	int src_len = 0;
-	const char *ptr = p_str;
-	while (*(ptr++) != 0) {
-		src_len++;
-	}
-
-	int from = length();
+	const int lhs_len = length();
+	const size_t rhs_len = strlen(p_str);
 
-	resize(from + src_len + 1);
-
-	char32_t *dst = ptrw();
+	resize(lhs_len + rhs_len + 1);
 
-	set(length(), 0);
+	char32_t *dst = ptrw() + lhs_len;
 
-	for (int i = 0; i < src_len; i++) {
-		dst[from + i] = p_str[i];
+	for (size_t i = 0; i <= rhs_len; i++) {
+		uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
+		if (c == 0 && i < rhs_len) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+		} else {
+			dst[i] = c;
+		}
 	}
 
 	return *this;
@@ -573,15 +583,23 @@ String &String::operator+=(const char32_t *p_str) {
 }
 
 String &String::operator+=(char32_t p_char) {
-	resize(size() ? size() + 1 : 2);
-	set(length(), 0);
-	if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
-		print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
-		set(length() - 1, 0xfffd);
-	} else {
-		set(length() - 1, p_char);
+	if (p_char == 0) {
+		print_unicode_error("NUL character", true);
+		return *this;
+	}
+	if ((p_char & 0xfffff800) == 0xd800) {
+		print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+	}
+	if (p_char > 0x10ffff) {
+		print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
 	}
 
+	const int lhs_len = length();
+	resize(lhs_len + 2);
+	char32_t *dst = ptrw();
+	dst[lhs_len] = p_char;
+	dst[lhs_len + 1] = 0;
+
 	return *this;
 }
 
@@ -952,10 +970,6 @@ const char32_t *String::get_data() const {
 	return size() ? &operator[](0) : &zero;
 }
 
-void String::erase(int p_pos, int p_chars) {
-	*this = left(MAX(p_pos, 0)) + substr(p_pos + p_chars, length() - ((p_pos + p_chars)));
-}
-
 String String::capitalize() const {
 	String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges();
 	String cap;
@@ -979,21 +993,21 @@ String String::camelcase_to_underscore(bool lowercase) const {
 	int start_index = 0;
 
 	for (int i = 1; i < this->size(); i++) {
-		bool is_upper = is_upper_case(cstr[i]);
+		bool is_upper = is_ascii_upper_case(cstr[i]);
 		bool is_number = is_digit(cstr[i]);
 
 		bool are_next_2_lower = false;
 		bool is_next_lower = false;
 		bool is_next_number = false;
-		bool was_precedent_upper = is_upper_case(cstr[i - 1]);
+		bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
 		bool was_precedent_number = is_digit(cstr[i - 1]);
 
 		if (i + 2 < this->size()) {
-			are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]);
+			are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
 		}
 
 		if (i + 1 < this->size()) {
-			is_next_lower = is_lower_case(cstr[i + 1]);
+			is_next_lower = is_ascii_lower_case(cstr[i + 1]);
 			is_next_number = is_digit(cstr[i + 1]);
 		}
 
@@ -1532,115 +1546,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
 }
 
 String String::num_real(double p_num, bool p_trailing) {
-	if (Math::is_nan(p_num)) {
-		return "nan";
-	}
-
-	if (Math::is_inf(p_num)) {
-		if (signbit(p_num)) {
-			return "-inf";
+	if (p_num == (double)(int64_t)p_num) {
+		if (p_trailing) {
+			return num_int64((int64_t)p_num) + ".0";
 		} else {
-			return "inf";
+			return num_int64((int64_t)p_num);
 		}
 	}
-
-	String s;
-	String sd;
-
-	// Integer part.
-
-	bool neg = p_num < 0;
-	p_num = ABS(p_num);
-	int64_t intn = (int64_t)p_num;
-
-	// Decimal part.
-
-	if (intn != p_num) {
-		double dec = p_num - (double)intn;
-
-		int digit = 0;
-
 #ifdef REAL_T_IS_DOUBLE
-		int decimals = 14;
-		double tolerance = 1e-14;
+	int decimals = 14;
 #else
-		int decimals = 6;
-		double tolerance = 1e-6;
+	int decimals = 6;
 #endif
-		// We want to align the digits to the above sane default, so we only
-		// need to subtract log10 for numbers with a positive power of ten.
-		if (p_num > 10) {
-			decimals -= (int)floor(log10(p_num));
-		}
-
-		if (decimals > MAX_DECIMALS) {
-			decimals = MAX_DECIMALS;
-		}
-
-		// In case the value ends up ending in "99999", we want to add a
-		// tiny bit to the value we're checking when deciding when to stop,
-		// so we multiply by slightly above 1 (1 + 1e-7 or 1e-15).
-		double check_multiplier = 1 + tolerance / 10;
-
-		int64_t dec_int = 0;
-		int64_t dec_max = 0;
-
-		while (true) {
-			dec *= 10.0;
-			dec_int = dec_int * 10 + (int64_t)dec % 10;
-			dec_max = dec_max * 10 + 9;
-			digit++;
-
-			if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) {
-				break;
-			}
-
-			if (digit == decimals) {
-				break;
-			}
-		}
-
-		dec *= 10;
-		int last = (int64_t)dec % 10;
-
-		if (last > 5) {
-			if (dec_int == dec_max) {
-				dec_int = 0;
-				intn++;
-			} else {
-				dec_int++;
-			}
-		}
-
-		String decimal;
-		for (int i = 0; i < digit; i++) {
-			char num[2] = { 0, 0 };
-			num[0] = '0' + dec_int % 10;
-			decimal = num + decimal;
-			dec_int /= 10;
-		}
-		sd = '.' + decimal;
-	} else if (p_trailing) {
-		sd = ".0";
-	} else {
-		sd = "";
-	}
-
-	if (intn == 0) {
-		s = "0";
-	} else {
-		while (intn) {
-			char32_t num = '0' + (intn % 10);
-			intn /= 10;
-			s = num + s;
-		}
-	}
-
-	s = s + sd;
-	if (neg) {
-		s = "-" + s;
+	// We want to align the digits to the above sane default, so we only
+	// need to subtract log10 for numbers with a positive power of ten.
+	if (p_num > 10) {
+		decimals -= (int)floor(log10(p_num));
 	}
-	return s;
+	return num(p_num, decimals);
 }
 
 String String::num_scientific(double p_num) {
@@ -1699,6 +1622,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
 	return ret;
 }
 
+void String::print_unicode_error(const String &p_message, bool p_critical) const {
+	if (p_critical) {
+		print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
+	} else {
+		print_error(vformat("Unicode parsing error: %s", p_message));
+	}
+}
+
 CharString String::ascii(bool p_allow_extended) const {
 	if (!length()) {
 		return CharString();
@@ -1712,7 +1643,7 @@ CharString String::ascii(bool p_allow_extended) const {
 		if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
 			cs[i] = c;
 		} else {
-			print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
 			cs[i] = 0x20;
 		}
 	}
@@ -1727,11 +1658,9 @@ String String::utf8(const char *p_utf8, int p_len) {
 	return ret;
 }
 
-bool String::parse_utf8(const char *p_utf8, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
-
+Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
 	if (!p_utf8) {
-		return true;
+		return ERR_INVALID_DATA;
 	}
 
 	String aux;
@@ -1751,14 +1680,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 		}
 	}
 
+	bool decode_error = false;
+	bool decode_failed = false;
 	{
 		const char *ptrtmp = p_utf8;
 		const char *ptrtmp_limit = &p_utf8[p_len];
 		int skip = 0;
+		uint8_t c_start = 0;
 		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
-			if (skip == 0) {
-				uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+			uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
 
+			if (skip == 0) {
+				if (p_skip_cr && c == '\r') {
+					ptrtmp++;
+					continue;
+				}
 				/* Determine the number of characters in sequence */
 				if ((c & 0x80) == 0) {
 					skip = 0;
@@ -1768,20 +1704,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 					skip = 2;
 				} else if ((c & 0xf8) == 0xf0) {
 					skip = 3;
+				} else if ((c & 0xfc) == 0xf8) {
+					skip = 4;
+				} else if ((c & 0xfe) == 0xfc) {
+					skip = 5;
 				} else {
-					_UNICERROR("invalid skip at " + num_int64(cstr_size));
-					return true; //invalid utf8
+					skip = 0;
+					print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
+					decode_failed = true;
 				}
+				c_start = c;
 
 				if (skip == 1 && (c & 0x1e) == 0) {
-					_UNICERROR("overlong rejected at " + num_int64(cstr_size));
-					return true; //reject overlong
+					print_unicode_error(vformat("Overlong encoding (%x ...)", c));
+					decode_error = true;
 				}
-
 				str_size++;
-
 			} else {
-				--skip;
+				if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
+					print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
+					decode_error = true;
+				}
+				if (c < 0x80 || c > 0xbf) {
+					print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
+					decode_failed = true;
+					skip = 0;
+				} else {
+					--skip;
+				}
 			}
 
 			cstr_size++;
@@ -1789,80 +1739,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 		}
 
 		if (skip) {
-			_UNICERROR("no space left");
-			return true; //not enough space
+			print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
+			decode_failed = true;
 		}
 	}
 
 	if (str_size == 0) {
 		clear();
-		return false;
+		return OK; // empty string
 	}
 
 	resize(str_size + 1);
 	char32_t *dst = ptrw();
 	dst[str_size] = 0;
 
+	int skip = 0;
+	uint32_t unichar = 0;
 	while (cstr_size) {
-		int len = 0;
-
-		/* Determine the number of characters in sequence */
-		if ((*p_utf8 & 0x80) == 0) {
-			len = 1;
-		} else if ((*p_utf8 & 0xe0) == 0xc0) {
-			len = 2;
-		} else if ((*p_utf8 & 0xf0) == 0xe0) {
-			len = 3;
-		} else if ((*p_utf8 & 0xf8) == 0xf0) {
-			len = 4;
-		} else {
-			_UNICERROR("invalid len");
-			return true; //invalid UTF8
-		}
-
-		if (len > cstr_size) {
-			_UNICERROR("no space left");
-			return true; //not enough space
-		}
+		uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
 
-		if (len == 2 && (*p_utf8 & 0x1E) == 0) {
-			_UNICERROR("no space left");
-			return true; //reject overlong
-		}
-
-		/* Convert the first character */
-
-		uint32_t unichar = 0;
-
-		if (len == 1) {
-			unichar = *p_utf8;
+		if (skip == 0) {
+			if (p_skip_cr && c == '\r') {
+				p_utf8++;
+				continue;
+			}
+			/* Determine the number of characters in sequence */
+			if ((c & 0x80) == 0) {
+				*(dst++) = c;
+				unichar = 0;
+				skip = 0;
+			} else if ((c & 0xe0) == 0xc0) {
+				unichar = (0xff >> 3) & c;
+				skip = 1;
+			} else if ((c & 0xf0) == 0xe0) {
+				unichar = (0xff >> 4) & c;
+				skip = 2;
+			} else if ((c & 0xf8) == 0xf0) {
+				unichar = (0xff >> 5) & c;
+				skip = 3;
+			} else if ((c & 0xfc) == 0xf8) {
+				unichar = (0xff >> 6) & c;
+				skip = 4;
+			} else if ((c & 0xfe) == 0xfc) {
+				unichar = (0xff >> 7) & c;
+				skip = 5;
+			} else {
+				*(dst++) = 0x20;
+				unichar = 0;
+				skip = 0;
+			}
 		} else {
-			unichar = (0xff >> (len + 1)) & *p_utf8;
-
-			for (int i = 1; i < len; i++) {
-				if ((p_utf8[i] & 0xc0) != 0x80) {
-					_UNICERROR("invalid utf8");
-					return true; //invalid utf8
-				}
-				if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
-					_UNICERROR("invalid utf8 overlong");
-					return true; //no overlong
+			if (c < 0x80 || c > 0xbf) {
+				*(dst++) = 0x20;
+				skip = 0;
+			} else {
+				unichar = (unichar << 6) | (c & 0x3f);
+				--skip;
+				if (skip == 0) {
+					if (unichar == 0) {
+						print_unicode_error("NUL character", true);
+						decode_failed = true;
+						unichar = 0x20;
+					}
+					if ((unichar & 0xfffff800) == 0xd800) {
+						print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
+						decode_error = true;
+					}
+					if (unichar > 0x10ffff) {
+						print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
+						decode_error = true;
+					}
+					*(dst++) = unichar;
 				}
-				unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
 			}
 		}
-		if (unichar >= 0xd800 && unichar <= 0xdfff) {
-			_UNICERROR("invalid code point");
-			return CharString();
-		}
 
-		*(dst++) = unichar;
-		cstr_size -= len;
-		p_utf8 += len;
+		cstr_size--;
+		p_utf8++;
+	}
+	if (skip) {
+		*(dst++) = 0x20;
 	}
 
-	return false;
-#undef _UNICERROR
+	if (decode_failed) {
+		return ERR_INVALID_DATA;
+	} else if (decode_error) {
+		return ERR_PARSE_ERROR;
+	} else {
+		return OK;
+	}
 }
 
 CharString String::utf8() const {
@@ -1881,15 +1846,17 @@ CharString String::utf8() const {
 			fl += 2;
 		} else if (c <= 0xffff) { // 16 bits
 			fl += 3;
-		} else if (c <= 0x0010ffff) { // 21 bits
+		} else if (c <= 0x001fffff) { // 21 bits
 			fl += 4;
+		} else if (c <= 0x03ffffff) { // 26 bits
+			fl += 5;
+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
+		} else if (c <= 0x7fffffff) { // 31 bits
+			fl += 6;
+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
 		} else {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return CharString();
-		}
-		if (c >= 0xd800 && c <= 0xdfff) {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return CharString();
+			fl += 1;
+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
 		}
 	}
 
@@ -1915,11 +1882,26 @@ CharString String::utf8() const {
 			APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
 			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
 			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
-		} else { // 21 bits
+		} else if (c <= 0x001fffff) { // 21 bits
 			APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
 			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
 			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
 			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+		} else if (c <= 0x03ffffff) { // 26 bits
+			APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+		} else if (c <= 0x7fffffff) { // 31 bits
+			APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+		} else {
+			APPEND_CHAR(0x20);
 		}
 	}
 #undef APPEND_CHAR
@@ -1935,11 +1917,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) {
 	return ret;
 }
 
-bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
-
+Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
 	if (!p_utf16) {
-		return true;
+		return ERR_INVALID_DATA;
 	}
 
 	String aux;
@@ -1966,80 +1946,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
 		}
 	}
 
+	bool decode_error = false;
 	{
 		const char16_t *ptrtmp = p_utf16;
 		const char16_t *ptrtmp_limit = &p_utf16[p_len];
-		int skip = 0;
+		uint32_t c_prev = 0;
+		bool skip = false;
 		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
 			uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
-			if (skip == 0) {
-				if ((c & 0xfffffc00) == 0xd800) {
-					skip = 1; // lead surrogate
-				} else if ((c & 0xfffffc00) == 0xdc00) {
-					_UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
-					return true; // invalid UTF16
-				} else {
-					skip = 0;
+
+			if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+				if (skip) {
+					print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
+					decode_error = true;
 				}
-				str_size++;
-			} else {
-				if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
-					--skip;
+				skip = true;
+			} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+				if (skip) {
+					str_size--;
 				} else {
-					_UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
-					return true; // invalid UTF16
+					print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
+					decode_error = true;
 				}
+				skip = false;
+			} else {
+				skip = false;
 			}
 
+			c_prev = c;
+			str_size++;
 			cstr_size++;
 			ptrtmp++;
 		}
 
 		if (skip) {
-			_UNICERROR("no space left");
-			return true; // not enough space
+			print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
+			decode_error = true;
 		}
 	}
 
 	if (str_size == 0) {
 		clear();
-		return false;
+		return OK; // empty string
 	}
 
 	resize(str_size + 1);
 	char32_t *dst = ptrw();
 	dst[str_size] = 0;
 
+	bool skip = false;
+	uint32_t c_prev = 0;
 	while (cstr_size) {
-		int len = 0;
 		uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
 
-		if ((c & 0xfffffc00) == 0xd800) {
-			len = 2;
+		if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+			if (skip) {
+				*(dst++) = c_prev; // unpaired, store as is
+			}
+			skip = true;
+		} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+			if (skip) {
+				*(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
+			} else {
+				*(dst++) = c; // unpaired, store as is
+			}
+			skip = false;
 		} else {
-			len = 1;
-		}
-
-		if (len > cstr_size) {
-			_UNICERROR("no space left");
-			return true; //not enough space
+			*(dst++) = c;
+			skip = false;
 		}
 
-		uint32_t unichar = 0;
-		if (len == 1) {
-			unichar = c;
-		} else {
-			uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
-			unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
-		}
+		cstr_size--;
+		p_utf16++;
+		c_prev = c;
+	}
 
-		*(dst++) = unichar;
-		cstr_size -= len;
-		p_utf16 += len;
+	if (skip) {
+		*(dst++) = c_prev;
 	}
 
-	return false;
-#undef _UNICERROR
+	if (decode_error) {
+		return ERR_PARSE_ERROR;
+	} else {
+		return OK;
+	}
 }
 
 Char16String String::utf16() const {
@@ -2054,15 +2044,14 @@ Char16String String::utf16() const {
 		uint32_t c = d[i];
 		if (c <= 0xffff) { // 16 bits.
 			fl += 1;
+			if ((c & 0xfffff800) == 0xd800) {
+				print_unicode_error(vformat("Unpaired surrogate (%x)", c));
+			}
 		} else if (c <= 0x10ffff) { // 32 bits.
 			fl += 2;
 		} else {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return Char16String();
-		}
-		if (c >= 0xd800 && c <= 0xdfff) {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return Char16String();
+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
+			fl += 1;
 		}
 	}
 
@@ -2081,9 +2070,11 @@ Char16String String::utf16() const {
 
 		if (c <= 0xffff) { // 16 bits.
 			APPEND_CHAR(c);
-		} else { // 32 bits.
+		} else if (c <= 0x10ffff) { // 32 bits.
 			APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
 			APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
+		} else {
+			APPEND_CHAR(0x20);
 		}
 	}
 #undef APPEND_CHAR
@@ -2155,7 +2146,7 @@ int64_t String::hex_to_int() const {
 		}
 		// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
 		bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0');
-		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 		hex *= 16;
 		hex += n;
 		s++;
@@ -2194,7 +2185,7 @@ int64_t String::bin_to_int() const {
 		}
 		// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
 		bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0');
-		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 		binary *= 2;
 		binary += n;
 		s++;
@@ -2217,7 +2208,7 @@ int64_t String::to_int() const {
 		char32_t c = operator[](i);
 		if (is_digit(c)) {
 			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 			integer *= 10;
 			integer += c - '0';
 
@@ -2246,7 +2237,7 @@ int64_t String::to_int(const char *p_str, int p_len) {
 		char c = p_str[i];
 		if (is_digit(c)) {
 			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 			integer *= 10;
 			integer += c - '0';
 
@@ -2277,7 +2268,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) {
 		wchar_t c = p_str[i];
 		if (is_digit(c)) {
 			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 			integer *= 10;
 			integer += c - '0';
 
@@ -2308,7 +2299,7 @@ bool String::is_numeric() const {
 				return false;
 			}
 			dot = true;
-		} else if (c < '0' || c > '9') {
+		} else if (!is_digit(c)) {
 			return false;
 		}
 	}
@@ -2317,28 +2308,33 @@ bool String::is_numeric() const {
 }
 
 template <class C>
-static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number,
-				 * optionally preceded by white space. Must
-				 * have form "-I.FE-X", where I is the integer
-				 * part of the mantissa, F is the fractional
-				 * part of the mantissa, and X is the
-				 * exponent. Either of the signs may be "+",
-				 * "-", or omitted. Either I or F may be
-				 * omitted, or both. The decimal point isn't
-				 * necessary unless F is present. The "E" may
-				 * actually be an "e". E and X may both be
-				 * omitted (but not just one). */
-		C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's
-				 * address here. */
-{
-	static const int maxExponent = 511; /* Largest possible base 10 exponent.  Any
-					 * exponent larger than this will already
-					 * produce underflow or overflow, so there's
-					 * no need to worry about additional digits.
-					 */
-	static const double powersOf10[] = { /* Table giving binary powers of 10.  Entry */
-		10., /* is 10^2^i.  Used to convert decimal */
-		100., /* exponents into floating-point numbers. */
+static double built_in_strtod(
+		/* A decimal ASCII floating-point number,
+		 * optionally preceded by white space. Must
+		 * have form "-I.FE-X", where I is the integer
+		 * part of the mantissa, F is the fractional
+		 * part of the mantissa, and X is the
+		 * exponent. Either of the signs may be "+",
+		 * "-", or omitted. Either I or F may be
+		 * omitted, or both. The decimal point isn't
+		 * necessary unless F is present. The "E" may
+		 * actually be an "e". E and X may both be
+		 * omitted (but not just one). */
+		const C *string,
+		/* If non-nullptr, store terminating Cacter's
+		 * address here. */
+		C **endPtr = nullptr) {
+	/* Largest possible base 10 exponent. Any
+	 * exponent larger than this will already
+	 * produce underflow or overflow, so there's
+	 * no need to worry about additional digits. */
+	static const int maxExponent = 511;
+	/* Table giving binary powers of 10. Entry
+	 * is 10^2^i. Used to convert decimal
+	 * exponents into floating-point numbers. */
+	static const double powersOf10[] = {
+		10.,
+		100.,
 		1.0e4,
 		1.0e8,
 		1.0e16,
@@ -2353,25 +2349,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	const double *d;
 	const C *p;
 	int c;
-	int exp = 0; /* Exponent read from "EX" field. */
-	int fracExp = 0; /* Exponent that derives from the fractional
-				 * part. Under normal circumstances, it is
-				 * the negative of the number of digits in F.
-				 * However, if I is very long, the last digits
-				 * of I get dropped (otherwise a long I with a
-				 * large negative exponent could cause an
-				 * unnecessary overflow on I alone). In this
-				 * case, fracExp is incremented one for each
-				 * dropped digit. */
-	int mantSize; /* Number of digits in mantissa. */
-	int decPt; /* Number of mantissa digits BEFORE decimal
-				 * point. */
-	const C *pExp; /* Temporarily holds location of exponent in
-				 * string. */
+	/* Exponent read from "EX" field. */
+	int exp = 0;
+	/* Exponent that derives from the fractional
+	 * part. Under normal circumstances, it is
+	 * the negative of the number of digits in F.
+	 * However, if I is very long, the last digits
+	 * of I get dropped (otherwise a long I with a
+	 * large negative exponent could cause an
+	 * unnecessary overflow on I alone). In this
+	 * case, fracExp is incremented one for each
+	 * dropped digit. */
+	int fracExp = 0;
+	/* Number of digits in mantissa. */
+	int mantSize;
+	/* Number of mantissa digits BEFORE decimal point. */
+	int decPt;
+	/* Temporarily holds location of exponent in string. */
+	const C *pExp;
 
 	/*
-     * Strip off leading blanks and check for a sign.
-     */
+	 * Strip off leading blanks and check for a sign.
+	 */
 
 	p = string;
 	while (*p == ' ' || *p == '\t' || *p == '\n') {
@@ -2388,9 +2387,9 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Count the number of digits in the mantissa (including the decimal
-     * point), and also locate the decimal point.
-     */
+	 * Count the number of digits in the mantissa (including the decimal
+	 * point), and also locate the decimal point.
+	 */
 
 	decPt = -1;
 	for (mantSize = 0;; mantSize += 1) {
@@ -2405,11 +2404,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Now suck up the digits in the mantissa. Use two integers to collect 9
-     * digits each (this is faster than using floating-point). If the mantissa
-     * has more than 18 digits, ignore the extras, since they can't affect the
-     * value anyway.
-     */
+	 * Now suck up the digits in the mantissa. Use two integers to collect 9
+	 * digits each (this is faster than using floating-point). If the mantissa
+	 * has more than 18 digits, ignore the extras, since they can't affect the
+	 * value anyway.
+	 */
 
 	pExp = p;
 	p -= mantSize;
@@ -2455,8 +2454,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Skim off the exponent.
-     */
+	 * Skim off the exponent.
+	 */
 
 	p = pExp;
 	if ((*p == 'E') || (*p == 'e')) {
@@ -2486,10 +2485,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Generate a floating-point number that represents the exponent. Do this
-     * by processing the exponent one bit at a time to combine many powers of
-     * 2 of 10. Then combine the exponent with the fraction.
-     */
+	 * Generate a floating-point number that represents the exponent. Do this
+	 * by processing the exponent one bit at a time to combine many powers of
+	 * 2 of 10. Then combine the exponent with the fraction.
+	 */
 
 	if (exp < 0) {
 		expSign = true;
@@ -2591,7 +2590,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
 								return INT64_MIN;
 							}
 						} else {
-							ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+							ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 						}
 					}
 					integer *= 10;
@@ -3168,7 +3167,7 @@ bool String::is_subsequence_of(const String &p_string) const {
 	return _base_is_subsequence_of(p_string, false);
 }
 
-bool String::is_subsequence_ofi(const String &p_string) const {
+bool String::is_subsequence_ofn(const String &p_string) const {
 	return _base_is_subsequence_of(p_string, true);
 }
 
@@ -3452,51 +3451,52 @@ String String::replacen(const String &p_key, const String &p_with) const {
 String String::repeat(int p_count) const {
 	ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
 
-	String new_string;
-	const char32_t *src = this->get_data();
-
-	new_string.resize(length() * p_count + 1);
-	new_string[length() * p_count] = 0;
-
-	for (int i = 0; i < p_count; i++) {
-		for (int j = 0; j < length(); j++) {
-			new_string[i * length() + j] = src[j];
-		}
-	}
-
+	int len = length();
+	String new_string = *this;
+	new_string.resize(p_count * len + 1);
+
+	char32_t *dst = new_string.ptrw();
+	int offset = 1;
+	int stride = 1;
+	while (offset < p_count) {
+		memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t));
+		offset += stride;
+		stride = MIN(stride * 2, p_count - offset);
+	}
+	dst[p_count * len] = _null;
 	return new_string;
 }
 
-String String::left(int p_pos) const {
-	if (p_pos < 0) {
-		p_pos = length() + p_pos;
+String String::left(int p_len) const {
+	if (p_len < 0) {
+		p_len = length() + p_len;
 	}
 
-	if (p_pos <= 0) {
+	if (p_len <= 0) {
 		return "";
 	}
 
-	if (p_pos >= length()) {
+	if (p_len >= length()) {
 		return *this;
 	}
 
-	return substr(0, p_pos);
+	return substr(0, p_len);
 }
 
-String String::right(int p_pos) const {
-	if (p_pos < 0) {
-		p_pos = length() + p_pos;
+String String::right(int p_len) const {
+	if (p_len < 0) {
+		p_len = length() + p_len;
 	}
 
-	if (p_pos <= 0) {
+	if (p_len <= 0) {
 		return "";
 	}
 
-	if (p_pos >= length()) {
+	if (p_len >= length()) {
 		return *this;
 	}
 
-	return substr(length() - p_pos);
+	return substr(length() - p_len);
 }
 
 char32_t String::unicode_at(int p_idx) const {
@@ -3504,6 +3504,27 @@ char32_t String::unicode_at(int p_idx) const {
 	return operator[](p_idx);
 }
 
+String String::indent(const String &p_prefix) const {
+	String new_string;
+	int line_start = 0;
+
+	for (int i = 0; i < length(); i++) {
+		const char32_t c = operator[](i);
+		if (c == '\n') {
+			if (i == line_start) {
+				new_string += c; // Leave empty lines empty.
+			} else {
+				new_string += p_prefix + substr(line_start, i - line_start + 1);
+			}
+			line_start = i + 1;
+		}
+	}
+	if (line_start != length()) {
+		new_string += p_prefix + substr(line_start);
+	}
+	return new_string;
+}
+
 String String::dedent() const {
 	String new_string;
 	String indent;
@@ -3625,6 +3646,10 @@ String String::rstrip(const String &p_chars) const {
 	return substr(0, end + 1);
 }
 
+bool String::is_network_share_path() const {
+	return begins_with("//") || begins_with("\\\\");
+}
+
 String String::simplify_path() const {
 	String s = *this;
 	String drive;
@@ -3637,6 +3662,9 @@ String String::simplify_path() const {
 	} else if (s.begins_with("user://")) {
 		drive = "user://";
 		s = s.substr(7, s.length());
+	} else if (is_network_share_path()) {
+		drive = s.substr(0, 2);
+		s = s.substr(2, s.length() - 2);
 	} else if (s.begins_with("/") || s.begins_with("\\")) {
 		drive = s.substr(0, 1);
 		s = s.substr(1, s.length() - 1);
@@ -3665,15 +3693,15 @@ String String::simplify_path() const {
 	for (int i = 0; i < dirs.size(); i++) {
 		String d = dirs[i];
 		if (d == ".") {
-			dirs.remove(i);
+			dirs.remove_at(i);
 			i--;
 		} else if (d == "..") {
 			if (i == 0) {
-				dirs.remove(i);
+				dirs.remove_at(i);
 				i--;
 			} else {
-				dirs.remove(i);
-				dirs.remove(i - 1);
+				dirs.remove_at(i);
+				dirs.remove_at(i - 1);
 				i -= 2;
 			}
 		}
@@ -3735,6 +3763,31 @@ bool String::is_absolute_path() const {
 	}
 }
 
+static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) {
+	if (p_index == 0 && is_digit(p_char)) {
+		return false; // No start with number plz.
+	}
+	return is_ascii_identifier_char(p_char);
+}
+
+String String::validate_identifier() const {
+	if (is_empty()) {
+		return "_"; // Empty string is not a valid identifier;
+	}
+
+	String result = *this;
+	int len = result.length();
+	char32_t *buffer = result.ptrw();
+
+	for (int i = 0; i < len; i++) {
+		if (!_is_valid_identifier_bit(i, buffer[i])) {
+			buffer[i] = '_';
+		}
+	}
+
+	return result;
+}
+
 bool String::is_valid_identifier() const {
 	int len = length();
 
@@ -3745,15 +3798,7 @@ bool String::is_valid_identifier() const {
 	const char32_t *str = &operator[](0);
 
 	for (int i = 0; i < len; i++) {
-		if (i == 0) {
-			if (is_digit(str[0])) {
-				return false; // no start with number plz
-			}
-		}
-
-		bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_';
-
-		if (!valid_char) {
+		if (!_is_valid_identifier_bit(i, str[i])) {
 			return false;
 		}
 	}
@@ -3775,18 +3820,15 @@ String String::uri_encode() const {
 	const CharString temp = utf8();
 	String res;
 	for (int i = 0; i < temp.length(); ++i) {
-		char ord = temp[i];
-		if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) {
+		uint8_t ord = temp[i];
+		if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
 			res += ord;
 		} else {
-			char h_Val[3];
-#if defined(__GNUC__) || defined(_MSC_VER)
-			snprintf(h_Val, 3, "%02hhX", ord);
-#else
-			sprintf(h_Val, "%02hhX", ord);
-#endif
-			res += "%";
-			res += h_Val;
+			char p[4] = { '%', 0, 0, 0 };
+			static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+			p[1] = hex[ord >> 4];
+			p[2] = hex[ord & 0xF];
+			res += p;
 		}
 	}
 	return res;
@@ -3798,9 +3840,9 @@ String String::uri_decode() const {
 	for (int i = 0; i < src.length(); ++i) {
 		if (src[i] == '%' && i + 2 < src.length()) {
 			char ord1 = src[i + 1];
-			if (is_digit(ord1) || is_upper_case(ord1)) {
+			if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
 				char ord2 = src[i + 2];
-				if (is_digit(ord2) || is_upper_case(ord2)) {
+				if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
 					char bytes[3] = { (char)ord1, (char)ord2, 0 };
 					res += (char)strtol(bytes, nullptr, 16);
 					i += 2;
@@ -3927,7 +3969,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
 					for (int i = 2; i < p_src_len; i++) {
 						eat = i + 1;
 						char32_t ct = p_src[i];
-						if (ct == ';' || ct < '0' || ct > '9') {
+						if (ct == ';' || !is_digit(ct)) {
 							break;
 						}
 					}
@@ -4057,7 +4099,7 @@ String String::pad_zeros(int p_digits) const {
 
 	int begin = 0;
 
-	while (begin < end && (s[begin] < '0' || s[begin] > '9')) {
+	while (begin < end && !is_digit(s[begin])) {
 		begin++;
 	}
 
@@ -4102,7 +4144,7 @@ bool String::is_valid_int() const {
 	}
 
 	for (int i = from; i < len; i++) {
-		if (operator[](i) < '0' || operator[](i) > '9') {
+		if (!is_digit(operator[](i))) {
 			return false; // no start with number plz
 		}
 	}
@@ -4204,15 +4246,11 @@ String String::path_to(const String &p_path) const {
 		dst += "/";
 	}
 
-	String base;
-
 	if (src.begins_with("res://") && dst.begins_with("res://")) {
-		base = "res:/";
 		src = src.replace("res://", "/");
 		dst = dst.replace("res://", "/");
 
 	} else if (src.begins_with("user://") && dst.begins_with("user://")) {
-		base = "user:/";
 		src = src.replace("user://", "/");
 		dst = dst.replace("user://", "/");
 
@@ -4227,7 +4265,6 @@ String String::path_to(const String &p_path) const {
 			return p_path; //impossible to do this
 		}
 
-		base = src_begin;
 		src = src.substr(src_begin.length(), src.length());
 		dst = dst.substr(dst_begin.length(), dst.length());
 	}
@@ -4280,7 +4317,7 @@ bool String::is_valid_filename() const {
 		return false;
 	}
 
-	if (stripped == String()) {
+	if (stripped.is_empty()) {
 		return false;
 	}
 
@@ -4338,13 +4375,13 @@ bool String::is_relative_path() const {
 String String::get_base_dir() const {
 	int end = 0;
 
-	// url scheme style base
+	// URL scheme style base.
 	int basepos = find("://");
 	if (basepos != -1) {
 		end = basepos + 3;
 	}
 
-	// windows top level directory base
+	// Windows top level directory base.
 	if (end == 0) {
 		basepos = find(":/");
 		if (basepos == -1) {
@@ -4355,7 +4392,24 @@ String String::get_base_dir() const {
 		}
 	}
 
-	// unix root directory base
+	// Windows UNC network share path.
+	if (end == 0) {
+		if (is_network_share_path()) {
+			basepos = find("/", 2);
+			if (basepos == -1) {
+				basepos = find("\\", 2);
+			}
+			int servpos = find("/", basepos + 1);
+			if (servpos == -1) {
+				servpos = find("\\", basepos + 1);
+			}
+			if (servpos != -1) {
+				end = servpos + 1;
+			}
+		}
+	}
+
+	// Unix root directory base.
 	if (end == 0) {
 		if (begins_with("/")) {
 			end = 1;
@@ -4412,7 +4466,7 @@ String String::property_name_encode() const {
 	// as well as '"', '=' or ' ' (32)
 	const char32_t *cstr = get_data();
 	for (int i = 0; cstr[i]; i++) {
-		if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) {
+		if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) {
 			return "\"" + c_escape_multiline() + "\"";
 		}
 	}
@@ -4421,7 +4475,7 @@ String String::property_name_encode() const {
 }
 
 // Changes made to the set of invalid characters must also be reflected in the String documentation.
-const String String::invalid_node_name_characters = ". : @ / \"";
+const String String::invalid_node_name_characters = ". : @ / \" " UNIQUE_NODE_PREFIX;
 
 String String::validate_node_name() const {
 	Vector<String> chars = String::invalid_node_name_characters.split(" ");
@@ -4495,7 +4549,7 @@ String String::sprintf(const Array &values, bool *error) const {
 	int min_chars = 0;
 	int min_decimals = 0;
 	bool in_decimals = false;
-	bool pad_with_zeroes = false;
+	bool pad_with_zeros = false;
 	bool left_justified = false;
 	bool show_sign = false;
 
@@ -4548,7 +4602,7 @@ String String::sprintf(const Array &values, bool *error) const {
 
 					// Padding.
 					int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars;
-					String pad_char = pad_with_zeroes ? String("0") : String(" ");
+					String pad_char = pad_with_zeros ? String("0") : String(" ");
 					if (left_justified) {
 						str = str.rpad(pad_chars_count, pad_char);
 					} else {
@@ -4556,10 +4610,13 @@ String String::sprintf(const Array &values, bool *error) const {
 					}
 
 					// Sign.
-					if (show_sign && value >= 0) {
-						str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+");
-					} else if (value < 0) {
-						str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-");
+					if (show_sign || value < 0) {
+						String sign_char = value < 0 ? "-" : "+";
+						if (left_justified) {
+							str = str.insert(0, sign_char);
+						} else {
+							str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char);
+						}
 					}
 
 					formatted += str;
@@ -4588,13 +4645,9 @@ String String::sprintf(const Array &values, bool *error) const {
 
 					// Padding. Leave room for sign later if required.
 					int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
-					String pad_char = pad_with_zeroes ? String("0") : String(" ");
+					String pad_char = pad_with_zeros ? String("0") : String(" ");
 					if (left_justified) {
-						if (pad_with_zeroes) {
-							return "left justification cannot be used with zeros as the padding";
-						} else {
-							str = str.rpad(pad_chars_count, pad_char);
-						}
+						str = str.rpad(pad_chars_count, pad_char);
 					} else {
 						str = str.lpad(pad_chars_count, pad_char);
 					}
@@ -4605,7 +4658,7 @@ String String::sprintf(const Array &values, bool *error) const {
 						if (left_justified) {
 							str = str.insert(0, sign_char);
 						} else {
-							str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char);
+							str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char);
 						}
 					}
 
@@ -4694,7 +4747,11 @@ String String::sprintf(const Array &values, bool *error) const {
 						min_decimals += n;
 					} else {
 						if (c == '0' && min_chars == 0) {
-							pad_with_zeroes = true;
+							if (left_justified) {
+								WARN_PRINT("'0' flag ignored with '-' flag in string format");
+							} else {
+								pad_with_zeros = true;
+							}
 						} else {
 							min_chars *= 10;
 							min_chars += n;
@@ -4743,7 +4800,7 @@ String String::sprintf(const Array &values, bool *error) const {
 					// Back to defaults:
 					min_chars = 0;
 					min_decimals = 6;
-					pad_with_zeroes = false;
+					pad_with_zeros = false;
 					left_justified = false;
 					show_sign = false;
 					in_decimals = false;
@@ -4844,6 +4901,17 @@ Vector<uint8_t> String::to_utf32_buffer() const {
 }
 
 #ifdef TOOLS_ENABLED
+/**
+ * "Tools TRanslate". Performs string replacement for internationalization
+ * within the editor. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `TTRN()` instead of `TTR()`.
+ *
+ * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTR()` instead.
+ */
 String TTR(const String &p_text, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		return TranslationServer::get_singleton()->tool_translate(p_text, p_context);
@@ -4852,6 +4920,18 @@ String TTR(const String &p_text, const String &p_context) {
 	return p_text;
 }
 
+/**
+ * "Tools TRanslate for N items". Performs string replacement for
+ * internationalization within the editor. A translation context can optionally
+ * be specified to disambiguate between identical source strings in
+ * translations. Use `TTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTRN()` instead.
+ */
 String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
@@ -4864,37 +4944,62 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St
 	return p_text_plural;
 }
 
+/**
+ * "Docs TRanslate". Used for the editor class reference documentation,
+ * handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
+ */
 String DTR(const String &p_text, const String &p_context) {
 	// Comes straight from the XML, so remove indentation and any trailing whitespace.
 	const String text = p_text.dedent().strip_edges();
 
 	if (TranslationServer::get_singleton()) {
-		return TranslationServer::get_singleton()->doc_translate(text, p_context);
+		return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
 	}
 
-	return text;
+	return text.replace("$DOCS_URL", VERSION_DOCS_URL);
 }
 
+/**
+ * "Docs TRanslate for N items". Used for the editor class reference documentation
+ * (with support for plurals), handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
+ */
 String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
 	const String text = p_text.dedent().strip_edges();
 	const String text_plural = p_text_plural.dedent().strip_edges();
 
 	if (TranslationServer::get_singleton()) {
-		return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context);
+		return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
 	}
 
 	// Return message based on English plural rule if translation is not possible.
 	if (p_n == 1) {
-		return text;
+		return text.replace("$DOCS_URL", VERSION_DOCS_URL);
 	}
-	return text_plural;
+	return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL);
 }
 #endif
 
+/**
+ * "Run-time TRanslate". Performs string replacement for internationalization
+ * within a running project. The translation string must be supplied by the
+ * project, as Godot does not provide built-in translations for `RTR()` strings
+ * to keep binary size low. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `RTRN()` instead of `RTR()`.
+ *
+ * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTR()` instead.
+ */
 String RTR(const String &p_text, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context);
-		if (rtr == String() || rtr == p_text) {
+		if (rtr.is_empty() || rtr == p_text) {
 			return TranslationServer::get_singleton()->translate(p_text, p_context);
 		} else {
 			return rtr;
@@ -4904,10 +5009,24 @@ String RTR(const String &p_text, const String &p_context) {
 	return p_text;
 }
 
+/**
+ * "Run-time TRanslate for N items". Performs string replacement for
+ * internationalization within a running project. The translation string must be
+ * supplied by the project, as Godot does not provide built-in translations for
+ * `RTRN()` strings to keep binary size low. A translation context can
+ * optionally be specified to disambiguate between identical source strings in
+ * translations. Use `RTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTRN()` instead.
+ */
 String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
-		if (rtr == String() || rtr == p_text || rtr == p_text_plural) {
+		if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) {
 			return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context);
 		} else {
 			return rtr;