21 files changed, 3994 insertions, 1552 deletions
diff --git a/core/string/char_range.inc b/core/string/char_range.inc
new file mode 100644
index 0000000000..c0be9016ad
--- /dev/null
+++ b/core/string/char_range.inc
@@ -0,0 +1,1456 @@
+/*************************************************************************/
+/*  char_range.inc                                                       */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef CHAR_RANGE_INC
+#define CHAR_RANGE_INC
+
+#include "core/typedefs.h"
+
+struct CharRange { // One row of the character tables in this file: a pair of char32_t bounds.
+	char32_t start; // Lower bound of the range.
+	char32_t end; // Upper bound; presumably inclusive, since many rows have start == end - confirm against the lookup code.
+};
+
+static CharRange xid_start[] = { // { start, end } rows in ascending order, including '_' (0x5f); presumably the characters allowed to begin an identifier (Unicode XID_Start) - confirm. The last row is { 0x0, 0x0 }, which looks like an end-of-table sentinel. NOTE(review): nothing in this file writes to the table; consider making it const.
+	{ 0x41, 0x5a },
+	{ 0x5f, 0x5f },
+	{ 0x61, 0x7a },
+	{ 0xaa, 0xaa },
+	{ 0xb5, 0xb5 },
+	{ 0xba, 0xba },
+	{ 0xc0, 0xd6 },
+	{ 0xd8, 0xf6 },
+	{ 0xf8, 0x2c1 },
+	{ 0x2c6, 0x2d1 },
+	{ 0x2e0, 0x2e4 },
+	{ 0x2ec, 0x2ec },
+	{ 0x2ee, 0x2ee },
+	{ 0x370, 0x374 },
+	{ 0x376, 0x377 },
+	{ 0x37a, 0x37d },
+	{ 0x37f, 0x37f },
+	{ 0x386, 0x386 },
+	{ 0x388, 0x38a },
+	{ 0x38c, 0x38c },
+	{ 0x38e, 0x3a1 },
+	{ 0x3a3, 0x3f5 },
+	{ 0x3f7, 0x481 },
+	{ 0x48a, 0x52f },
+	{ 0x531, 0x556 },
+	{ 0x559, 0x559 },
+	{ 0x560, 0x588 },
+	{ 0x5d0, 0x5ea },
+	{ 0x5ef, 0x5f2 },
+	{ 0x620, 0x64a },
+	{ 0x66e, 0x66f },
+	{ 0x671, 0x6d3 },
+	{ 0x6d5, 0x6d5 },
+	{ 0x6e5, 0x6e6 },
+	{ 0x6ee, 0x6ef },
+	{ 0x6fa, 0x6fc },
+	{ 0x6ff, 0x6ff },
+	{ 0x710, 0x710 },
+	{ 0x712, 0x72f },
+	{ 0x74d, 0x7a5 },
+	{ 0x7b1, 0x7b1 },
+	{ 0x7ca, 0x7ea },
+	{ 0x7f4, 0x7f5 },
+	{ 0x7fa, 0x7fa },
+	{ 0x800, 0x815 },
+	{ 0x81a, 0x81a },
+	{ 0x824, 0x824 },
+	{ 0x828, 0x828 },
+	{ 0x840, 0x858 },
+	{ 0x860, 0x86a },
+	{ 0x870, 0x887 },
+	{ 0x889, 0x88e },
+	{ 0x8a0, 0x8c9 },
+	{ 0x904, 0x939 },
+	{ 0x93d, 0x93d },
+	{ 0x950, 0x950 },
+	{ 0x958, 0x961 },
+	{ 0x971, 0x980 },
+	{ 0x985, 0x98c },
+	{ 0x98f, 0x990 },
+	{ 0x993, 0x9a8 },
+	{ 0x9aa, 0x9b0 },
+	{ 0x9b2, 0x9b2 },
+	{ 0x9b6, 0x9b9 },
+	{ 0x9bd, 0x9bd },
+	{ 0x9ce, 0x9ce },
+	{ 0x9dc, 0x9dd },
+	{ 0x9df, 0x9e1 },
+	{ 0x9f0, 0x9f1 },
+	{ 0x9fc, 0x9fc },
+	{ 0xa05, 0xa0a },
+	{ 0xa0f, 0xa10 },
+	{ 0xa13, 0xa28 },
+	{ 0xa2a, 0xa30 },
+	{ 0xa32, 0xa33 },
+	{ 0xa35, 0xa36 },
+	{ 0xa38, 0xa39 },
+	{ 0xa59, 0xa5c },
+	{ 0xa5e, 0xa5e },
+	{ 0xa72, 0xa74 },
+	{ 0xa85, 0xa8d },
+	{ 0xa8f, 0xa91 },
+	{ 0xa93, 0xaa8 },
+	{ 0xaaa, 0xab0 },
+	{ 0xab2, 0xab3 },
+	{ 0xab5, 0xab9 },
+	{ 0xabd, 0xabd },
+	{ 0xad0, 0xad0 },
+	{ 0xae0, 0xae1 },
+	{ 0xaf9, 0xaf9 },
+	{ 0xb05, 0xb0c },
+	{ 0xb0f, 0xb10 },
+	{ 0xb13, 0xb28 },
+	{ 0xb2a, 0xb30 },
+	{ 0xb32, 0xb33 },
+	{ 0xb35, 0xb39 },
+	{ 0xb3d, 0xb3d },
+	{ 0xb5c, 0xb5d },
+	{ 0xb5f, 0xb61 },
+	{ 0xb71, 0xb71 },
+	{ 0xb83, 0xb83 },
+	{ 0xb85, 0xb8a },
+	{ 0xb8e, 0xb90 },
+	{ 0xb92, 0xb95 },
+	{ 0xb99, 0xb9a },
+	{ 0xb9c, 0xb9c },
+	{ 0xb9e, 0xb9f },
+	{ 0xba3, 0xba4 },
+	{ 0xba8, 0xbaa },
+	{ 0xbae, 0xbb9 },
+	{ 0xbd0, 0xbd0 },
+	{ 0xc05, 0xc0c },
+	{ 0xc0e, 0xc10 },
+	{ 0xc12, 0xc28 },
+	{ 0xc2a, 0xc39 },
+	{ 0xc3d, 0xc3d },
+	{ 0xc58, 0xc5a },
+	{ 0xc5d, 0xc5d },
+	{ 0xc60, 0xc61 },
+	{ 0xc80, 0xc80 },
+	{ 0xc85, 0xc8c },
+	{ 0xc8e, 0xc90 },
+	{ 0xc92, 0xca8 },
+	{ 0xcaa, 0xcb3 },
+	{ 0xcb5, 0xcb9 },
+	{ 0xcbd, 0xcbd },
+	{ 0xcdd, 0xcde },
+	{ 0xce0, 0xce1 },
+	{ 0xcf1, 0xcf2 },
+	{ 0xd04, 0xd0c },
+	{ 0xd0e, 0xd10 },
+	{ 0xd12, 0xd3a },
+	{ 0xd3d, 0xd3d },
+	{ 0xd4e, 0xd4e },
+	{ 0xd54, 0xd56 },
+	{ 0xd5f, 0xd61 },
+	{ 0xd7a, 0xd7f },
+	{ 0xd85, 0xd96 },
+	{ 0xd9a, 0xdb1 },
+	{ 0xdb3, 0xdbb },
+	{ 0xdbd, 0xdbd },
+	{ 0xdc0, 0xdc6 },
+	{ 0xe01, 0xe30 },
+	{ 0xe32, 0xe33 },
+	{ 0xe40, 0xe46 },
+	{ 0xe81, 0xe82 },
+	{ 0xe84, 0xe84 },
+	{ 0xe86, 0xe8a },
+	{ 0xe8c, 0xea3 },
+	{ 0xea5, 0xea5 },
+	{ 0xea7, 0xeb0 },
+	{ 0xeb2, 0xeb3 },
+	{ 0xebd, 0xebd },
+	{ 0xec0, 0xec4 },
+	{ 0xec6, 0xec6 },
+	{ 0xedc, 0xedf },
+	{ 0xf00, 0xf00 },
+	{ 0xf40, 0xf47 },
+	{ 0xf49, 0xf6c },
+	{ 0xf88, 0xf8c },
+	{ 0x1000, 0x102a },
+	{ 0x103f, 0x103f },
+	{ 0x1050, 0x1055 },
+	{ 0x105a, 0x105d },
+	{ 0x1061, 0x1061 },
+	{ 0x1065, 0x1066 },
+	{ 0x106e, 0x1070 },
+	{ 0x1075, 0x1081 },
+	{ 0x108e, 0x108e },
+	{ 0x10a0, 0x10c5 },
+	{ 0x10c7, 0x10c7 },
+	{ 0x10cd, 0x10cd },
+	{ 0x10d0, 0x10fa },
+	{ 0x10fc, 0x1248 },
+	{ 0x124a, 0x124d },
+	{ 0x1250, 0x1256 },
+	{ 0x1258, 0x1258 },
+	{ 0x125a, 0x125d },
+	{ 0x1260, 0x1288 },
+	{ 0x128a, 0x128d },
+	{ 0x1290, 0x12b0 },
+	{ 0x12b2, 0x12b5 },
+	{ 0x12b8, 0x12be },
+	{ 0x12c0, 0x12c0 },
+	{ 0x12c2, 0x12c5 },
+	{ 0x12c8, 0x12d6 },
+	{ 0x12d8, 0x1310 },
+	{ 0x1312, 0x1315 },
+	{ 0x1318, 0x135a },
+	{ 0x1380, 0x138f },
+	{ 0x13a0, 0x13f5 },
+	{ 0x13f8, 0x13fd },
+	{ 0x1401, 0x166c },
+	{ 0x166f, 0x167f },
+	{ 0x1681, 0x169a },
+	{ 0x16a0, 0x16ea },
+	{ 0x16ee, 0x16f8 },
+	{ 0x1700, 0x1711 },
+	{ 0x171f, 0x1731 },
+	{ 0x1740, 0x1751 },
+	{ 0x1760, 0x176c },
+	{ 0x176e, 0x1770 },
+	{ 0x1780, 0x17b3 },
+	{ 0x17d7, 0x17d7 },
+	{ 0x17dc, 0x17dc },
+	{ 0x1820, 0x1878 },
+	{ 0x1880, 0x1884 },
+	{ 0x1887, 0x18a8 },
+	{ 0x18aa, 0x18aa },
+	{ 0x18b0, 0x18f5 },
+	{ 0x1900, 0x191e },
+	{ 0x1950, 0x196d },
+	{ 0x1970, 0x1974 },
+	{ 0x1980, 0x19ab },
+	{ 0x19b0, 0x19c9 },
+	{ 0x1a00, 0x1a16 },
+	{ 0x1a20, 0x1a54 },
+	{ 0x1aa7, 0x1aa7 },
+	{ 0x1b05, 0x1b33 },
+	{ 0x1b45, 0x1b4c },
+	{ 0x1b83, 0x1ba0 },
+	{ 0x1bae, 0x1baf },
+	{ 0x1bba, 0x1be5 },
+	{ 0x1c00, 0x1c23 },
+	{ 0x1c4d, 0x1c4f },
+	{ 0x1c5a, 0x1c7d },
+	{ 0x1c80, 0x1c88 },
+	{ 0x1c90, 0x1cba },
+	{ 0x1cbd, 0x1cbf },
+	{ 0x1ce9, 0x1cec },
+	{ 0x1cee, 0x1cf3 },
+	{ 0x1cf5, 0x1cf6 },
+	{ 0x1cfa, 0x1cfa },
+	{ 0x1d00, 0x1dbf },
+	{ 0x1e00, 0x1f15 },
+	{ 0x1f18, 0x1f1d },
+	{ 0x1f20, 0x1f45 },
+	{ 0x1f48, 0x1f4d },
+	{ 0x1f50, 0x1f57 },
+	{ 0x1f59, 0x1f59 },
+	{ 0x1f5b, 0x1f5b },
+	{ 0x1f5d, 0x1f5d },
+	{ 0x1f5f, 0x1f7d },
+	{ 0x1f80, 0x1fb4 },
+	{ 0x1fb6, 0x1fbc },
+	{ 0x1fbe, 0x1fbe },
+	{ 0x1fc2, 0x1fc4 },
+	{ 0x1fc6, 0x1fcc },
+	{ 0x1fd0, 0x1fd3 },
+	{ 0x1fd6, 0x1fdb },
+	{ 0x1fe0, 0x1fec },
+	{ 0x1ff2, 0x1ff4 },
+	{ 0x1ff6, 0x1ffc },
+	{ 0x2071, 0x2071 },
+	{ 0x207f, 0x207f },
+	{ 0x2090, 0x209c },
+	{ 0x2102, 0x2102 },
+	{ 0x2107, 0x2107 },
+	{ 0x210a, 0x2113 },
+	{ 0x2115, 0x2115 },
+	{ 0x2118, 0x211d },
+	{ 0x2124, 0x2124 },
+	{ 0x2126, 0x2126 },
+	{ 0x2128, 0x2128 },
+	{ 0x212a, 0x2139 },
+	{ 0x213c, 0x213f },
+	{ 0x2145, 0x2149 },
+	{ 0x214e, 0x214e },
+	{ 0x2160, 0x2188 },
+	{ 0x2c00, 0x2ce4 },
+	{ 0x2ceb, 0x2cee },
+	{ 0x2cf2, 0x2cf3 },
+	{ 0x2d00, 0x2d25 },
+	{ 0x2d27, 0x2d27 },
+	{ 0x2d2d, 0x2d2d },
+	{ 0x2d30, 0x2d67 },
+	{ 0x2d6f, 0x2d6f },
+	{ 0x2d80, 0x2d96 },
+	{ 0x2da0, 0x2da6 },
+	{ 0x2da8, 0x2dae },
+	{ 0x2db0, 0x2db6 },
+	{ 0x2db8, 0x2dbe },
+	{ 0x2dc0, 0x2dc6 },
+	{ 0x2dc8, 0x2dce },
+	{ 0x2dd0, 0x2dd6 },
+	{ 0x2dd8, 0x2dde },
+	{ 0x3005, 0x3007 },
+	{ 0x3021, 0x3029 },
+	{ 0x3031, 0x3035 },
+	{ 0x3038, 0x303c },
+	{ 0x3041, 0x3096 },
+	{ 0x309b, 0x309f },
+	{ 0x30a1, 0x30fa },
+	{ 0x30fc, 0x30ff },
+	{ 0x3105, 0x312f },
+	{ 0x3131, 0x318e },
+	{ 0x31a0, 0x31bf },
+	{ 0x31f0, 0x31ff },
+	{ 0x3400, 0x4dbf },
+	{ 0x4e00, 0xa48c },
+	{ 0xa4d0, 0xa4fd },
+	{ 0xa500, 0xa60c },
+	{ 0xa610, 0xa61f },
+	{ 0xa62a, 0xa62b },
+	{ 0xa640, 0xa66e },
+	{ 0xa67f, 0xa69d },
+	{ 0xa6a0, 0xa6ef },
+	{ 0xa717, 0xa71f },
+	{ 0xa722, 0xa788 },
+	{ 0xa78b, 0xa7ca },
+	{ 0xa7d0, 0xa7d1 },
+	{ 0xa7d3, 0xa7d3 },
+	{ 0xa7d5, 0xa7d9 },
+	{ 0xa7f2, 0xa801 },
+	{ 0xa803, 0xa805 },
+	{ 0xa807, 0xa80a },
+	{ 0xa80c, 0xa822 },
+	{ 0xa840, 0xa873 },
+	{ 0xa882, 0xa8b3 },
+	{ 0xa8f2, 0xa8f7 },
+	{ 0xa8fb, 0xa8fb },
+	{ 0xa8fd, 0xa8fe },
+	{ 0xa90a, 0xa925 },
+	{ 0xa930, 0xa946 },
+	{ 0xa960, 0xa97c },
+	{ 0xa984, 0xa9b2 },
+	{ 0xa9cf, 0xa9cf },
+	{ 0xa9e0, 0xa9e4 },
+	{ 0xa9e6, 0xa9ef },
+	{ 0xa9fa, 0xa9fe },
+	{ 0xaa00, 0xaa28 },
+	{ 0xaa40, 0xaa42 },
+	{ 0xaa44, 0xaa4b },
+	{ 0xaa60, 0xaa76 },
+	{ 0xaa7a, 0xaa7a },
+	{ 0xaa7e, 0xaaaf },
+	{ 0xaab1, 0xaab1 },
+	{ 0xaab5, 0xaab6 },
+	{ 0xaab9, 0xaabd },
+	{ 0xaac0, 0xaac0 },
+	{ 0xaac2, 0xaac2 },
+	{ 0xaadb, 0xaadd },
+	{ 0xaae0, 0xaaea },
+	{ 0xaaf2, 0xaaf4 },
+	{ 0xab01, 0xab06 },
+	{ 0xab09, 0xab0e },
+	{ 0xab11, 0xab16 },
+	{ 0xab20, 0xab26 },
+	{ 0xab28, 0xab2e },
+	{ 0xab30, 0xab5a },
+	{ 0xab5c, 0xab69 },
+	{ 0xab70, 0xabe2 },
+	{ 0xac00, 0xd7a3 },
+	{ 0xd7b0, 0xd7c6 },
+	{ 0xd7cb, 0xd7fb },
+	{ 0xf900, 0xfa6d },
+	{ 0xfa70, 0xfad9 },
+	{ 0xfb00, 0xfb06 },
+	{ 0xfb13, 0xfb17 },
+	{ 0xfb1d, 0xfb1d },
+	{ 0xfb1f, 0xfb28 },
+	{ 0xfb2a, 0xfb36 },
+	{ 0xfb38, 0xfb3c },
+	{ 0xfb3e, 0xfb3e },
+	{ 0xfb40, 0xfb41 },
+	{ 0xfb43, 0xfb44 },
+	{ 0xfb46, 0xfbb1 },
+	{ 0xfbd3, 0xfd3d },
+	{ 0xfd50, 0xfd8f },
+	{ 0xfd92, 0xfdc7 },
+	{ 0xfdf0, 0xfdfb },
+	{ 0xfe70, 0xfe74 },
+	{ 0xfe76, 0xfefc },
+	{ 0xff21, 0xff3a },
+	{ 0xff41, 0xff5a },
+	{ 0xff66, 0xffbe },
+	{ 0xffc2, 0xffc7 },
+	{ 0xffca, 0xffcf },
+	{ 0xffd2, 0xffd7 },
+	{ 0xffda, 0xffdc },
+	{ 0x10000, 0x1000b },
+	{ 0x1000d, 0x10026 },
+	{ 0x10028, 0x1003a },
+	{ 0x1003c, 0x1003d },
+	{ 0x1003f, 0x1004d },
+	{ 0x10050, 0x1005d },
+	{ 0x10080, 0x100fa },
+	{ 0x10140, 0x10174 },
+	{ 0x10280, 0x1029c },
+	{ 0x102a0, 0x102d0 },
+	{ 0x10300, 0x1031f },
+	{ 0x1032d, 0x1034a },
+	{ 0x10350, 0x10375 },
+	{ 0x10380, 0x1039d },
+	{ 0x103a0, 0x103c3 },
+	{ 0x103c8, 0x103cf },
+	{ 0x103d1, 0x103d5 },
+	{ 0x10400, 0x1049d },
+	{ 0x104b0, 0x104d3 },
+	{ 0x104d8, 0x104fb },
+	{ 0x10500, 0x10527 },
+	{ 0x10530, 0x10563 },
+	{ 0x10570, 0x1057a },
+	{ 0x1057c, 0x1058a },
+	{ 0x1058c, 0x10592 },
+	{ 0x10594, 0x10595 },
+	{ 0x10597, 0x105a1 },
+	{ 0x105a3, 0x105b1 },
+	{ 0x105b3, 0x105b9 },
+	{ 0x105bb, 0x105bc },
+	{ 0x10600, 0x10736 },
+	{ 0x10740, 0x10755 },
+	{ 0x10760, 0x10767 },
+	{ 0x10780, 0x10785 },
+	{ 0x10787, 0x107b0 },
+	{ 0x107b2, 0x107ba },
+	{ 0x10800, 0x10805 },
+	{ 0x10808, 0x10808 },
+	{ 0x1080a, 0x10835 },
+	{ 0x10837, 0x10838 },
+	{ 0x1083c, 0x1083c },
+	{ 0x1083f, 0x10855 },
+	{ 0x10860, 0x10876 },
+	{ 0x10880, 0x1089e },
+	{ 0x108e0, 0x108f2 },
+	{ 0x108f4, 0x108f5 },
+	{ 0x10900, 0x10915 },
+	{ 0x10920, 0x10939 },
+	{ 0x10980, 0x109b7 },
+	{ 0x109be, 0x109bf },
+	{ 0x10a00, 0x10a00 },
+	{ 0x10a10, 0x10a13 },
+	{ 0x10a15, 0x10a17 },
+	{ 0x10a19, 0x10a35 },
+	{ 0x10a60, 0x10a7c },
+	{ 0x10a80, 0x10a9c },
+	{ 0x10ac0, 0x10ac7 },
+	{ 0x10ac9, 0x10ae4 },
+	{ 0x10b00, 0x10b35 },
+	{ 0x10b40, 0x10b55 },
+	{ 0x10b60, 0x10b72 },
+	{ 0x10b80, 0x10b91 },
+	{ 0x10c00, 0x10c48 },
+	{ 0x10c80, 0x10cb2 },
+	{ 0x10cc0, 0x10cf2 },
+	{ 0x10d00, 0x10d23 },
+	{ 0x10e80, 0x10ea9 },
+	{ 0x10eb0, 0x10eb1 },
+	{ 0x10f00, 0x10f1c },
+	{ 0x10f27, 0x10f27 },
+	{ 0x10f30, 0x10f45 },
+	{ 0x10f70, 0x10f81 },
+	{ 0x10fb0, 0x10fc4 },
+	{ 0x10fe0, 0x10ff6 },
+	{ 0x11003, 0x11037 },
+	{ 0x11071, 0x11072 },
+	{ 0x11075, 0x11075 },
+	{ 0x11083, 0x110af },
+	{ 0x110d0, 0x110e8 },
+	{ 0x11103, 0x11126 },
+	{ 0x11144, 0x11144 },
+	{ 0x11147, 0x11147 },
+	{ 0x11150, 0x11172 },
+	{ 0x11176, 0x11176 },
+	{ 0x11183, 0x111b2 },
+	{ 0x111c1, 0x111c4 },
+	{ 0x111da, 0x111da },
+	{ 0x111dc, 0x111dc },
+	{ 0x11200, 0x11211 },
+	{ 0x11213, 0x1122b },
+	{ 0x11280, 0x11286 },
+	{ 0x11288, 0x11288 },
+	{ 0x1128a, 0x1128d },
+	{ 0x1128f, 0x1129d },
+	{ 0x1129f, 0x112a8 },
+	{ 0x112b0, 0x112de },
+	{ 0x11305, 0x1130c },
+	{ 0x1130f, 0x11310 },
+	{ 0x11313, 0x11328 },
+	{ 0x1132a, 0x11330 },
+	{ 0x11332, 0x11333 },
+	{ 0x11335, 0x11339 },
+	{ 0x1133d, 0x1133d },
+	{ 0x11350, 0x11350 },
+	{ 0x1135d, 0x11361 },
+	{ 0x11400, 0x11434 },
+	{ 0x11447, 0x1144a },
+	{ 0x1145f, 0x11461 },
+	{ 0x11480, 0x114af },
+	{ 0x114c4, 0x114c5 },
+	{ 0x114c7, 0x114c7 },
+	{ 0x11580, 0x115ae },
+	{ 0x115d8, 0x115db },
+	{ 0x11600, 0x1162f },
+	{ 0x11644, 0x11644 },
+	{ 0x11680, 0x116aa },
+	{ 0x116b8, 0x116b8 },
+	{ 0x11700, 0x1171a },
+	{ 0x11740, 0x11746 },
+	{ 0x11800, 0x1182b },
+	{ 0x118a0, 0x118df },
+	{ 0x118ff, 0x11906 },
+	{ 0x11909, 0x11909 },
+	{ 0x1190c, 0x11913 },
+	{ 0x11915, 0x11916 },
+	{ 0x11918, 0x1192f },
+	{ 0x1193f, 0x1193f },
+	{ 0x11941, 0x11941 },
+	{ 0x119a0, 0x119a7 },
+	{ 0x119aa, 0x119d0 },
+	{ 0x119e1, 0x119e1 },
+	{ 0x119e3, 0x119e3 },
+	{ 0x11a00, 0x11a00 },
+	{ 0x11a0b, 0x11a32 },
+	{ 0x11a3a, 0x11a3a },
+	{ 0x11a50, 0x11a50 },
+	{ 0x11a5c, 0x11a89 },
+	{ 0x11a9d, 0x11a9d },
+	{ 0x11ab0, 0x11af8 },
+	{ 0x11c00, 0x11c08 },
+	{ 0x11c0a, 0x11c2e },
+	{ 0x11c40, 0x11c40 },
+	{ 0x11c72, 0x11c8f },
+	{ 0x11d00, 0x11d06 },
+	{ 0x11d08, 0x11d09 },
+	{ 0x11d0b, 0x11d30 },
+	{ 0x11d46, 0x11d46 },
+	{ 0x11d60, 0x11d65 },
+	{ 0x11d67, 0x11d68 },
+	{ 0x11d6a, 0x11d89 },
+	{ 0x11d98, 0x11d98 },
+	{ 0x11ee0, 0x11ef2 },
+	{ 0x11fb0, 0x11fb0 },
+	{ 0x12000, 0x12399 },
+	{ 0x12400, 0x1246e },
+	{ 0x12480, 0x12543 },
+	{ 0x12f90, 0x12ff0 },
+	{ 0x13000, 0x1342e },
+	{ 0x14400, 0x14646 },
+	{ 0x16800, 0x16a38 },
+	{ 0x16a40, 0x16a5e },
+	{ 0x16a70, 0x16abe },
+	{ 0x16ad0, 0x16aed },
+	{ 0x16b00, 0x16b2f },
+	{ 0x16b40, 0x16b43 },
+	{ 0x16b63, 0x16b77 },
+	{ 0x16b7d, 0x16b8f },
+	{ 0x16e40, 0x16e7f },
+	{ 0x16f00, 0x16f4a },
+	{ 0x16f50, 0x16f50 },
+	{ 0x16f93, 0x16f9f },
+	{ 0x16fe0, 0x16fe1 },
+	{ 0x16fe3, 0x16fe3 },
+	{ 0x17000, 0x187f7 },
+	{ 0x18800, 0x18cd5 },
+	{ 0x18d00, 0x18d08 },
+	{ 0x1aff0, 0x1aff3 },
+	{ 0x1aff5, 0x1affb },
+	{ 0x1affd, 0x1affe },
+	{ 0x1b000, 0x1b122 },
+	{ 0x1b150, 0x1b152 },
+	{ 0x1b164, 0x1b167 },
+	{ 0x1b170, 0x1b2fb },
+	{ 0x1bc00, 0x1bc6a },
+	{ 0x1bc70, 0x1bc7c },
+	{ 0x1bc80, 0x1bc88 },
+	{ 0x1bc90, 0x1bc99 },
+	{ 0x1d400, 0x1d454 },
+	{ 0x1d456, 0x1d49c },
+	{ 0x1d49e, 0x1d49f },
+	{ 0x1d4a2, 0x1d4a2 },
+	{ 0x1d4a5, 0x1d4a6 },
+	{ 0x1d4a9, 0x1d4ac },
+	{ 0x1d4ae, 0x1d4b9 },
+	{ 0x1d4bb, 0x1d4bb },
+	{ 0x1d4bd, 0x1d4c3 },
+	{ 0x1d4c5, 0x1d505 },
+	{ 0x1d507, 0x1d50a },
+	{ 0x1d50d, 0x1d514 },
+	{ 0x1d516, 0x1d51c },
+	{ 0x1d51e, 0x1d539 },
+	{ 0x1d53b, 0x1d53e },
+	{ 0x1d540, 0x1d544 },
+	{ 0x1d546, 0x1d546 },
+	{ 0x1d54a, 0x1d550 },
+	{ 0x1d552, 0x1d6a5 },
+	{ 0x1d6a8, 0x1d6c0 },
+	{ 0x1d6c2, 0x1d6da },
+	{ 0x1d6dc, 0x1d6fa },
+	{ 0x1d6fc, 0x1d714 },
+	{ 0x1d716, 0x1d734 },
+	{ 0x1d736, 0x1d74e },
+	{ 0x1d750, 0x1d76e },
+	{ 0x1d770, 0x1d788 },
+	{ 0x1d78a, 0x1d7a8 },
+	{ 0x1d7aa, 0x1d7c2 },
+	{ 0x1d7c4, 0x1d7cb },
+	{ 0x1df00, 0x1df1e },
+	{ 0x1e100, 0x1e12c },
+	{ 0x1e137, 0x1e13d },
+	{ 0x1e14e, 0x1e14e },
+	{ 0x1e290, 0x1e2ad },
+	{ 0x1e2c0, 0x1e2eb },
+	{ 0x1e7e0, 0x1e7e6 },
+	{ 0x1e7e8, 0x1e7eb },
+	{ 0x1e7ed, 0x1e7ee },
+	{ 0x1e7f0, 0x1e7fe },
+	{ 0x1e800, 0x1e8c4 },
+	{ 0x1e900, 0x1e943 },
+	{ 0x1e94b, 0x1e94b },
+	{ 0x1ee00, 0x1ee03 },
+	{ 0x1ee05, 0x1ee1f },
+	{ 0x1ee21, 0x1ee22 },
+	{ 0x1ee24, 0x1ee24 },
+	{ 0x1ee27, 0x1ee27 },
+	{ 0x1ee29, 0x1ee32 },
+	{ 0x1ee34, 0x1ee37 },
+	{ 0x1ee39, 0x1ee39 },
+	{ 0x1ee3b, 0x1ee3b },
+	{ 0x1ee42, 0x1ee42 },
+	{ 0x1ee47, 0x1ee47 },
+	{ 0x1ee49, 0x1ee49 },
+	{ 0x1ee4b, 0x1ee4b },
+	{ 0x1ee4d, 0x1ee4f },
+	{ 0x1ee51, 0x1ee52 },
+	{ 0x1ee54, 0x1ee54 },
+	{ 0x1ee57, 0x1ee57 },
+	{ 0x1ee59, 0x1ee59 },
+	{ 0x1ee5b, 0x1ee5b },
+	{ 0x1ee5d, 0x1ee5d },
+	{ 0x1ee5f, 0x1ee5f },
+	{ 0x1ee61, 0x1ee62 },
+	{ 0x1ee64, 0x1ee64 },
+	{ 0x1ee67, 0x1ee6a },
+	{ 0x1ee6c, 0x1ee72 },
+	{ 0x1ee74, 0x1ee77 },
+	{ 0x1ee79, 0x1ee7c },
+	{ 0x1ee7e, 0x1ee7e },
+	{ 0x1ee80, 0x1ee89 },
+	{ 0x1ee8b, 0x1ee9b },
+	{ 0x1eea1, 0x1eea3 },
+	{ 0x1eea5, 0x1eea9 },
+	{ 0x1eeab, 0x1eebb },
+	{ 0x20000, 0x2a6df },
+	{ 0x2a700, 0x2b738 },
+	{ 0x2b740, 0x2b81d },
+	{ 0x2b820, 0x2cea1 },
+	{ 0x2ceb0, 0x2ebe0 },
+	{ 0x2f800, 0x2fa1d },
+	{ 0x30000, 0x3134a },
+	{ 0x0, 0x0 },
+};
+
+static CharRange xid_continue[] = { // { start, end } rows in ascending order, including the ASCII digits (0x30-0x39) and '_' (0x5f); presumably the characters allowed after the first character of an identifier (Unicode XID_Continue) - confirm. The last row is { 0x0, 0x0 }, which looks like an end-of-table sentinel. NOTE(review): nothing in this file writes to the table; consider making it const.
+	{ 0x30, 0x39 },
+	{ 0x41, 0x5a },
+	{ 0x5f, 0x5f },
+	{ 0x61, 0x7a },
+	{ 0xaa, 0xaa },
+	{ 0xb5, 0xb5 },
+	{ 0xb7, 0xb7 },
+	{ 0xba, 0xba },
+	{ 0xc0, 0xd6 },
+	{ 0xd8, 0xf6 },
+	{ 0xf8, 0x2c1 },
+	{ 0x2c6, 0x2d1 },
+	{ 0x2e0, 0x2e4 },
+	{ 0x2ec, 0x2ec },
+	{ 0x2ee, 0x2ee },
+	{ 0x300, 0x374 },
+	{ 0x376, 0x377 },
+	{ 0x37a, 0x37d },
+	{ 0x37f, 0x37f },
+	{ 0x386, 0x38a },
+	{ 0x38c, 0x38c },
+	{ 0x38e, 0x3a1 },
+	{ 0x3a3, 0x3f5 },
+	{ 0x3f7, 0x481 },
+	{ 0x483, 0x487 },
+	{ 0x48a, 0x52f },
+	{ 0x531, 0x556 },
+	{ 0x559, 0x559 },
+	{ 0x560, 0x588 },
+	{ 0x591, 0x5bd },
+	{ 0x5bf, 0x5bf },
+	{ 0x5c1, 0x5c2 },
+	{ 0x5c4, 0x5c5 },
+	{ 0x5c7, 0x5c7 },
+	{ 0x5d0, 0x5ea },
+	{ 0x5ef, 0x5f2 },
+	{ 0x610, 0x61a },
+	{ 0x620, 0x669 },
+	{ 0x66e, 0x6d3 },
+	{ 0x6d5, 0x6dc },
+	{ 0x6df, 0x6e8 },
+	{ 0x6ea, 0x6fc },
+	{ 0x6ff, 0x6ff },
+	{ 0x710, 0x74a },
+	{ 0x74d, 0x7b1 },
+	{ 0x7c0, 0x7f5 },
+	{ 0x7fa, 0x7fa },
+	{ 0x7fd, 0x7fd },
+	{ 0x800, 0x82d },
+	{ 0x840, 0x85b },
+	{ 0x860, 0x86a },
+	{ 0x870, 0x887 },
+	{ 0x889, 0x88e },
+	{ 0x898, 0x8e1 },
+	{ 0x8e3, 0x963 },
+	{ 0x966, 0x96f },
+	{ 0x971, 0x983 },
+	{ 0x985, 0x98c },
+	{ 0x98f, 0x990 },
+	{ 0x993, 0x9a8 },
+	{ 0x9aa, 0x9b0 },
+	{ 0x9b2, 0x9b2 },
+	{ 0x9b6, 0x9b9 },
+	{ 0x9bc, 0x9c4 },
+	{ 0x9c7, 0x9c8 },
+	{ 0x9cb, 0x9ce },
+	{ 0x9d7, 0x9d7 },
+	{ 0x9dc, 0x9dd },
+	{ 0x9df, 0x9e3 },
+	{ 0x9e6, 0x9f1 },
+	{ 0x9fc, 0x9fc },
+	{ 0x9fe, 0x9fe },
+	{ 0xa01, 0xa03 },
+	{ 0xa05, 0xa0a },
+	{ 0xa0f, 0xa10 },
+	{ 0xa13, 0xa28 },
+	{ 0xa2a, 0xa30 },
+	{ 0xa32, 0xa33 },
+	{ 0xa35, 0xa36 },
+	{ 0xa38, 0xa39 },
+	{ 0xa3c, 0xa3c },
+	{ 0xa3e, 0xa42 },
+	{ 0xa47, 0xa48 },
+	{ 0xa4b, 0xa4d },
+	{ 0xa51, 0xa51 },
+	{ 0xa59, 0xa5c },
+	{ 0xa5e, 0xa5e },
+	{ 0xa66, 0xa75 },
+	{ 0xa81, 0xa83 },
+	{ 0xa85, 0xa8d },
+	{ 0xa8f, 0xa91 },
+	{ 0xa93, 0xaa8 },
+	{ 0xaaa, 0xab0 },
+	{ 0xab2, 0xab3 },
+	{ 0xab5, 0xab9 },
+	{ 0xabc, 0xac5 },
+	{ 0xac7, 0xac9 },
+	{ 0xacb, 0xacd },
+	{ 0xad0, 0xad0 },
+	{ 0xae0, 0xae3 },
+	{ 0xae6, 0xaef },
+	{ 0xaf9, 0xaff },
+	{ 0xb01, 0xb03 },
+	{ 0xb05, 0xb0c },
+	{ 0xb0f, 0xb10 },
+	{ 0xb13, 0xb28 },
+	{ 0xb2a, 0xb30 },
+	{ 0xb32, 0xb33 },
+	{ 0xb35, 0xb39 },
+	{ 0xb3c, 0xb44 },
+	{ 0xb47, 0xb48 },
+	{ 0xb4b, 0xb4d },
+	{ 0xb55, 0xb57 },
+	{ 0xb5c, 0xb5d },
+	{ 0xb5f, 0xb63 },
+	{ 0xb66, 0xb6f },
+	{ 0xb71, 0xb71 },
+	{ 0xb82, 0xb83 },
+	{ 0xb85, 0xb8a },
+	{ 0xb8e, 0xb90 },
+	{ 0xb92, 0xb95 },
+	{ 0xb99, 0xb9a },
+	{ 0xb9c, 0xb9c },
+	{ 0xb9e, 0xb9f },
+	{ 0xba3, 0xba4 },
+	{ 0xba8, 0xbaa },
+	{ 0xbae, 0xbb9 },
+	{ 0xbbe, 0xbc2 },
+	{ 0xbc6, 0xbc8 },
+	{ 0xbca, 0xbcd },
+	{ 0xbd0, 0xbd0 },
+	{ 0xbd7, 0xbd7 },
+	{ 0xbe6, 0xbef },
+	{ 0xc00, 0xc0c },
+	{ 0xc0e, 0xc10 },
+	{ 0xc12, 0xc28 },
+	{ 0xc2a, 0xc39 },
+	{ 0xc3c, 0xc44 },
+	{ 0xc46, 0xc48 },
+	{ 0xc4a, 0xc4d },
+	{ 0xc55, 0xc56 },
+	{ 0xc58, 0xc5a },
+	{ 0xc5d, 0xc5d },
+	{ 0xc60, 0xc63 },
+	{ 0xc66, 0xc6f },
+	{ 0xc80, 0xc83 },
+	{ 0xc85, 0xc8c },
+	{ 0xc8e, 0xc90 },
+	{ 0xc92, 0xca8 },
+	{ 0xcaa, 0xcb3 },
+	{ 0xcb5, 0xcb9 },
+	{ 0xcbc, 0xcc4 },
+	{ 0xcc6, 0xcc8 },
+	{ 0xcca, 0xccd },
+	{ 0xcd5, 0xcd6 },
+	{ 0xcdd, 0xcde },
+	{ 0xce0, 0xce3 },
+	{ 0xce6, 0xcef },
+	{ 0xcf1, 0xcf2 },
+	{ 0xd00, 0xd0c },
+	{ 0xd0e, 0xd10 },
+	{ 0xd12, 0xd44 },
+	{ 0xd46, 0xd48 },
+	{ 0xd4a, 0xd4e },
+	{ 0xd54, 0xd57 },
+	{ 0xd5f, 0xd63 },
+	{ 0xd66, 0xd6f },
+	{ 0xd7a, 0xd7f },
+	{ 0xd81, 0xd83 },
+	{ 0xd85, 0xd96 },
+	{ 0xd9a, 0xdb1 },
+	{ 0xdb3, 0xdbb },
+	{ 0xdbd, 0xdbd },
+	{ 0xdc0, 0xdc6 },
+	{ 0xdca, 0xdca },
+	{ 0xdcf, 0xdd4 },
+	{ 0xdd6, 0xdd6 },
+	{ 0xdd8, 0xddf },
+	{ 0xde6, 0xdef },
+	{ 0xdf2, 0xdf3 },
+	{ 0xe01, 0xe3a },
+	{ 0xe40, 0xe4e },
+	{ 0xe50, 0xe59 },
+	{ 0xe81, 0xe82 },
+	{ 0xe84, 0xe84 },
+	{ 0xe86, 0xe8a },
+	{ 0xe8c, 0xea3 },
+	{ 0xea5, 0xea5 },
+	{ 0xea7, 0xebd },
+	{ 0xec0, 0xec4 },
+	{ 0xec6, 0xec6 },
+	{ 0xec8, 0xecd },
+	{ 0xed0, 0xed9 },
+	{ 0xedc, 0xedf },
+	{ 0xf00, 0xf00 },
+	{ 0xf18, 0xf19 },
+	{ 0xf20, 0xf29 },
+	{ 0xf35, 0xf35 },
+	{ 0xf37, 0xf37 },
+	{ 0xf39, 0xf39 },
+	{ 0xf3e, 0xf47 },
+	{ 0xf49, 0xf6c },
+	{ 0xf71, 0xf84 },
+	{ 0xf86, 0xf97 },
+	{ 0xf99, 0xfbc },
+	{ 0xfc6, 0xfc6 },
+	{ 0x1000, 0x1049 },
+	{ 0x1050, 0x109d },
+	{ 0x10a0, 0x10c5 },
+	{ 0x10c7, 0x10c7 },
+	{ 0x10cd, 0x10cd },
+	{ 0x10d0, 0x10fa },
+	{ 0x10fc, 0x1248 },
+	{ 0x124a, 0x124d },
+	{ 0x1250, 0x1256 },
+	{ 0x1258, 0x1258 },
+	{ 0x125a, 0x125d },
+	{ 0x1260, 0x1288 },
+	{ 0x128a, 0x128d },
+	{ 0x1290, 0x12b0 },
+	{ 0x12b2, 0x12b5 },
+	{ 0x12b8, 0x12be },
+	{ 0x12c0, 0x12c0 },
+	{ 0x12c2, 0x12c5 },
+	{ 0x12c8, 0x12d6 },
+	{ 0x12d8, 0x1310 },
+	{ 0x1312, 0x1315 },
+	{ 0x1318, 0x135a },
+	{ 0x135d, 0x135f },
+	{ 0x1369, 0x1369 },
+	{ 0x1371, 0x1371 },
+	{ 0x1380, 0x138f },
+	{ 0x13a0, 0x13f5 },
+	{ 0x13f8, 0x13fd },
+	{ 0x1401, 0x166c },
+	{ 0x166f, 0x167f },
+	{ 0x1681, 0x169a },
+	{ 0x16a0, 0x16ea },
+	{ 0x16ee, 0x16f8 },
+	{ 0x1700, 0x1715 },
+	{ 0x171f, 0x1734 },
+	{ 0x1740, 0x1753 },
+	{ 0x1760, 0x176c },
+	{ 0x176e, 0x1770 },
+	{ 0x1772, 0x1773 },
+	{ 0x1780, 0x17d3 },
+	{ 0x17d7, 0x17d7 },
+	{ 0x17dc, 0x17dd },
+	{ 0x17e0, 0x17e9 },
+	{ 0x180b, 0x180d },
+	{ 0x180f, 0x1819 },
+	{ 0x1820, 0x1878 },
+	{ 0x1880, 0x18aa },
+	{ 0x18b0, 0x18f5 },
+	{ 0x1900, 0x191e },
+	{ 0x1920, 0x192b },
+	{ 0x1930, 0x193b },
+	{ 0x1946, 0x196d },
+	{ 0x1970, 0x1974 },
+	{ 0x1980, 0x19ab },
+	{ 0x19b0, 0x19c9 },
+	{ 0x19d0, 0x19da },
+	{ 0x1a00, 0x1a1b },
+	{ 0x1a20, 0x1a5e },
+	{ 0x1a60, 0x1a7c },
+	{ 0x1a7f, 0x1a89 },
+	{ 0x1a90, 0x1a99 },
+	{ 0x1aa7, 0x1aa7 },
+	{ 0x1ab0, 0x1abd },
+	{ 0x1abf, 0x1ace },
+	{ 0x1b00, 0x1b4c },
+	{ 0x1b50, 0x1b59 },
+	{ 0x1b6b, 0x1b73 },
+	{ 0x1b80, 0x1bf3 },
+	{ 0x1c00, 0x1c37 },
+	{ 0x1c40, 0x1c49 },
+	{ 0x1c4d, 0x1c7d },
+	{ 0x1c80, 0x1c88 },
+	{ 0x1c90, 0x1cba },
+	{ 0x1cbd, 0x1cbf },
+	{ 0x1cd0, 0x1cd2 },
+	{ 0x1cd4, 0x1cfa },
+	{ 0x1d00, 0x1f15 },
+	{ 0x1f18, 0x1f1d },
+	{ 0x1f20, 0x1f45 },
+	{ 0x1f48, 0x1f4d },
+	{ 0x1f50, 0x1f57 },
+	{ 0x1f59, 0x1f59 },
+	{ 0x1f5b, 0x1f5b },
+	{ 0x1f5d, 0x1f5d },
+	{ 0x1f5f, 0x1f7d },
+	{ 0x1f80, 0x1fb4 },
+	{ 0x1fb6, 0x1fbc },
+	{ 0x1fbe, 0x1fbe },
+	{ 0x1fc2, 0x1fc4 },
+	{ 0x1fc6, 0x1fcc },
+	{ 0x1fd0, 0x1fd3 },
+	{ 0x1fd6, 0x1fdb },
+	{ 0x1fe0, 0x1fec },
+	{ 0x1ff2, 0x1ff4 },
+	{ 0x1ff6, 0x1ffc },
+	{ 0x203f, 0x2040 },
+	{ 0x2054, 0x2054 },
+	{ 0x2071, 0x2071 },
+	{ 0x207f, 0x207f },
+	{ 0x2090, 0x209c },
+	{ 0x20d0, 0x20dc },
+	{ 0x20e1, 0x20e1 },
+	{ 0x20e5, 0x20f0 },
+	{ 0x2102, 0x2102 },
+	{ 0x2107, 0x2107 },
+	{ 0x210a, 0x2113 },
+	{ 0x2115, 0x2115 },
+	{ 0x2118, 0x211d },
+	{ 0x2124, 0x2124 },
+	{ 0x2126, 0x2126 },
+	{ 0x2128, 0x2128 },
+	{ 0x212a, 0x2139 },
+	{ 0x213c, 0x213f },
+	{ 0x2145, 0x2149 },
+	{ 0x214e, 0x214e },
+	{ 0x2160, 0x2188 },
+	{ 0x2c00, 0x2ce4 },
+	{ 0x2ceb, 0x2cf3 },
+	{ 0x2d00, 0x2d25 },
+	{ 0x2d27, 0x2d27 },
+	{ 0x2d2d, 0x2d2d },
+	{ 0x2d30, 0x2d67 },
+	{ 0x2d6f, 0x2d6f },
+	{ 0x2d7f, 0x2d96 },
+	{ 0x2da0, 0x2da6 },
+	{ 0x2da8, 0x2dae },
+	{ 0x2db0, 0x2db6 },
+	{ 0x2db8, 0x2dbe },
+	{ 0x2dc0, 0x2dc6 },
+	{ 0x2dc8, 0x2dce },
+	{ 0x2dd0, 0x2dd6 },
+	{ 0x2dd8, 0x2dde },
+	{ 0x2de0, 0x2dff },
+	{ 0x3005, 0x3007 },
+	{ 0x3021, 0x302f },
+	{ 0x3031, 0x3035 },
+	{ 0x3038, 0x303c },
+	{ 0x3041, 0x3096 },
+	{ 0x3099, 0x309f },
+	{ 0x30a1, 0x30fa },
+	{ 0x30fc, 0x30ff },
+	{ 0x3105, 0x312f },
+	{ 0x3131, 0x318e },
+	{ 0x31a0, 0x31bf },
+	{ 0x31f0, 0x31ff },
+	{ 0x3400, 0x4dbf },
+	{ 0x4e00, 0xa48c },
+	{ 0xa4d0, 0xa4fd },
+	{ 0xa500, 0xa60c },
+	{ 0xa610, 0xa62b },
+	{ 0xa640, 0xa66f },
+	{ 0xa674, 0xa67d },
+	{ 0xa67f, 0xa6f1 },
+	{ 0xa717, 0xa71f },
+	{ 0xa722, 0xa788 },
+	{ 0xa78b, 0xa7ca },
+	{ 0xa7d0, 0xa7d1 },
+	{ 0xa7d3, 0xa7d3 },
+	{ 0xa7d5, 0xa7d9 },
+	{ 0xa7f2, 0xa827 },
+	{ 0xa82c, 0xa82c },
+	{ 0xa840, 0xa873 },
+	{ 0xa880, 0xa8c5 },
+	{ 0xa8d0, 0xa8d9 },
+	{ 0xa8e0, 0xa8f7 },
+	{ 0xa8fb, 0xa8fb },
+	{ 0xa8fd, 0xa92d },
+	{ 0xa930, 0xa953 },
+	{ 0xa960, 0xa97c },
+	{ 0xa980, 0xa9c0 },
+	{ 0xa9cf, 0xa9d9 },
+	{ 0xa9e0, 0xa9fe },
+	{ 0xaa00, 0xaa36 },
+	{ 0xaa40, 0xaa4d },
+	{ 0xaa50, 0xaa59 },
+	{ 0xaa60, 0xaa76 },
+	{ 0xaa7a, 0xaac2 },
+	{ 0xaadb, 0xaadd },
+	{ 0xaae0, 0xaaef },
+	{ 0xaaf2, 0xaaf6 },
+	{ 0xab01, 0xab06 },
+	{ 0xab09, 0xab0e },
+	{ 0xab11, 0xab16 },
+	{ 0xab20, 0xab26 },
+	{ 0xab28, 0xab2e },
+	{ 0xab30, 0xab5a },
+	{ 0xab5c, 0xab69 },
+	{ 0xab70, 0xabea },
+	{ 0xabec, 0xabed },
+	{ 0xabf0, 0xabf9 },
+	{ 0xac00, 0xd7a3 },
+	{ 0xd7b0, 0xd7c6 },
+	{ 0xd7cb, 0xd7fb },
+	{ 0xf900, 0xfa6d },
+	{ 0xfa70, 0xfad9 },
+	{ 0xfb00, 0xfb06 },
+	{ 0xfb13, 0xfb17 },
+	{ 0xfb1d, 0xfb28 },
+	{ 0xfb2a, 0xfb36 },
+	{ 0xfb38, 0xfb3c },
+	{ 0xfb3e, 0xfb3e },
+	{ 0xfb40, 0xfb41 },
+	{ 0xfb43, 0xfb44 },
+	{ 0xfb46, 0xfbb1 },
+	{ 0xfbd3, 0xfd3d },
+	{ 0xfd50, 0xfd8f },
+	{ 0xfd92, 0xfdc7 },
+	{ 0xfdf0, 0xfdfb },
+	{ 0xfe00, 0xfe0f },
+	{ 0xfe20, 0xfe2f },
+	{ 0xfe33, 0xfe34 },
+	{ 0xfe4d, 0xfe4f },
+	{ 0xfe70, 0xfe74 },
+	{ 0xfe76, 0xfefc },
+	{ 0xff10, 0xff19 },
+	{ 0xff21, 0xff3a },
+	{ 0xff3f, 0xff3f },
+	{ 0xff41, 0xff5a },
+	{ 0xff66, 0xffbe },
+	{ 0xffc2, 0xffc7 },
+	{ 0xffca, 0xffcf },
+	{ 0xffd2, 0xffd7 },
+	{ 0xffda, 0xffdc },
+	{ 0x10000, 0x1000b },
+	{ 0x1000d, 0x10026 },
+	{ 0x10028, 0x1003a },
+	{ 0x1003c, 0x1003d },
+	{ 0x1003f, 0x1004d },
+	{ 0x10050, 0x1005d },
+	{ 0x10080, 0x100fa },
+	{ 0x10140, 0x10174 },
+	{ 0x101fd, 0x101fd },
+	{ 0x10280, 0x1029c },
+	{ 0x102a0, 0x102d0 },
+	{ 0x102e0, 0x102e0 },
+	{ 0x10300, 0x1031f },
+	{ 0x1032d, 0x1034a },
+	{ 0x10350, 0x1037a },
+	{ 0x10380, 0x1039d },
+	{ 0x103a0, 0x103c3 },
+	{ 0x103c8, 0x103cf },
+	{ 0x103d1, 0x103d5 },
+	{ 0x10400, 0x1049d },
+	{ 0x104a0, 0x104a9 },
+	{ 0x104b0, 0x104d3 },
+	{ 0x104d8, 0x104fb },
+	{ 0x10500, 0x10527 },
+	{ 0x10530, 0x10563 },
+	{ 0x10570, 0x1057a },
+	{ 0x1057c, 0x1058a },
+	{ 0x1058c, 0x10592 },
+	{ 0x10594, 0x10595 },
+	{ 0x10597, 0x105a1 },
+	{ 0x105a3, 0x105b1 },
+	{ 0x105b3, 0x105b9 },
+	{ 0x105bb, 0x105bc },
+	{ 0x10600, 0x10736 },
+	{ 0x10740, 0x10755 },
+	{ 0x10760, 0x10767 },
+	{ 0x10780, 0x10785 },
+	{ 0x10787, 0x107b0 },
+	{ 0x107b2, 0x107ba },
+	{ 0x10800, 0x10805 },
+	{ 0x10808, 0x10808 },
+	{ 0x1080a, 0x10835 },
+	{ 0x10837, 0x10838 },
+	{ 0x1083c, 0x1083c },
+	{ 0x1083f, 0x10855 },
+	{ 0x10860, 0x10876 },
+	{ 0x10880, 0x1089e },
+	{ 0x108e0, 0x108f2 },
+	{ 0x108f4, 0x108f5 },
+	{ 0x10900, 0x10915 },
+	{ 0x10920, 0x10939 },
+	{ 0x10980, 0x109b7 },
+	{ 0x109be, 0x109bf },
+	{ 0x10a00, 0x10a03 },
+	{ 0x10a05, 0x10a06 },
+	{ 0x10a0c, 0x10a13 },
+	{ 0x10a15, 0x10a17 },
+	{ 0x10a19, 0x10a35 },
+	{ 0x10a38, 0x10a3a },
+	{ 0x10a3f, 0x10a3f },
+	{ 0x10a60, 0x10a7c },
+	{ 0x10a80, 0x10a9c },
+	{ 0x10ac0, 0x10ac7 },
+	{ 0x10ac9, 0x10ae6 },
+	{ 0x10b00, 0x10b35 },
+	{ 0x10b40, 0x10b55 },
+	{ 0x10b60, 0x10b72 },
+	{ 0x10b80, 0x10b91 },
+	{ 0x10c00, 0x10c48 },
+	{ 0x10c80, 0x10cb2 },
+	{ 0x10cc0, 0x10cf2 },
+	{ 0x10d00, 0x10d27 },
+	{ 0x10d30, 0x10d39 },
+	{ 0x10e80, 0x10ea9 },
+	{ 0x10eab, 0x10eac },
+	{ 0x10eb0, 0x10eb1 },
+	{ 0x10f00, 0x10f1c },
+	{ 0x10f27, 0x10f27 },
+	{ 0x10f30, 0x10f50 },
+	{ 0x10f70, 0x10f85 },
+	{ 0x10fb0, 0x10fc4 },
+	{ 0x10fe0, 0x10ff6 },
+	{ 0x11000, 0x11046 },
+	{ 0x11066, 0x11075 },
+	{ 0x1107f, 0x110ba },
+	{ 0x110c2, 0x110c2 },
+	{ 0x110d0, 0x110e8 },
+	{ 0x110f0, 0x110f9 },
+	{ 0x11100, 0x11134 },
+	{ 0x11136, 0x1113f },
+	{ 0x11144, 0x11147 },
+	{ 0x11150, 0x11173 },
+	{ 0x11176, 0x11176 },
+	{ 0x11180, 0x111c4 },
+	{ 0x111c9, 0x111cc },
+	{ 0x111ce, 0x111da },
+	{ 0x111dc, 0x111dc },
+	{ 0x11200, 0x11211 },
+	{ 0x11213, 0x11237 },
+	{ 0x1123e, 0x1123e },
+	{ 0x11280, 0x11286 },
+	{ 0x11288, 0x11288 },
+	{ 0x1128a, 0x1128d },
+	{ 0x1128f, 0x1129d },
+	{ 0x1129f, 0x112a8 },
+	{ 0x112b0, 0x112ea },
+	{ 0x112f0, 0x112f9 },
+	{ 0x11300, 0x11303 },
+	{ 0x11305, 0x1130c },
+	{ 0x1130f, 0x11310 },
+	{ 0x11313, 0x11328 },
+	{ 0x1132a, 0x11330 },
+	{ 0x11332, 0x11333 },
+	{ 0x11335, 0x11339 },
+	{ 0x1133b, 0x11344 },
+	{ 0x11347, 0x11348 },
+	{ 0x1134b, 0x1134d },
+	{ 0x11350, 0x11350 },
+	{ 0x11357, 0x11357 },
+	{ 0x1135d, 0x11363 },
+	{ 0x11366, 0x1136c },
+	{ 0x11370, 0x11374 },
+	{ 0x11400, 0x1144a },
+	{ 0x11450, 0x11459 },
+	{ 0x1145e, 0x11461 },
+	{ 0x11480, 0x114c5 },
+	{ 0x114c7, 0x114c7 },
+	{ 0x114d0, 0x114d9 },
+	{ 0x11580, 0x115b5 },
+	{ 0x115b8, 0x115c0 },
+	{ 0x115d8, 0x115dd },
+	{ 0x11600, 0x11640 },
+	{ 0x11644, 0x11644 },
+	{ 0x11650, 0x11659 },
+	{ 0x11680, 0x116b8 },
+	{ 0x116c0, 0x116c9 },
+	{ 0x11700, 0x1171a },
+	{ 0x1171d, 0x1172b },
+	{ 0x11730, 0x11739 },
+	{ 0x11740, 0x11746 },
+	{ 0x11800, 0x1183a },
+	{ 0x118a0, 0x118e9 },
+	{ 0x118ff, 0x11906 },
+	{ 0x11909, 0x11909 },
+	{ 0x1190c, 0x11913 },
+	{ 0x11915, 0x11916 },
+	{ 0x11918, 0x11935 },
+	{ 0x11937, 0x11938 },
+	{ 0x1193b, 0x11943 },
+	{ 0x11950, 0x11959 },
+	{ 0x119a0, 0x119a7 },
+	{ 0x119aa, 0x119d7 },
+	{ 0x119da, 0x119e1 },
+	{ 0x119e3, 0x119e4 },
+	{ 0x11a00, 0x11a3e },
+	{ 0x11a47, 0x11a47 },
+	{ 0x11a50, 0x11a99 },
+	{ 0x11a9d, 0x11a9d },
+	{ 0x11ab0, 0x11af8 },
+	{ 0x11c00, 0x11c08 },
+	{ 0x11c0a, 0x11c36 },
+	{ 0x11c38, 0x11c40 },
+	{ 0x11c50, 0x11c59 },
+	{ 0x11c72, 0x11c8f },
+	{ 0x11c92, 0x11ca7 },
+	{ 0x11ca9, 0x11cb6 },
+	{ 0x11d00, 0x11d06 },
+	{ 0x11d08, 0x11d09 },
+	{ 0x11d0b, 0x11d36 },
+	{ 0x11d3a, 0x11d3a },
+	{ 0x11d3c, 0x11d3d },
+	{ 0x11d3f, 0x11d47 },
+	{ 0x11d50, 0x11d59 },
+	{ 0x11d60, 0x11d65 },
+	{ 0x11d67, 0x11d68 },
+	{ 0x11d6a, 0x11d8e },
+	{ 0x11d90, 0x11d91 },
+	{ 0x11d93, 0x11d98 },
+	{ 0x11da0, 0x11da9 },
+	{ 0x11ee0, 0x11ef6 },
+	{ 0x11fb0, 0x11fb0 },
+	{ 0x12000, 0x12399 },
+	{ 0x12400, 0x1246e },
+	{ 0x12480, 0x12543 },
+	{ 0x12f90, 0x12ff0 },
+	{ 0x13000, 0x1342e },
+	{ 0x14400, 0x14646 },
+	{ 0x16800, 0x16a38 },
+	{ 0x16a40, 0x16a5e },
+	{ 0x16a60, 0x16a69 },
+	{ 0x16a70, 0x16abe },
+	{ 0x16ac0, 0x16ac9 },
+	{ 0x16ad0, 0x16aed },
+	{ 0x16af0, 0x16af4 },
+	{ 0x16b00, 0x16b36 },
+	{ 0x16b40, 0x16b43 },
+	{ 0x16b50, 0x16b59 },
+	{ 0x16b63, 0x16b77 },
+	{ 0x16b7d, 0x16b8f },
+	{ 0x16e40, 0x16e7f },
+	{ 0x16f00, 0x16f4a },
+	{ 0x16f4f, 0x16f87 },
+	{ 0x16f8f, 0x16f9f },
+	{ 0x16fe0, 0x16fe1 },
+	{ 0x16fe3, 0x16fe4 },
+	{ 0x16ff0, 0x16ff1 },
+	{ 0x17000, 0x187f7 },
+	{ 0x18800, 0x18cd5 },
+	{ 0x18d00, 0x18d08 },
+	{ 0x1aff0, 0x1aff3 },
+	{ 0x1aff5, 0x1affb },
+	{ 0x1affd, 0x1affe },
+	{ 0x1b000, 0x1b122 },
+	{ 0x1b150, 0x1b152 },
+	{ 0x1b164, 0x1b167 },
+	{ 0x1b170, 0x1b2fb },
+	{ 0x1bc00, 0x1bc6a },
+	{ 0x1bc70, 0x1bc7c },
+	{ 0x1bc80, 0x1bc88 },
+	{ 0x1bc90, 0x1bc99 },
+	{ 0x1bc9d, 0x1bc9e },
+	{ 0x1cf00, 0x1cf2d },
+	{ 0x1cf30, 0x1cf46 },
+	{ 0x1d165, 0x1d169 },
+	{ 0x1d16d, 0x1d172 },
+	{ 0x1d17b, 0x1d182 },
+	{ 0x1d185, 0x1d18b },
+	{ 0x1d1aa, 0x1d1ad },
+	{ 0x1d242, 0x1d244 },
+	{ 0x1d400, 0x1d454 },
+	{ 0x1d456, 0x1d49c },
+	{ 0x1d49e, 0x1d49f },
+	{ 0x1d4a2, 0x1d4a2 },
+	{ 0x1d4a5, 0x1d4a6 },
+	{ 0x1d4a9, 0x1d4ac },
+	{ 0x1d4ae, 0x1d4b9 },
+	{ 0x1d4bb, 0x1d4bb },
+	{ 0x1d4bd, 0x1d4c3 },
+	{ 0x1d4c5, 0x1d505 },
+	{ 0x1d507, 0x1d50a },
+	{ 0x1d50d, 0x1d514 },
+	{ 0x1d516, 0x1d51c },
+	{ 0x1d51e, 0x1d539 },
+	{ 0x1d53b, 0x1d53e },
+	{ 0x1d540, 0x1d544 },
+	{ 0x1d546, 0x1d546 },
+	{ 0x1d54a, 0x1d550 },
+	{ 0x1d552, 0x1d6a5 },
+	{ 0x1d6a8, 0x1d6c0 },
+	{ 0x1d6c2, 0x1d6da },
+	{ 0x1d6dc, 0x1d6fa },
+	{ 0x1d6fc, 0x1d714 },
+	{ 0x1d716, 0x1d734 },
+	{ 0x1d736, 0x1d74e },
+	{ 0x1d750, 0x1d76e },
+	{ 0x1d770, 0x1d788 },
+	{ 0x1d78a, 0x1d7a8 },
+	{ 0x1d7aa, 0x1d7c2 },
+	{ 0x1d7c4, 0x1d7cb },
+	{ 0x1d7ce, 0x1d7ff },
+	{ 0x1da00, 0x1da36 },
+	{ 0x1da3b, 0x1da6c },
+	{ 0x1da75, 0x1da75 },
+	{ 0x1da84, 0x1da84 },
+	{ 0x1da9b, 0x1da9f },
+	{ 0x1daa1, 0x1daaf },
+	{ 0x1df00, 0x1df1e },
+	{ 0x1e000, 0x1e006 },
+	{ 0x1e008, 0x1e018 },
+	{ 0x1e01b, 0x1e021 },
+	{ 0x1e023, 0x1e024 },
+	{ 0x1e026, 0x1e02a },
+	{ 0x1e100, 0x1e12c },
+	{ 0x1e130, 0x1e13d },
+	{ 0x1e140, 0x1e149 },
+	{ 0x1e14e, 0x1e14e },
+	{ 0x1e290, 0x1e2ae },
+	{ 0x1e2c0, 0x1e2f9 },
+	{ 0x1e7e0, 0x1e7e6 },
+	{ 0x1e7e8, 0x1e7eb },
+	{ 0x1e7ed, 0x1e7ee },
+	{ 0x1e7f0, 0x1e7fe },
+	{ 0x1e800, 0x1e8c4 },
+	{ 0x1e8d0, 0x1e8d6 },
+	{ 0x1e900, 0x1e94b },
+	{ 0x1e950, 0x1e959 },
+	{ 0x1ee00, 0x1ee03 },
+	{ 0x1ee05, 0x1ee1f },
+	{ 0x1ee21, 0x1ee22 },
+	{ 0x1ee24, 0x1ee24 },
+	{ 0x1ee27, 0x1ee27 },
+	{ 0x1ee29, 0x1ee32 },
+	{ 0x1ee34, 0x1ee37 },
+	{ 0x1ee39, 0x1ee39 },
+	{ 0x1ee3b, 0x1ee3b },
+	{ 0x1ee42, 0x1ee42 },
+	{ 0x1ee47, 0x1ee47 },
+	{ 0x1ee49, 0x1ee49 },
+	{ 0x1ee4b, 0x1ee4b },
+	{ 0x1ee4d, 0x1ee4f },
+	{ 0x1ee51, 0x1ee52 },
+	{ 0x1ee54, 0x1ee54 },
+	{ 0x1ee57, 0x1ee57 },
+	{ 0x1ee59, 0x1ee59 },
+	{ 0x1ee5b, 0x1ee5b },
+	{ 0x1ee5d, 0x1ee5d },
+	{ 0x1ee5f, 0x1ee5f },
+	{ 0x1ee61, 0x1ee62 },
+	{ 0x1ee64, 0x1ee64 },
+	{ 0x1ee67, 0x1ee6a },
+	{ 0x1ee6c, 0x1ee72 },
+	{ 0x1ee74, 0x1ee77 },
+	{ 0x1ee79, 0x1ee7c },
+	{ 0x1ee7e, 0x1ee7e },
+	{ 0x1ee80, 0x1ee89 },
+	{ 0x1ee8b, 0x1ee9b },
+	{ 0x1eea1, 0x1eea3 },
+	{ 0x1eea5, 0x1eea9 },
+	{ 0x1eeab, 0x1eebb },
+	{ 0x1fbf0, 0x1fbf9 },
+	{ 0x20000, 0x2a6df },
+	{ 0x2a700, 0x2b738 },
+	{ 0x2b740, 0x2b81d },
+	{ 0x2b820, 0x2cea1 },
+	{ 0x2ceb0, 0x2ebe0 },
+	{ 0x2f800, 0x2fa1d },
+	{ 0x30000, 0x3134a },
+	{ 0xe0100, 0xe01ef },
+	{ 0x0, 0x0 },
+};
+
+#endif // CHAR_RANGE_INC
diff --git a/core/string/char_utils.h b/core/string/char_utils.h
new file mode 100644
index 0000000000..67147a4327
--- /dev/null
+++ b/core/string/char_utils.h
@@ -0,0 +1,112 @@
+/*************************************************************************/
+/*  char_utils.h                                                         */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef CHAR_UTILS_H
+#define CHAR_UTILS_H
+
+#include "core/typedefs.h"
+
+#include "char_range.inc"
+
+// Returns true if `c` lies inside any inclusive [start, end] entry of `xid_start`; the scan ends at the first entry whose start is 0.
+static _FORCE_INLINE_ bool is_unicode_identifier_start(char32_t c) {
+	int idx = 0;
+	while (xid_start[idx].start != 0 && (c < xid_start[idx].start || c > xid_start[idx].end)) {
+		idx++;
+	}
+	return xid_start[idx].start != 0;
+}
+
+// Returns true if `c` lies inside any inclusive [start, end] entry of `xid_continue`; the scan ends at the first entry whose start is 0.
+static _FORCE_INLINE_ bool is_unicode_identifier_continue(char32_t c) {
+	int idx = 0;
+	while (xid_continue[idx].start != 0 && (c < xid_continue[idx].start || c > xid_continue[idx].end)) {
+		idx++;
+	}
+	return xid_continue[idx].start != 0;
+}
+
+static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { // True only for 'A' through 'Z'.
+	return 'A' <= c && c <= 'Z';
+}
+
+static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { // True only for 'a' through 'z'.
+	return 'a' <= c && c <= 'z';
+}
+
+static _FORCE_INLINE_ bool is_digit(char32_t c) { // True only for '0' through '9'.
+	return '0' <= c && c <= '9';
+}
+
+static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { // Decimal digit (per is_digit) or a letter 'a'..'f' / 'A'..'F'.
+	return ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f') || is_digit(c);
+}
+
+static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { // True only for '0' and '1'.
+	return c == '1' || c == '0';
+}
+
+static _FORCE_INLINE_ bool is_ascii_char(char32_t c) { // True for the letters 'A'..'Z' and 'a'..'z' only (not for every ASCII code).
+	return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+}
+
+static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { // True for '0'..'9', 'A'..'Z' and 'a'..'z'.
+	return ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+}
+
+static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { // True for '_', '0'..'9', 'A'..'Z' and 'a'..'z'.
+	return c == '_' || ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+}
+
+static _FORCE_INLINE_ bool is_symbol(char32_t c) { // Tab, space, or one of '!'..'/', ':'..'@', '['..'`', '{'..'~'; '_' is excluded.
+	return c != '_' && (c == '\t' || c == ' ' || ('!' <= c && c <= '/') || (':' <= c && c <= '@') || ('[' <= c && c <= '`') || ('{' <= c && c <= '~'));
+}
+
+static _FORCE_INLINE_ bool is_control(char32_t p_char) { // True for U+0000..U+001F and U+007F..U+009F.
+	return p_char <= 0x001f || (0x007f <= p_char && p_char <= 0x009f);
+}
+
+static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { // U+0009..U+000D, ' ', U+0085, U+00A0, U+1680, U+2000..U+200A, U+2028, U+2029, U+202F, U+205F, U+3000.
+	return (0x0009 <= p_char && p_char <= 0x000d) || p_char == ' ' || p_char == 0x0085 || p_char == 0x00a0 || p_char == 0x1680 || (0x2000 <= p_char && p_char <= 0x200a) || p_char == 0x2028 || p_char == 0x2029 || p_char == 0x202f || p_char == 0x205f || p_char == 0x3000;
+}
+
+static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { // U+000A..U+000D, U+0085, U+2028 and U+2029.
+	return (0x000a <= p_char && p_char <= 0x000d) || p_char == 0x0085 || (0x2028 <= p_char && p_char <= 0x2029);
+}
+
+static _FORCE_INLINE_ bool is_punct(char32_t p_char) { // ' '..'/', ':'..'@', '['..'^', '`', '{'..'~', U+2000..U+206F and U+3000..U+303F; '_' is not matched.
+	return (' ' <= p_char && p_char <= '/') || (':' <= p_char && p_char <= '@') || ('[' <= p_char && p_char <= '^') || p_char == '`' || ('{' <= p_char && p_char <= '~') || (0x2000 <= p_char && p_char <= 0x206f) || (0x3000 <= p_char && p_char <= 0x303f);
+}
+
+static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { // True only for '_'.
+	return '_' == p_char;
+}
+
+#endif // CHAR_UTILS_H
diff --git a/core/string/locales.h b/core/string/locales.h
new file mode 100644
index 0000000000..32d6608ec2
--- /dev/null
+++ b/core/string/locales.h
@@ -0,0 +1,1197 @@
+/*************************************************************************/
+/*  locales.h                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef LOCALES_H
+#define LOCALES_H
+
+// Windows has some weird locale identifiers which do not honor the ISO 639-1
+// standardized nomenclature. Whenever those don't conflict with existing ISO
+// identifiers, we override them.
+//
+// Reference:
+// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx
+
+static const char *locale_renames[][2] = { // Each row: { identifier to rename, identifier it maps to }.
+	{ "in", "id" }, // Indonesian
+	{ "iw", "he" }, // Hebrew
+	{ "no", "nb" }, // Norwegian Bokmål
+	{ "C", "en" }, // Locale is not set, fallback to English.
+	{ nullptr, nullptr } // Null entry closing the table.
+};
+
+// Additional script information for preferred scripts.
+// Language code, script code, default country, supported countries.
+// Reference:
+// - https://lh.2xlibre.net/locales/
+// - https://www.localeplanet.com/icu/index.html
+// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
+
+static const char *locale_scripts[][4] = { // Each row: { language, script, default country (may be empty), comma-separated supported countries }.
+	{ "az", "Latn", "", "AZ" },
+	{ "az", "Arab", "", "IR" },
+	{ "bs", "Latn", "", "BA" },
+	{ "ff", "Latn", "", "BF,CM,GH,GM,GN,GW,LR,MR,NE,NG,SL,SN" },
+	{ "pa", "Arab", "PK", "PK" },
+	{ "pa", "Guru", "IN", "IN" },
+	{ "sd", "Arab", "PK", "PK" },
+	{ "sd", "Deva", "IN", "IN" },
+	{ "shi", "Tfng", "", "MA" },
+	{ "sr", "Cyrl", "", "BA,RS,XK" },
+	{ "sr", "Latn", "", "ME" },
+	{ "uz", "Latn", "", "UZ" },
+	{ "uz", "Arab", "AF", "AF" },
+	{ "vai", "Vaii", "", "LR" },
+	{ "yue", "Hans", "CN", "CN" },
+	{ "yue", "Hant", "HK", "HK" },
+	{ "zh", "Hans", "CN", "CN,SG" },
+	{ "zh", "Hant", "TW", "HK,MO,TW" },
+	{ nullptr, nullptr, nullptr, nullptr } // Null entry closing the table.
+};
+
+// Additional mapping for outdated, temporary or exceptionally reserved country codes.
+// Reference:
+// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+// - https://www.iso.org/obp/ui/#search/code/
+
+static const char *country_renames[][2] = { // Each row: { code to rename, code it maps to }; every target code has a row in country_names.
+	{ "BU", "MM" }, // Burma, name changed to Myanmar.
+	{ "KV", "XK" }, // Kosovo (temporary FIPS code to European Commission code), no official ISO code assigned.
+	{ "TP", "TL" }, // East Timor, name changed to Timor-Leste.
+	{ "UK", "GB" }, // United Kingdom, exceptionally reserved code.
+	{ nullptr, nullptr } // Null entry closing the table.
+};
+
+// Country code, country name.
+// Reference:
+// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+// - https://www.iso.org/obp/ui/#search/code/
+
+static const char *country_names[][2] = { // Each row: { two-letter code, country name }.
+	{ "AC", "Ascension Island" }, // Exceptionally reserved.
+	{ "AD", "Andorra" },
+	{ "AE", "United Arab Emirates" },
+	{ "AF", "Afghanistan" },
+	{ "AG", "Antigua and Barbuda" },
+	{ "AI", "Anguilla" },
+	{ "AL", "Albania" },
+	{ "AM", "Armenia" },
+	{ "AN", "Netherlands Antilles" }, // Transitionally reserved, divided into BQ, CW and SX.
+	{ "AO", "Angola" },
+	{ "AQ", "Antarctica" },
+	{ "AR", "Argentina" },
+	{ "AS", "American Samoa" },
+	{ "AT", "Austria" },
+	{ "AU", "Australia" },
+	{ "AW", "Aruba" },
+	{ "AX", "Åland Islands" },
+	{ "AZ", "Azerbaijan" },
+	{ "BA", "Bosnia and Herzegovina" },
+	{ "BB", "Barbados" },
+	{ "BD", "Bangladesh" },
+	{ "BE", "Belgium" },
+	{ "BF", "Burkina Faso" },
+	{ "BG", "Bulgaria" },
+	{ "BH", "Bahrain" },
+	{ "BI", "Burundi" },
+	{ "BJ", "Benin" },
+	{ "BL", "St. Barthélemy" },
+	{ "BM", "Bermuda" },
+	{ "BN", "Brunei" },
+	{ "BO", "Bolivia" },
+	{ "BQ", "Caribbean Netherlands" },
+	{ "BR", "Brazil" },
+	{ "BS", "Bahamas" },
+	{ "BT", "Bhutan" },
+	{ "BV", "Bouvet Island" },
+	{ "BW", "Botswana" },
+	{ "BY", "Belarus" },
+	{ "BZ", "Belize" },
+	{ "CA", "Canada" },
+	{ "CC", "Cocos (Keeling) Islands" },
+	{ "CD", "Congo - Kinshasa" },
+	{ "CF", "Central African Republic" },
+	{ "CG", "Congo - Brazzaville" },
+	{ "CH", "Switzerland" },
+	{ "CI", "Côte d'Ivoire" },
+	{ "CK", "Cook Islands" },
+	{ "CL", "Chile" },
+	{ "CM", "Cameroon" },
+	{ "CN", "China" },
+	{ "CO", "Colombia" },
+	{ "CP", "Clipperton Island" }, // Exceptionally reserved.
+	{ "CR", "Costa Rica" },
+	{ "CQ", "Island of Sark" }, // Exceptionally reserved.
+	{ "CU", "Cuba" },
+	{ "CV", "Cabo Verde" },
+	{ "CW", "Curaçao" },
+	{ "CX", "Christmas Island" },
+	{ "CY", "Cyprus" },
+	{ "CZ", "Czechia" },
+	{ "DE", "Germany" },
+	{ "DG", "Diego Garcia" }, // Exceptionally reserved.
+	{ "DJ", "Djibouti" },
+	{ "DK", "Denmark" },
+	{ "DM", "Dominica" },
+	{ "DO", "Dominican Republic" },
+	{ "DZ", "Algeria" },
+	{ "EA", "Ceuta and Melilla" }, // Exceptionally reserved.
+	{ "EC", "Ecuador" },
+	{ "EE", "Estonia" },
+	{ "EG", "Egypt" },
+	{ "EH", "Western Sahara" },
+	{ "ER", "Eritrea" },
+	{ "ES", "Spain" },
+	{ "ET", "Ethiopia" },
+	{ "EU", "European Union" }, // Exceptionally reserved.
+	{ "EZ", "Eurozone" }, // Exceptionally reserved.
+	{ "FI", "Finland" },
+	{ "FJ", "Fiji" },
+	{ "FK", "Falkland Islands" },
+	{ "FM", "Micronesia" },
+	{ "FO", "Faroe Islands" },
+	{ "FR", "France" },
+	{ "FX", "France, Metropolitan" }, // Exceptionally reserved.
+	{ "GA", "Gabon" },
+	{ "GB", "United Kingdom" },
+	{ "GD", "Grenada" },
+	{ "GE", "Georgia" },
+	{ "GF", "French Guiana" },
+	{ "GG", "Guernsey" },
+	{ "GH", "Ghana" },
+	{ "GI", "Gibraltar" },
+	{ "GL", "Greenland" },
+	{ "GM", "Gambia" },
+	{ "GN", "Guinea" },
+	{ "GP", "Guadeloupe" },
+	{ "GQ", "Equatorial Guinea" },
+	{ "GR", "Greece" },
+	{ "GS", "South Georgia and South Sandwich Islands" },
+	{ "GT", "Guatemala" },
+	{ "GU", "Guam" },
+	{ "GW", "Guinea-Bissau" },
+	{ "GY", "Guyana" },
+	{ "HK", "Hong Kong" },
+	{ "HM", "Heard Island and McDonald Islands" },
+	{ "HN", "Honduras" },
+	{ "HR", "Croatia" },
+	{ "HT", "Haiti" },
+	{ "HU", "Hungary" },
+	{ "IC", "Canary Islands" }, // Exceptionally reserved.
+	{ "ID", "Indonesia" },
+	{ "IE", "Ireland" },
+	{ "IL", "Israel" },
+	{ "IM", "Isle of Man" },
+	{ "IN", "India" },
+	{ "IO", "British Indian Ocean Territory" },
+	{ "IQ", "Iraq" },
+	{ "IR", "Iran" },
+	{ "IS", "Iceland" },
+	{ "IT", "Italy" },
+	{ "JE", "Jersey" },
+	{ "JM", "Jamaica" },
+	{ "JO", "Jordan" },
+	{ "JP", "Japan" },
+	{ "KE", "Kenya" },
+	{ "KG", "Kyrgyzstan" },
+	{ "KH", "Cambodia" },
+	{ "KI", "Kiribati" },
+	{ "KM", "Comoros" },
+	{ "KN", "St. Kitts and Nevis" },
+	{ "KP", "North Korea" },
+	{ "KR", "South Korea" },
+	{ "KW", "Kuwait" },
+	{ "KY", "Cayman Islands" },
+	{ "KZ", "Kazakhstan" },
+	{ "LA", "Laos" },
+	{ "LB", "Lebanon" },
+	{ "LC", "St. Lucia" },
+	{ "LI", "Liechtenstein" },
+	{ "LK", "Sri Lanka" },
+	{ "LR", "Liberia" },
+	{ "LS", "Lesotho" },
+	{ "LT", "Lithuania" },
+	{ "LU", "Luxembourg" },
+	{ "LV", "Latvia" },
+	{ "LY", "Libya" },
+	{ "MA", "Morocco" },
+	{ "MC", "Monaco" },
+	{ "MD", "Moldova" },
+	{ "ME", "Montenegro" },
+	{ "MF", "St. Martin" },
+	{ "MG", "Madagascar" },
+	{ "MH", "Marshall Islands" },
+	{ "MK", "North Macedonia" },
+	{ "ML", "Mali" },
+	{ "MM", "Myanmar" },
+	{ "MN", "Mongolia" },
+	{ "MO", "Macao" },
+	{ "MP", "Northern Mariana Islands" },
+	{ "MQ", "Martinique" },
+	{ "MR", "Mauritania" },
+	{ "MS", "Montserrat" },
+	{ "MT", "Malta" },
+	{ "MU", "Mauritius" },
+	{ "MV", "Maldives" },
+	{ "MW", "Malawi" },
+	{ "MX", "Mexico" },
+	{ "MY", "Malaysia" },
+	{ "MZ", "Mozambique" },
+	{ "NA", "Namibia" },
+	{ "NC", "New Caledonia" },
+	{ "NE", "Niger" },
+	{ "NF", "Norfolk Island" },
+	{ "NG", "Nigeria" },
+	{ "NI", "Nicaragua" },
+	{ "NL", "Netherlands" },
+	{ "NO", "Norway" },
+	{ "NP", "Nepal" },
+	{ "NR", "Nauru" },
+	{ "NU", "Niue" },
+	{ "NZ", "New Zealand" },
+	{ "OM", "Oman" },
+	{ "PA", "Panama" },
+	{ "PE", "Peru" },
+	{ "PF", "French Polynesia" },
+	{ "PG", "Papua New Guinea" },
+	{ "PH", "Philippines" },
+	{ "PK", "Pakistan" },
+	{ "PL", "Poland" },
+	{ "PM", "St. Pierre and Miquelon" },
+	{ "PN", "Pitcairn Islands" },
+	{ "PR", "Puerto Rico" },
+	{ "PS", "Palestine" },
+	{ "PT", "Portugal" },
+	{ "PW", "Palau" },
+	{ "PY", "Paraguay" },
+	{ "QA", "Qatar" },
+	{ "RE", "Réunion" },
+	{ "RO", "Romania" },
+	{ "RS", "Serbia" },
+	{ "RU", "Russia" },
+	{ "RW", "Rwanda" },
+	{ "SA", "Saudi Arabia" },
+	{ "SB", "Solomon Islands" },
+	{ "SC", "Seychelles" },
+	{ "SD", "Sudan" },
+	{ "SE", "Sweden" },
+	{ "SG", "Singapore" },
+	{ "SH", "St. Helena, Ascension and Tristan da Cunha" },
+	{ "SI", "Slovenia" },
+	{ "SJ", "Svalbard and Jan Mayen" },
+	{ "SK", "Slovakia" },
+	{ "SL", "Sierra Leone" },
+	{ "SM", "San Marino" },
+	{ "SN", "Senegal" },
+	{ "SO", "Somalia" },
+	{ "SR", "Suriname" },
+	{ "SS", "South Sudan" },
+	{ "ST", "Sao Tome and Principe" },
+	{ "SV", "El Salvador" },
+	{ "SX", "Sint Maarten" },
+	{ "SY", "Syria" },
+	{ "SZ", "Eswatini" },
+	{ "TA", "Tristan da Cunha" }, // Exceptionally reserved.
+	{ "TC", "Turks and Caicos Islands" },
+	{ "TD", "Chad" },
+	{ "TF", "French Southern Territories" },
+	{ "TG", "Togo" },
+	{ "TH", "Thailand" },
+	{ "TJ", "Tajikistan" },
+	{ "TK", "Tokelau" },
+	{ "TL", "Timor-Leste" },
+	{ "TM", "Turkmenistan" },
+	{ "TN", "Tunisia" },
+	{ "TO", "Tonga" },
+	{ "TR", "Turkey" },
+	{ "TT", "Trinidad and Tobago" },
+	{ "TV", "Tuvalu" },
+	{ "TW", "Taiwan" },
+	{ "TZ", "Tanzania" },
+	{ "UA", "Ukraine" },
+	{ "UG", "Uganda" },
+	{ "UM", "U.S. Outlying Islands" },
+	{ "US", "United States of America" },
+	{ "UY", "Uruguay" },
+	{ "UZ", "Uzbekistan" },
+	{ "VA", "Holy See" },
+	{ "VC", "St. Vincent and the Grenadines" },
+	{ "VE", "Venezuela" },
+	{ "VG", "British Virgin Islands" },
+	{ "VI", "U.S. Virgin Islands" },
+	{ "VN", "Viet Nam" },
+	{ "VU", "Vanuatu" },
+	{ "WF", "Wallis and Futuna" },
+	{ "WS", "Samoa" },
+	{ "XK", "Kosovo" }, // Temporary code, no official ISO code assigned.
+	{ "YE", "Yemen" },
+	{ "YT", "Mayotte" },
+	{ "ZA", "South Africa" },
+	{ "ZM", "Zambia" },
+	{ "ZW", "Zimbabwe" },
+	{ nullptr, nullptr } // Null entry closing the table.
+};
+
+// Language code, language name.
+// Reference:
+// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+// - https://www.localeplanet.com/icu/index.html
+// - https://lh.2xlibre.net/locales/
+
+static const char *language_list[][2] = { // Each row: { language code, language name }.
+	{ "aa", "Afar" },
+	{ "ab", "Abkhazian" },
+	{ "ace", "Achinese" },
+	{ "ach", "Acoli" },
+	{ "ada", "Adangme" },
+	{ "ady", "Adyghe" },
+	{ "ae", "Avestan" },
+	{ "aeb", "Tunisian Arabic" },
+	{ "af", "Afrikaans" },
+	{ "afh", "Afrihili" },
+	{ "agq", "Aghem" },
+	{ "ain", "Ainu" },
+	{ "agr", "Aguaruna" },
+	{ "ak", "Akan" },
+	{ "akk", "Akkadian" },
+	{ "akz", "Alabama" },
+	{ "ale", "Aleut" },
+	{ "aln", "Gheg Albanian" },
+	{ "alt", "Southern Altai" },
+	{ "am", "Amharic" },
+	{ "an", "Aragonese" },
+	{ "ang", "Old English" },
+	{ "anp", "Angika" },
+	{ "ar", "Arabic" },
+	{ "arc", "Aramaic" },
+	{ "arn", "Mapudungun" },
+	{ "aro", "Araona" },
+	{ "arp", "Arapaho" },
+	{ "arq", "Algerian Arabic" },
+	{ "ars", "Najdi Arabic" },
+	{ "arw", "Arawak" },
+	{ "ary", "Moroccan Arabic" },
+	{ "arz", "Egyptian Arabic" },
+	{ "as", "Assamese" },
+	{ "asa", "Asu" },
+	{ "ase", "American Sign Language" },
+	{ "ast", "Asturian" },
+	{ "av", "Avaric" },
+	{ "avk", "Kotava" },
+	{ "awa", "Awadhi" },
+	{ "ayc", "Southern Aymara" },
+	{ "ay", "Aymara" },
+	{ "az", "Azerbaijani" },
+	{ "ba", "Bashkir" },
+	{ "bal", "Baluchi" },
+	{ "ban", "Balinese" },
+	{ "bar", "Bavarian" },
+	{ "bas", "Bassa" },
+	{ "bax", "Bamun" },
+	{ "bbc", "Batak Toba" },
+	{ "bbj", "Ghomala" },
+	{ "be", "Belarusian" },
+	{ "bej", "Beja" },
+	{ "bem", "Bemba" },
+	{ "ber", "Berber" },
+	{ "bew", "Betawi" },
+	{ "bez", "Bena" },
+	{ "bfd", "Bafut" },
+	{ "bfq", "Badaga" },
+	{ "bg", "Bulgarian" },
+	{ "bhb", "Bhili" },
+	{ "bgn", "Western Balochi" },
+	{ "bho", "Bhojpuri" },
+	{ "bi", "Bislama" },
+	{ "bik", "Bikol" },
+	{ "bin", "Bini" },
+	{ "bjn", "Banjar" },
+	{ "bkm", "Kom" },
+	{ "bla", "Siksika" },
+	{ "bm", "Bambara" },
+	{ "bn", "Bengali" },
+	{ "bo", "Tibetan" },
+	{ "bpy", "Bishnupriya" },
+	{ "bqi", "Bakhtiari" },
+	{ "br", "Breton" },
+	{ "brh", "Brahui" },
+	{ "brx", "Bodo" },
+	{ "bs", "Bosnian" },
+	{ "bss", "Akoose" },
+	{ "bua", "Buriat" },
+	{ "bug", "Buginese" },
+	{ "bum", "Bulu" },
+	{ "byn", "Bilin" },
+	{ "byv", "Medumba" },
+	{ "ca", "Catalan" },
+	{ "cad", "Caddo" },
+	{ "car", "Carib" },
+	{ "cay", "Cayuga" },
+	{ "cch", "Atsam" },
+	{ "ccp", "Chakma" },
+	{ "ce", "Chechen" },
+	{ "ceb", "Cebuano" },
+	{ "cgg", "Chiga" },
+	{ "ch", "Chamorro" },
+	{ "chb", "Chibcha" },
+	{ "chg", "Chagatai" },
+	{ "chk", "Chuukese" },
+	{ "chm", "Mari" },
+	{ "chn", "Chinook Jargon" },
+	{ "cho", "Choctaw" },
+	{ "chp", "Chipewyan" },
+	{ "chr", "Cherokee" },
+	{ "chy", "Cheyenne" },
+	{ "cic", "Chickasaw" },
+	{ "ckb", "Central Kurdish" },
+	{ "csb", "Kashubian" }, // NOTE(review): duplicate of the "csb" row further down.
+	{ "cmn", "Mandarin Chinese" },
+	{ "co", "Corsican" },
+	{ "cop", "Coptic" },
+	{ "cps", "Capiznon" },
+	{ "cr", "Cree" },
+	{ "crh", "Crimean Tatar" },
+	{ "crs", "Seselwa Creole French" },
+	{ "cs", "Czech" },
+	{ "csb", "Kashubian" },
+	{ "cu", "Church Slavic" },
+	{ "cv", "Chuvash" },
+	{ "cy", "Welsh" },
+	{ "da", "Danish" },
+	{ "dak", "Dakota" },
+	{ "dar", "Dargwa" },
+	{ "dav", "Taita" },
+	{ "de", "German" },
+	{ "del", "Delaware" },
+	{ "den", "Slave" },
+	{ "dgr", "Dogrib" },
+	{ "din", "Dinka" },
+	{ "dje", "Zarma" },
+	{ "doi", "Dogri" },
+	{ "dsb", "Lower Sorbian" },
+	{ "dtp", "Central Dusun" },
+	{ "dua", "Duala" },
+	{ "dum", "Middle Dutch" },
+	{ "dv", "Dhivehi" },
+	{ "dyo", "Jola-Fonyi" },
+	{ "dyu", "Dyula" },
+	{ "dz", "Dzongkha" },
+	{ "dzg", "Dazaga" },
+	{ "ebu", "Embu" },
+	{ "ee", "Ewe" },
+	{ "efi", "Efik" },
+	{ "egl", "Emilian" },
+	{ "egy", "Ancient Egyptian" },
+	{ "eka", "Ekajuk" },
+	{ "el", "Greek" },
+	{ "elx", "Elamite" },
+	{ "en", "English" },
+	{ "enm", "Middle English" },
+	{ "eo", "Esperanto" },
+	{ "es", "Spanish" },
+	{ "esu", "Central Yupik" },
+	{ "et", "Estonian" },
+	{ "eu", "Basque" },
+	{ "ewo", "Ewondo" },
+	{ "ext", "Extremaduran" },
+	{ "fa", "Persian" },
+	{ "fan", "Fang" },
+	{ "fat", "Fanti" },
+	{ "ff", "Fulah" },
+	{ "fi", "Finnish" },
+	{ "fil", "Filipino" },
+	{ "fit", "Tornedalen Finnish" },
+	{ "fj", "Fijian" },
+	{ "fo", "Faroese" },
+	{ "fon", "Fon" },
+	{ "fr", "French" },
+	{ "frc", "Cajun French" },
+	{ "frm", "Middle French" },
+	{ "fro", "Old French" },
+	{ "frp", "Arpitan" },
+	{ "frr", "Northern Frisian" },
+	{ "frs", "Eastern Frisian" },
+	{ "fur", "Friulian" },
+	{ "fy", "Western Frisian" },
+	{ "ga", "Irish" },
+	{ "gaa", "Ga" },
+	{ "gag", "Gagauz" },
+	{ "gan", "Gan Chinese" },
+	{ "gay", "Gayo" },
+	{ "gba", "Gbaya" },
+	{ "gbz", "Zoroastrian Dari" },
+	{ "gd", "Scottish Gaelic" },
+	{ "gez", "Geez" },
+	{ "gil", "Gilbertese" },
+	{ "gl", "Galician" },
+	{ "glk", "Gilaki" },
+	{ "gmh", "Middle High German" },
+	{ "gn", "Guarani" },
+	{ "goh", "Old High German" },
+	{ "gom", "Goan Konkani" },
+	{ "gon", "Gondi" },
+	{ "gor", "Gorontalo" },
+	{ "got", "Gothic" },
+	{ "grb", "Grebo" },
+	{ "grc", "Ancient Greek" },
+	{ "gsw", "Swiss German" },
+	{ "gu", "Gujarati" },
+	{ "guc", "Wayuu" },
+	{ "gur", "Frafra" },
+	{ "guz", "Gusii" },
+	{ "gv", "Manx" },
+	{ "gwi", "Gwichʼin" },
+	{ "ha", "Hausa" },
+	{ "hai", "Haida" },
+	{ "hak", "Hakka Chinese" },
+	{ "haw", "Hawaiian" },
+	{ "he", "Hebrew" },
+	{ "hi", "Hindi" },
+	{ "hif", "Fiji Hindi" },
+	{ "hil", "Hiligaynon" },
+	{ "hit", "Hittite" },
+	{ "hmn", "Hmong" },
+	{ "ho", "Hiri Motu" },
+	{ "hne", "Chhattisgarhi" },
+	{ "hr", "Croatian" },
+	{ "hsb", "Upper Sorbian" },
+	{ "hsn", "Xiang Chinese" },
+	{ "ht", "Haitian" },
+	{ "hu", "Hungarian" },
+	{ "hup", "Hupa" },
+	{ "hus", "Huastec" },
+	{ "hy", "Armenian" },
+	{ "hz", "Herero" },
+	{ "ia", "Interlingua" },
+	{ "iba", "Iban" },
+	{ "ibb", "Ibibio" },
+	{ "id", "Indonesian" },
+	{ "ie", "Interlingue" },
+	{ "ig", "Igbo" },
+	{ "ii", "Sichuan Yi" },
+	{ "ik", "Inupiaq" },
+	{ "ilo", "Iloko" },
+	{ "inh", "Ingush" },
+	{ "io", "Ido" },
+	{ "is", "Icelandic" },
+	{ "it", "Italian" },
+	{ "iu", "Inuktitut" },
+	{ "izh", "Ingrian" },
+	{ "ja", "Japanese" },
+	{ "jam", "Jamaican Creole English" },
+	{ "jbo", "Lojban" },
+	{ "jgo", "Ngomba" },
+	{ "jmc", "Machame" },
+	{ "jpr", "Judeo-Persian" },
+	{ "jrb", "Judeo-Arabic" },
+	{ "jut", "Jutish" },
+	{ "jv", "Javanese" },
+	{ "ka", "Georgian" },
+	{ "kaa", "Kara-Kalpak" },
+	{ "kab", "Kabyle" },
+	{ "kac", "Kachin" },
+	{ "kaj", "Jju" },
+	{ "kam", "Kamba" },
+	{ "kaw", "Kawi" },
+	{ "kbd", "Kabardian" },
+	{ "kbl", "Kanembu" },
+	{ "kcg", "Tyap" },
+	{ "kde", "Makonde" },
+	{ "kea", "Kabuverdianu" },
+	{ "ken", "Kenyang" },
+	{ "kfo", "Koro" },
+	{ "kg", "Kongo" },
+	{ "kgp", "Kaingang" },
+	{ "kha", "Khasi" },
+	{ "kho", "Khotanese" },
+	{ "khq", "Koyra Chiini" },
+	{ "khw", "Khowar" },
+	{ "ki", "Kikuyu" },
+	{ "kiu", "Kirmanjki" },
+	{ "kj", "Kuanyama" },
+	{ "kk", "Kazakh" },
+	{ "kkj", "Kako" },
+	{ "kl", "Kalaallisut" },
+	{ "kln", "Kalenjin" },
+	{ "km", "Central Khmer" },
+	{ "kmb", "Kimbundu" },
+	{ "kn", "Kannada" },
+	{ "ko", "Korean" },
+	{ "koi", "Komi-Permyak" },
+	{ "kok", "Konkani" },
+	{ "kos", "Kosraean" },
+	{ "kpe", "Kpelle" },
+	{ "kr", "Kanuri" },
+	{ "krc", "Karachay-Balkar" },
+	{ "kri", "Krio" },
+	{ "krj", "Kinaray-a" },
+	{ "krl", "Karelian" },
+	{ "kru", "Kurukh" },
+	{ "ks", "Kashmiri" },
+	{ "ksb", "Shambala" },
+	{ "ksf", "Bafia" },
+	{ "ksh", "Colognian" },
+	{ "ku", "Kurdish" },
+	{ "kum", "Kumyk" },
+	{ "kut", "Kutenai" },
+	{ "kv", "Komi" },
+	{ "kw", "Cornish" },
+	{ "ky", "Kirghiz" },
+	{ "lag", "Langi" }, // NOTE(review): duplicate of the "lag" row a few lines down.
+	{ "la", "Latin" },
+	{ "lad", "Ladino" },
+	{ "lag", "Langi" },
+	{ "lah", "Lahnda" },
+	{ "lam", "Lamba" },
+	{ "lb", "Luxembourgish" },
+	{ "lez", "Lezghian" },
+	{ "lfn", "Lingua Franca Nova" },
+	{ "lg", "Ganda" },
+	{ "li", "Limburgan" },
+	{ "lij", "Ligurian" },
+	{ "liv", "Livonian" },
+	{ "lkt", "Lakota" },
+	{ "lmo", "Lombard" },
+	{ "ln", "Lingala" },
+	{ "lo", "Lao" },
+	{ "lol", "Mongo" },
+	{ "lou", "Louisiana Creole" },
+	{ "loz", "Lozi" },
+	{ "lrc", "Northern Luri" },
+	{ "lt", "Lithuanian" },
+	{ "ltg", "Latgalian" },
+	{ "lu", "Luba-Katanga" },
+	{ "lua", "Luba-Lulua" },
+	{ "lui", "Luiseno" },
+	{ "lun", "Lunda" },
+	{ "luo", "Luo" },
+	{ "lus", "Mizo" },
+	{ "luy", "Luyia" },
+	{ "lv", "Latvian" },
+	{ "lzh", "Literary Chinese" },
+	{ "lzz", "Laz" },
+	{ "mad", "Madurese" },
+	{ "maf", "Mafa" },
+	{ "mag", "Magahi" },
+	{ "mai", "Maithili" },
+	{ "mak", "Makasar" },
+	{ "man", "Mandingo" },
+	{ "mas", "Masai" },
+	{ "mde", "Maba" },
+	{ "mdf", "Moksha" },
+	{ "mdr", "Mandar" },
+	{ "men", "Mende" },
+	{ "mer", "Meru" },
+	{ "mfe", "Morisyen" },
+	{ "mg", "Malagasy" },
+	{ "mga", "Middle Irish" },
+	{ "mgh", "Makhuwa-Meetto" },
+	{ "mgo", "Metaʼ" },
+	{ "mh", "Marshallese" },
+	{ "mhr", "Eastern Mari" },
+	{ "mi", "Māori" },
+	{ "mic", "Mi'kmaq" },
+	{ "min", "Minangkabau" },
+	{ "miq", "Mískito" },
+	{ "mjw", "Karbi" },
+	{ "mk", "Macedonian" },
+	{ "ml", "Malayalam" },
+	{ "mn", "Mongolian" },
+	{ "mnc", "Manchu" },
+	{ "mni", "Manipuri" },
+	{ "mnw", "Mon" },
+	{ "mos", "Mossi" },
+	{ "moh", "Mohawk" },
+	{ "mr", "Marathi" },
+	{ "mrj", "Western Mari" },
+	{ "ms", "Malay" },
+	{ "mt", "Maltese" },
+	{ "mua", "Mundang" },
+	{ "mus", "Muscogee" },
+	{ "mwl", "Mirandese" },
+	{ "mwr", "Marwari" },
+	{ "mwv", "Mentawai" },
+	{ "my", "Burmese" },
+	{ "mye", "Myene" },
+	{ "myv", "Erzya" },
+	{ "mzn", "Mazanderani" },
+	{ "na", "Nauru" },
+	{ "nah", "Nahuatl" },
+	{ "nan", "Min Nan Chinese" },
+	{ "nap", "Neapolitan" },
+	{ "naq", "Nama" },
+	{ "nan", "Min Nan Chinese" }, // NOTE(review): duplicate of the "nan" row a few lines up.
+	{ "nb", "Norwegian Bokmål" },
+	{ "nd", "North Ndebele" },
+	{ "nds", "Low German" },
+	{ "ne", "Nepali" },
+	{ "new", "Newari" },
+	{ "nhn", "Central Nahuatl" },
+	{ "ng", "Ndonga" },
+	{ "nia", "Nias" },
+	{ "niu", "Niuean" },
+	{ "njo", "Ao Naga" },
+	{ "nl", "Dutch" },
+	{ "nmg", "Kwasio" },
+	{ "nn", "Norwegian Nynorsk" },
+	{ "nnh", "Ngiemboon" },
+	{ "nog", "Nogai" },
+	{ "non", "Old Norse" },
+	{ "nov", "Novial" },
+	{ "nqo", "N'ko" },
+	{ "nr", "South Ndebele" },
+	{ "nso", "Pedi" },
+	{ "nus", "Nuer" },
+	{ "nv", "Navajo" },
+	{ "nwc", "Classical Newari" },
+	{ "ny", "Nyanja" },
+	{ "nym", "Nyamwezi" },
+	{ "nyn", "Nyankole" },
+	{ "nyo", "Nyoro" },
+	{ "nzi", "Nzima" },
+	{ "oc", "Occitan" },
+	{ "oj", "Ojibwa" },
+	{ "om", "Oromo" },
+	{ "or", "Odia" },
+	{ "os", "Ossetic" },
+	{ "osa", "Osage" },
+	{ "ota", "Ottoman Turkish" },
+	{ "pa", "Panjabi" },
+	{ "pag", "Pangasinan" },
+	{ "pal", "Pahlavi" },
+	{ "pam", "Pampanga" },
+	{ "pap", "Papiamento" },
+	{ "pau", "Palauan" },
+	{ "pcd", "Picard" },
+	{ "pcm", "Nigerian Pidgin" },
+	{ "pdc", "Pennsylvania German" },
+	{ "pdt", "Plautdietsch" },
+	{ "peo", "Old Persian" },
+	{ "pfl", "Palatine German" },
+	{ "phn", "Phoenician" },
+	{ "pi", "Pali" },
+	{ "pl", "Polish" },
+	{ "pms", "Piedmontese" },
+	{ "pnt", "Pontic" },
+	{ "pon", "Pohnpeian" },
+	{ "pr", "Pirate" },
+	{ "prg", "Prussian" },
+	{ "pro", "Old Provençal" },
+	{ "prs", "Dari" },
+	{ "ps", "Pushto" },
+	{ "pt", "Portuguese" },
+	{ "qu", "Quechua" },
+	{ "quc", "K'iche" },
+	{ "qug", "Chimborazo Highland Quichua" },
+	{ "quy", "Ayacucho Quechua" },
+	{ "quz", "Cusco Quechua" },
+	{ "raj", "Rajasthani" },
+	{ "rap", "Rapanui" },
+	{ "rar", "Rarotongan" },
+	{ "rgn", "Romagnol" },
+	{ "rif", "Riffian" },
+	{ "rm", "Romansh" },
+	{ "rn", "Rundi" },
+	{ "ro", "Romanian" },
+	{ "rof", "Rombo" },
+	{ "rom", "Romany" },
+	{ "rtm", "Rotuman" },
+	{ "ru", "Russian" },
+	{ "rue", "Rusyn" },
+	{ "rug", "Roviana" },
+	{ "rup", "Aromanian" },
+	{ "rw", "Kinyarwanda" },
+	{ "rwk", "Rwa" },
+	{ "sa", "Sanskrit" },
+	{ "sad", "Sandawe" },
+	{ "sah", "Sakha" },
+	{ "sam", "Samaritan Aramaic" },
+	{ "saq", "Samburu" },
+	{ "sas", "Sasak" },
+	{ "sat", "Santali" },
+	{ "saz", "Saurashtra" },
+	{ "sba", "Ngambay" },
+	{ "sbp", "Sangu" },
+	{ "sc", "Sardinian" },
+	{ "scn", "Sicilian" },
+	{ "sco", "Scots" },
+	{ "sd", "Sindhi" },
+	{ "sdc", "Sassarese Sardinian" },
+	{ "sdh", "Southern Kurdish" },
+	{ "se", "Northern Sami" },
+	{ "see", "Seneca" },
+	{ "seh", "Sena" },
+	{ "sei", "Seri" },
+	{ "sel", "Selkup" },
+	{ "ses", "Koyraboro Senni" },
+	{ "sg", "Sango" },
+	{ "sga", "Old Irish" },
+	{ "sgs", "Samogitian" },
+	{ "sh", "Serbo-Croatian" },
+	{ "shi", "Tachelhit" },
+	{ "shn", "Shan" },
+	{ "shs", "Shuswap" },
+	{ "shu", "Chadian Arabic" },
+	{ "si", "Sinhala" },
+	{ "sid", "Sidamo" },
+	{ "sk", "Slovak" },
+	{ "sl", "Slovenian" },
+	{ "sli", "Lower Silesian" },
+	{ "sly", "Selayar" },
+	{ "sm", "Samoan" },
+	{ "sma", "Southern Sami" },
+	{ "smj", "Lule Sami" },
+	{ "smn", "Inari Sami" },
+	{ "sms", "Skolt Sami" },
+	{ "sn", "Shona" },
+	{ "snk", "Soninke" },
+	{ "so", "Somali" },
+	{ "sog", "Sogdien" },
+	{ "son", "Songhai" },
+	{ "sq", "Albanian" },
+	{ "sr", "Serbian" },
+	{ "srn", "Sranan Tongo" },
+	{ "srr", "Serer" },
+	{ "ss", "Swati" },
+	{ "ssy", "Saho" },
+	{ "st", "Southern Sotho" },
+	{ "stq", "Saterland Frisian" },
+	{ "su", "Sundanese" },
+	{ "suk", "Sukuma" },
+	{ "sus", "Susu" },
+	{ "sux", "Sumerian" },
+	{ "sv", "Swedish" },
+	{ "sw", "Swahili" },
+	{ "swb", "Comorian" },
+	{ "swc", "Congo Swahili" },
+	{ "syc", "Classical Syriac" },
+	{ "syr", "Syriac" },
+	{ "szl", "Silesian" },
+	{ "ta", "Tamil" },
+	{ "tcy", "Tulu" },
+	{ "te", "Telugu" },
+	{ "tem", "Timne" },
+	{ "teo", "Teso" },
+	{ "ter", "Tereno" },
+	{ "tet", "Tetum" },
+	{ "tg", "Tajik" },
+	{ "th", "Thai" },
+	{ "the", "Chitwania Tharu" },
+	{ "ti", "Tigrinya" },
+	{ "tig", "Tigre" },
+	{ "tiv", "Tiv" },
+	{ "tk", "Turkmen" },
+	{ "tkl", "Tokelau" },
+	{ "tkr", "Tsakhur" },
+	{ "tl", "Tagalog" },
+	{ "tlh", "Klingon" },
+	{ "tli", "Tlingit" },
+	{ "tly", "Talysh" },
+	{ "tmh", "Tamashek" },
+	{ "tn", "Tswana" },
+	{ "to", "Tongan" },
+	{ "tog", "Nyasa Tonga" },
+	{ "tpi", "Tok Pisin" },
+	{ "tr", "Turkish" },
+	{ "tru", "Turoyo" },
+	{ "trv", "Taroko" },
+	{ "ts", "Tsonga" },
+	{ "tsd", "Tsakonian" },
+	{ "tsi", "Tsimshian" },
+	{ "tt", "Tatar" },
+	{ "ttt", "Muslim Tat" },
+	{ "tum", "Tumbuka" },
+	{ "tvl", "Tuvalu" },
+	{ "tw", "Twi" },
+	{ "twq", "Tasawaq" },
+	{ "ty", "Tahitian" },
+	{ "tyv", "Tuvinian" },
+	{ "tzm", "Central Atlas Tamazight" },
+	{ "udm", "Udmurt" },
+	{ "ug", "Uyghur" },
+	{ "uga", "Ugaritic" },
+	{ "uk", "Ukrainian" },
+	{ "umb", "Umbundu" },
+	{ "unm", "Unami" },
+	{ "ur", "Urdu" },
+	{ "uz", "Uzbek" },
+	{ "vai", "Vai" },
+	{ "ve", "Venda" },
+	{ "vec", "Venetian" },
+	{ "vep", "Veps" },
+	{ "vi", "Vietnamese" },
+	{ "vls", "West Flemish" },
+	{ "vmf", "Main-Franconian" },
+	{ "vo", "Volapük" },
+	{ "vot", "Votic" },
+	{ "vro", "Võro" },
+	{ "vun", "Vunjo" },
+	{ "wa", "Walloon" },
+	{ "wae", "Walser" },
+	{ "wal", "Wolaytta" },
+	{ "war", "Waray" },
+	{ "was", "Washo" },
+	{ "wbp", "Warlpiri" },
+	{ "wo", "Wolof" },
+	{ "wuu", "Wu Chinese" },
+	{ "xal", "Kalmyk" },
+	{ "xh", "Xhosa" },
+	{ "xmf", "Mingrelian" },
+	{ "xog", "Soga" },
+	{ "yao", "Yao" },
+	{ "yap", "Yapese" },
+	{ "yav", "Yangben" },
+	{ "ybb", "Yemba" },
+	{ "yi", "Yiddish" },
+	{ "yo", "Yoruba" },
+	{ "yrl", "Nheengatu" },
+	{ "yue", "Yue Chinese" },
+	{ "yuw", "Yau (Morobe Province)" }, // Was labelled with the country name "Papua New Guinea".
+	{ "za", "Zhuang" },
+	{ "zap", "Zapotec" },
+	{ "zbl", "Blissymbols" },
+	{ "zea", "Zeelandic" },
+	{ "zen", "Zenaga" },
+	{ "zgh", "Standard Moroccan Tamazight" },
+	{ "zh", "Chinese" },
+	{ "zu", "Zulu" },
+	{ "zun", "Zuni" },
+	{ "zza", "Zaza" },
+	{ nullptr, nullptr } // Null entry closing the table.
+};
+
+// Additional regional variants.
+// Each entry is { variant name, supported language code }; the list ends with a { nullptr, nullptr } entry.
+
+static const char *locale_variants[][2] = {
+	{ "valencia", "ca" },
+	{ "iqtelif", "tt" },
+	{ "saaho", "aa" },
+	{ "tradnl", "es" },
+	{ nullptr, nullptr },
+};
+
+// Script names and codes (excludes typographic variants, special codes, reserved codes and aliases for combined scripts).
+// Each entry is { script name, four-letter script code }; the list ends with a { nullptr, nullptr } entry.
+// Reference: https://en.wikipedia.org/wiki/ISO_15924
+
+static const char *script_list[][2] = {
+	{ "Adlam", "Adlm" },
+	{ "Afaka", "Afak" },
+	{ "Caucasian Albanian", "Aghb" },
+	{ "Ahom", "Ahom" },
+	{ "Arabic", "Arab" },
+	{ "Imperial Aramaic", "Armi" },
+	{ "Armenian", "Armn" },
+	{ "Avestan", "Avst" },
+	{ "Balinese", "Bali" },
+	{ "Bamum", "Bamu" },
+	{ "Bassa Vah", "Bass" },
+	{ "Batak", "Batk" },
+	{ "Bengali", "Beng" },
+	{ "Bhaiksuki", "Bhks" },
+	{ "Blissymbols", "Blis" },
+	{ "Bopomofo", "Bopo" },
+	{ "Brahmi", "Brah" },
+	{ "Braille", "Brai" },
+	{ "Buginese", "Bugi" },
+	{ "Buhid", "Buhd" },
+	{ "Chakma", "Cakm" },
+	{ "Unified Canadian Aboriginal", "Cans" },
+	{ "Carian", "Cari" },
+	{ "Cham", "Cham" },
+	{ "Cherokee", "Cher" },
+	{ "Chorasmian", "Chrs" },
+	{ "Cirth", "Cirt" },
+	{ "Coptic", "Copt" },
+	{ "Cypro-Minoan", "Cpmn" },
+	{ "Cypriot", "Cprt" },
+	{ "Cyrillic", "Cyrl" },
+	{ "Devanagari", "Deva" },
+	{ "Dives Akuru", "Diak" },
+	{ "Dogra", "Dogr" },
+	{ "Deseret", "Dsrt" },
+	{ "Duployan", "Dupl" },
+	{ "Egyptian demotic", "Egyd" },
+	{ "Egyptian hieratic", "Egyh" },
+	{ "Egyptian hieroglyphs", "Egyp" },
+	{ "Elbasan", "Elba" },
+	{ "Elymaic", "Elym" },
+	{ "Ethiopic", "Ethi" },
+	{ "Khutsuri", "Geok" },
+	{ "Georgian", "Geor" },
+	{ "Glagolitic", "Glag" },
+	{ "Gunjala Gondi", "Gong" },
+	{ "Masaram Gondi", "Gonm" },
+	{ "Gothic", "Goth" },
+	{ "Grantha", "Gran" },
+	{ "Greek", "Grek" },
+	{ "Gujarati", "Gujr" },
+	{ "Gurmukhi", "Guru" },
+	{ "Hangul", "Hang" },
+	{ "Han", "Hani" },
+	{ "Hanunoo", "Hano" },
+	{ "Simplified", "Hans" },
+	{ "Traditional", "Hant" },
+	{ "Hatran", "Hatr" },
+	{ "Hebrew", "Hebr" },
+	{ "Hiragana", "Hira" },
+	{ "Anatolian Hieroglyphs", "Hluw" },
+	{ "Pahawh Hmong", "Hmng" },
+	{ "Nyiakeng Puachue Hmong", "Hmnp" },
+	{ "Old Hungarian", "Hung" },
+	{ "Indus", "Inds" },
+	{ "Old Italic", "Ital" },
+	{ "Javanese", "Java" },
+	{ "Jurchen", "Jurc" },
+	{ "Kayah Li", "Kali" },
+	{ "Katakana", "Kana" },
+	{ "Kharoshthi", "Khar" },
+	{ "Khmer", "Khmr" },
+	{ "Khojki", "Khoj" },
+	{ "Khitan large script", "Kitl" },
+	{ "Khitan small script", "Kits" },
+	{ "Kannada", "Knda" },
+	{ "Kpelle", "Kpel" },
+	{ "Kaithi", "Kthi" },
+	{ "Tai Tham", "Lana" },
+	{ "Lao", "Laoo" },
+	{ "Latin", "Latn" },
+	{ "Leke", "Leke" },
+	{ "Lepcha", "Lepc" },
+	{ "Limbu", "Limb" },
+	{ "Linear A", "Lina" },
+	{ "Linear B", "Linb" },
+	{ "Lisu", "Lisu" },
+	{ "Loma", "Loma" },
+	{ "Lycian", "Lyci" },
+	{ "Lydian", "Lydi" },
+	{ "Mahajani", "Mahj" },
+	{ "Makasar", "Maka" },
+	{ "Mandaic", "Mand" },
+	{ "Manichaean", "Mani" },
+	{ "Marchen", "Marc" },
+	{ "Mayan Hieroglyphs", "Maya" },
+	{ "Medefaidrin", "Medf" },
+	{ "Mende Kikakui", "Mend" },
+	{ "Meroitic Cursive", "Merc" },
+	{ "Meroitic Hieroglyphs", "Mero" },
+	{ "Malayalam", "Mlym" },
+	{ "Modi", "Modi" },
+	{ "Mongolian", "Mong" },
+	{ "Moon", "Moon" },
+	{ "Mro", "Mroo" },
+	{ "Meitei Mayek", "Mtei" },
+	{ "Multani", "Mult" },
+	{ "Myanmar (Burmese)", "Mymr" },
+	{ "Nandinagari", "Nand" },
+	{ "Old North Arabian", "Narb" },
+	{ "Nabataean", "Nbat" },
+	{ "Newa", "Newa" },
+	{ "Naxi Dongba", "Nkdb" },
+	{ "Nakhi Geba", "Nkgb" },
+	{ "N'ko", "Nkoo" },
+	{ "Nüshu", "Nshu" },
+	{ "Ogham", "Ogam" },
+	{ "Ol Chiki", "Olck" },
+	{ "Old Turkic", "Orkh" },
+	{ "Oriya", "Orya" },
+	{ "Osage", "Osge" },
+	{ "Osmanya", "Osma" },
+	{ "Old Uyghur", "Ougr" },
+	{ "Palmyrene", "Palm" },
+	{ "Pau Cin Hau", "Pauc" },
+	{ "Proto-Cuneiform", "Pcun" },
+	{ "Proto-Elamite", "Pelm" },
+	{ "Old Permic", "Perm" },
+	{ "Phags-pa", "Phag" },
+	{ "Inscriptional Pahlavi", "Phli" },
+	{ "Psalter Pahlavi", "Phlp" },
+	{ "Book Pahlavi", "Phlv" },
+	{ "Phoenician", "Phnx" },
+	{ "Klingon", "Piqd" },
+	{ "Miao", "Plrd" },
+	{ "Inscriptional Parthian", "Prti" },
+	{ "Proto-Sinaitic", "Psin" },
+	{ "Ranjana", "Ranj" },
+	{ "Rejang", "Rjng" },
+	{ "Hanifi Rohingya", "Rohg" },
+	{ "Rongorongo", "Roro" },
+	{ "Runic", "Runr" },
+	{ "Samaritan", "Samr" },
+	{ "Sarati", "Sara" },
+	{ "Old South Arabian", "Sarb" },
+	{ "Saurashtra", "Saur" },
+	{ "SignWriting", "Sgnw" },
+	{ "Shavian", "Shaw" },
+	{ "Sharada", "Shrd" },
+	{ "Shuishu", "Shui" },
+	{ "Siddham", "Sidd" },
+	{ "Khudawadi", "Sind" },
+	{ "Sinhala", "Sinh" },
+	{ "Sogdian", "Sogd" },
+	{ "Old Sogdian", "Sogo" },
+	{ "Sora Sompeng", "Sora" },
+	{ "Soyombo", "Soyo" },
+	{ "Sundanese", "Sund" },
+	{ "Syloti Nagri", "Sylo" },
+	{ "Syriac", "Syrc" },
+	{ "Tagbanwa", "Tagb" },
+	{ "Takri", "Takr" },
+	{ "Tai Le", "Tale" },
+	{ "New Tai Lue", "Talu" },
+	{ "Tamil", "Taml" },
+	{ "Tangut", "Tang" },
+	{ "Tai Viet", "Tavt" },
+	{ "Telugu", "Telu" },
+	{ "Tengwar", "Teng" },
+	{ "Tifinagh", "Tfng" },
+	{ "Tagalog", "Tglg" },
+	{ "Thaana", "Thaa" },
+	{ "Thai", "Thai" },
+	{ "Tibetan", "Tibt" },
+	{ "Tirhuta", "Tirh" },
+	{ "Tangsa", "Tnsa" },
+	{ "Toto", "Toto" },
+	{ "Ugaritic", "Ugar" },
+	{ "Vai", "Vaii" },
+	{ "Visible Speech", "Visp" },
+	{ "Vithkuqi", "Vith" },
+	{ "Warang Citi", "Wara" },
+	{ "Wancho", "Wcho" },
+	{ "Woleai", "Wole" },
+	{ "Old Persian", "Xpeo" },
+	{ "Cuneiform", "Xsux" },
+	{ "Yezidi", "Yezi" },
+	{ "Yi", "Yiii" },
+	{ "Zanabazar Square", "Zanb" },
+	{ nullptr, nullptr }
+};
+
+#endif // LOCALES_H
diff --git a/core/string/node_path.cpp b/core/string/node_path.cpp
index 5fae13779e..30fa434fad 100644
--- a/core/string/node_path.cpp
+++ b/core/string/node_path.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -199,6 +199,21 @@ Vector<StringName> NodePath::get_subnames() const {
 	return Vector<StringName>();
 }
 
+// Returns the path names joined with "/" (no leading slash); the result is cached in data->concatenated_path.
+StringName NodePath::get_concatenated_names() const {
+	ERR_FAIL_COND_V(!data, StringName());
+	if (data->concatenated_path) {
+		return data->concatenated_path;
+	}
+	String joined;
+	const StringName *names = data->path.ptr();
+	for (int idx = 0, count = data->path.size(); idx < count; idx++) {
+		joined += (idx > 0) ? "/" + names[idx] : names[idx].operator String();
+	}
+	data->concatenated_path = joined;
+	return data->concatenated_path;
+}
+
 StringName NodePath::get_concatenated_subnames() const {
 	ERR_FAIL_COND_V(!data, StringName());
 
@@ -293,12 +308,12 @@ void NodePath::simplify() {
 			break;
 		}
 		if (data->path[i].operator String() == ".") {
-			data->path.remove(i);
+			data->path.remove_at(i);
 			i--;
 		} else if (i > 0 && data->path[i].operator String() == ".." && data->path[i - 1].operator String() != "." && data->path[i - 1].operator String() != "..") {
 			//remove both
-			data->path.remove(i - 1);
-			data->path.remove(i - 1);
+			data->path.remove_at(i - 1);
+			data->path.remove_at(i - 1);
 			i -= 2;
 			if (data->path.size() == 0) {
 				data->path.push_back(".");
@@ -368,7 +383,7 @@ NodePath::NodePath(const String &p_path) {
 		for (int i = from; i <= path.length(); i++) {
 			if (path[i] == ':' || path[i] == 0) {
 				String str = path.substr(from, i - from);
-				if (str == "") {
+				if (str.is_empty()) {
 					if (path[i] == 0) {
 						continue; // Allow end-of-path :
 					}
diff --git a/core/string/node_path.h b/core/string/node_path.h
index a277ab26fa..2bce33e21e 100644
--- a/core/string/node_path.h
+++ b/core/string/node_path.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -39,6 +39,7 @@ class NodePath {
 		SafeRefCount refcount;
 		Vector<StringName> path;
 		Vector<StringName> subpath;
+		StringName concatenated_path;
 		StringName concatenated_subpath;
 		bool absolute;
 		bool has_slashes;
@@ -59,6 +60,7 @@ public:
 	StringName get_subname(int p_idx) const;
 	Vector<StringName> get_names() const;
 	Vector<StringName> get_subnames() const;
+	StringName get_concatenated_names() const;
 	StringName get_concatenated_subnames() const;
 
 	NodePath rel_path_to(const NodePath &p_np) const;
diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp
index f8be564740..07302cc8c3 100644
--- a/core/string/optimized_translation.cpp
+++ b/core/string/optimized_translation.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -37,9 +37,9 @@ extern "C" {
 }
 
 struct CompressedString {
-	int orig_len;
+	int orig_len = 0;
 	CharString compressed;
-	int offset;
+	int offset = 0;
 };
 
 void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
@@ -53,7 +53,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
 	int size = Math::larger_prime(keys.size());
 
 	Vector<Vector<Pair<int, CharString>>> buckets;
-	Vector<Map<uint32_t, int>> table;
+	Vector<HashMap<uint32_t, int>> table;
 	Vector<uint32_t> hfunc_table;
 	Vector<CompressedString> compressed;
 
@@ -108,7 +108,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
 
 	for (int i = 0; i < size; i++) {
 		const Vector<Pair<int, CharString>> &b = buckets[i];
-		Map<uint32_t, int> &t = table.write[i];
+		HashMap<uint32_t, int> &t = table.write[i];
 
 		if (b.size() == 0) {
 			continue;
@@ -147,7 +147,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
 	int btindex = 0;
 
 	for (int i = 0; i < size; i++) {
-		const Map<uint32_t, int> &t = table[i];
+		const HashMap<uint32_t, int> &t = table[i];
 		if (t.size() == 0) {
 			htw[i] = 0xFFFFFFFF; //nothing
 			continue;
diff --git a/core/string/optimized_translation.h b/core/string/optimized_translation.h
index bccf932383..f3dbfe8f5c 100644
--- a/core/string/optimized_translation.h
+++ b/core/string/optimized_translation.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -38,7 +38,7 @@ class OptimizedTranslation : public Translation {
 
 	//this translation uses a sort of modified perfect hash algorithm
 	//it requires hashing strings twice and then does a binary search,
-	//so it's slower, but at the same time it has an extreemly high chance
+	//so it's slower, but at the same time it has an extremely high chance
 	//of catching untranslated strings
 
 	//load/store friendly types
diff --git a/core/string/print_string.cpp b/core/string/print_string.cpp
index 345371d733..592da58fe7 100644
--- a/core/string/print_string.cpp
+++ b/core/string/print_string.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -30,13 +30,12 @@
 
 #include "print_string.h"
 
+#include "core/core_globals.h"
 #include "core/os/os.h"
 
 #include <stdio.h>
 
 static PrintHandlerList *print_handler_list = nullptr;
-bool _print_line_enabled = true;
-bool _print_error_enabled = true;
 
 void add_print_handler(PrintHandlerList *p_handler) {
 	_global_lock();
@@ -45,7 +44,7 @@ void add_print_handler(PrintHandlerList *p_handler) {
 	_global_unlock();
 }
 
-void remove_print_handler(PrintHandlerList *p_handler) {
+void remove_print_handler(const PrintHandlerList *p_handler) {
 	_global_lock();
 
 	PrintHandlerList *prev = nullptr;
@@ -69,8 +68,8 @@ void remove_print_handler(PrintHandlerList *p_handler) {
 	ERR_FAIL_COND(l == nullptr);
 }
 
-void print_line(String p_string) {
-	if (!_print_line_enabled) {
+void __print_line(String p_string) {
+	if (!CoreGlobals::print_line_enabled) {
 		return;
 	}
 
@@ -79,7 +78,98 @@ void print_line(String p_string) {
 	_global_lock();
 	PrintHandlerList *l = print_handler_list;
 	while (l) {
-		l->printfunc(l->userdata, p_string, false);
+		l->printfunc(l->userdata, p_string, false, false);
+		l = l->next;
+	}
+
+	_global_unlock();
+}
+
+void __print_line_rich(String p_string) {
+	if (!CoreGlobals::print_line_enabled) {
+		return;
+	}
+
+	// Convert a subset of BBCode tags to ANSI escape codes for correct display in the terminal.
+	// Support of those ANSI escape codes varies across terminal emulators,
+	// especially for italic and strikethrough.
+	String p_string_ansi = p_string;
+
+	p_string_ansi = p_string_ansi.replace("[b]", "\u001b[1m");
+	p_string_ansi = p_string_ansi.replace("[/b]", "\u001b[22m");
+	p_string_ansi = p_string_ansi.replace("[i]", "\u001b[3m");
+	p_string_ansi = p_string_ansi.replace("[/i]", "\u001b[23m");
+	p_string_ansi = p_string_ansi.replace("[u]", "\u001b[4m");
+	p_string_ansi = p_string_ansi.replace("[/u]", "\u001b[24m");
+	p_string_ansi = p_string_ansi.replace("[s]", "\u001b[9m");
+	p_string_ansi = p_string_ansi.replace("[/s]", "\u001b[29m");
+
+	p_string_ansi = p_string_ansi.replace("[indent]", "    ");
+	p_string_ansi = p_string_ansi.replace("[/indent]", "");
+	p_string_ansi = p_string_ansi.replace("[code]", "\u001b[2m");
+	p_string_ansi = p_string_ansi.replace("[/code]", "\u001b[22m");
+	p_string_ansi = p_string_ansi.replace("[url]", "");
+	p_string_ansi = p_string_ansi.replace("[/url]", "");
+	p_string_ansi = p_string_ansi.replace("[center]", "\n\t\t\t");
+	p_string_ansi = p_string_ansi.replace("[/center]", "");
+	p_string_ansi = p_string_ansi.replace("[right]", "\n\t\t\t\t\t\t");
+	p_string_ansi = p_string_ansi.replace("[/right]", "");
+
+	if (p_string_ansi.contains("[color")) {
+		p_string_ansi = p_string_ansi.replace("[color=black]", "\u001b[30m");
+		p_string_ansi = p_string_ansi.replace("[color=red]", "\u001b[91m");
+		p_string_ansi = p_string_ansi.replace("[color=green]", "\u001b[92m");
+		p_string_ansi = p_string_ansi.replace("[color=lime]", "\u001b[92m");
+		p_string_ansi = p_string_ansi.replace("[color=yellow]", "\u001b[93m");
+		p_string_ansi = p_string_ansi.replace("[color=blue]", "\u001b[94m");
+		p_string_ansi = p_string_ansi.replace("[color=magenta]", "\u001b[95m");
+		p_string_ansi = p_string_ansi.replace("[color=pink]", "\u001b[38;5;218m");
+		p_string_ansi = p_string_ansi.replace("[color=purple]", "\u001b[38;5;98m");
+		p_string_ansi = p_string_ansi.replace("[color=cyan]", "\u001b[96m");
+		p_string_ansi = p_string_ansi.replace("[color=white]", "\u001b[97m");
+		p_string_ansi = p_string_ansi.replace("[color=orange]", "\u001b[38;5;208m");
+		p_string_ansi = p_string_ansi.replace("[color=gray]", "\u001b[90m");
+		p_string_ansi = p_string_ansi.replace("[/color]", "\u001b[39m");
+	}
+	if (p_string_ansi.contains("[bgcolor")) {
+		p_string_ansi = p_string_ansi.replace("[bgcolor=black]", "\u001b[40m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=red]", "\u001b[101m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=green]", "\u001b[102m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=lime]", "\u001b[102m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=yellow]", "\u001b[103m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=blue]", "\u001b[104m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=magenta]", "\u001b[105m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=pink]", "\u001b[48;5;218m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=purple]", "\u001b[48;5;98m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=cyan]", "\u001b[106m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=white]", "\u001b[107m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=orange]", "\u001b[48;5;208m");
+		p_string_ansi = p_string_ansi.replace("[bgcolor=gray]", "\u001b[100m");
+		p_string_ansi = p_string_ansi.replace("[/bgcolor]", "\u001b[49m");
+	}
+	if (p_string_ansi.contains("[fgcolor")) {
+		p_string_ansi = p_string_ansi.replace("[fgcolor=black]", "\u001b[30;40m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=red]", "\u001b[91;101m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=green]", "\u001b[92;102m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=lime]", "\u001b[92;102m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=yellow]", "\u001b[93;103m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=blue]", "\u001b[94;104m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=magenta]", "\u001b[95;105m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=pink]", "\u001b[38;5;218;48;5;218m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=purple]", "\u001b[38;5;98;48;5;98m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=cyan]", "\u001b[96;106m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=white]", "\u001b[97;107m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=orange]", "\u001b[38;5;208;48;5;208m");
+		p_string_ansi = p_string_ansi.replace("[fgcolor=gray]", "\u001b[90;100m");
+		p_string_ansi = p_string_ansi.replace("[/fgcolor]", "\u001b[39;49m");
+	}
+
+	OS::get_singleton()->print_rich("%s\n", p_string_ansi.utf8().get_data());
+
+	_global_lock();
+	PrintHandlerList *l = print_handler_list;
+	while (l) {
+		l->printfunc(l->userdata, p_string, false, true);
 		l = l->next;
 	}
 
@@ -87,7 +177,7 @@ void print_line(String p_string) {
 }
 
 void print_error(String p_string) {
-	if (!_print_error_enabled) {
+	if (!CoreGlobals::print_error_enabled) {
 		return;
 	}
 
@@ -96,7 +186,7 @@ void print_error(String p_string) {
 	_global_lock();
 	PrintHandlerList *l = print_handler_list;
 	while (l) {
-		l->printfunc(l->userdata, p_string, true);
+		l->printfunc(l->userdata, p_string, true, false);
 		l = l->next;
 	}
 
@@ -108,3 +198,7 @@ void print_verbose(String p_string) {
 		print_line(p_string);
 	}
 }
+
+String stringify_variants(Variant p_var) {
+	return p_var.operator String(); // Single-value case: the result is just this Variant converted to String.
+}
diff --git a/core/string/print_string.h b/core/string/print_string.h
index 1a9ff1efd6..ca930a3a0f 100644
--- a/core/string/print_string.h
+++ b/core/string/print_string.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -31,11 +31,11 @@
 #ifndef PRINT_STRING_H
 #define PRINT_STRING_H
 
-#include "core/string/ustring.h"
+#include "core/variant/variant.h"
 
 extern void (*_print_func)(String);
 
-typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error);
+typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error, bool p_rich);
 
 struct PrintHandlerList {
 	PrintHandlerFunc printfunc = nullptr;
@@ -46,13 +46,37 @@ struct PrintHandlerList {
 	PrintHandlerList() {}
 };
 
+String stringify_variants(Variant p_var);
+// Variadic overload: converts each argument to a String and joins them with single spaces, left to right.
+template <typename... Args>
+String stringify_variants(Variant p_var, Args... p_args) {
+	return p_var.operator String() + " " + stringify_variants(p_args...);
+}
+
 void add_print_handler(PrintHandlerList *p_handler);
-void remove_print_handler(PrintHandlerList *p_handler);
+void remove_print_handler(const PrintHandlerList *p_handler);
 
-extern bool _print_line_enabled;
-extern bool _print_error_enabled;
-extern void print_line(String p_string);
+extern void __print_line(String p_string);
+extern void __print_line_rich(String p_string);
 extern void print_error(String p_string);
 extern void print_verbose(String p_string);
 
+inline void print_line(Variant v) {
+	__print_line(stringify_variants(v)); // Single value: convert it to a String and hand it to __print_line().
+}
+
+inline void print_line_rich(Variant v) {
+	__print_line_rich(stringify_variants(v)); // Single value: convert it to a String and hand it to __print_line_rich().
+}
+
+template <typename... Args>
+void print_line(Variant p_var, Args... p_args) {
+	__print_line(stringify_variants(p_var, p_args...)); // Several values: stringify them into one String, then print once.
+}
+
+template <typename... Args>
+void print_line_rich(Variant p_var, Args... p_args) {
+	__print_line_rich(stringify_variants(p_var, p_args...)); // Several values: stringify them into one String, then print once.
+}
+
 #endif // PRINT_STRING_H
diff --git a/core/string/string_buffer.h b/core/string/string_buffer.h
index 33897c3674..424952f786 100644
--- a/core/string/string_buffer.h
+++ b/core/string/string_buffer.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
diff --git a/core/string/string_builder.cpp b/core/string/string_builder.cpp
index 834c87c845..7359ff59e1 100644
--- a/core/string/string_builder.cpp
+++ b/core/string/string_builder.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -33,7 +33,7 @@
 #include <string.h>
 
 StringBuilder &StringBuilder::append(const String &p_string) {
-	if (p_string == String()) {
+	if (p_string.is_empty()) {
 		return *this;
 	}
 
diff --git a/core/string/string_builder.h b/core/string/string_builder.h
index 30ce2a06f7..897efa95ef 100644
--- a/core/string/string_builder.h
+++ b/core/string/string_builder.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
diff --git a/core/string/string_name.cpp b/core/string/string_name.cpp
index 9024f60dae..9c4fc4e1b7 100644
--- a/core/string/string_name.cpp
+++ b/core/string/string_name.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -73,23 +73,38 @@ void StringName::cleanup() {
 				d = d->next;
 			}
 		}
-		print_line("\nStringName Reference Ranking:\n");
+
+		print_line("\nStringName reference ranking (from most to least referenced):\n");
+
 		data.sort_custom<DebugSortReferences>();
-		for (int i = 0; i < MIN(100, data.size()); i++) {
+		int unreferenced_stringnames = 0;
+		int rarely_referenced_stringnames = 0;
+		for (int i = 0; i < data.size(); i++) {
 			print_line(itos(i + 1) + ": " + data[i]->get_name() + " - " + itos(data[i]->debug_references));
+			if (data[i]->debug_references == 0) {
+				unreferenced_stringnames += 1;
+			} else if (data[i]->debug_references < 5) {
+				rarely_referenced_stringnames += 1;
+			}
 		}
+
+		print_line(vformat("\nOut of %d StringNames, %d StringNames were never referenced during this run (0 times) (%.2f%%).", data.size(), unreferenced_stringnames, unreferenced_stringnames / float(data.size()) * 100));
+		print_line(vformat("Out of %d StringNames, %d StringNames were rarely referenced during this run (1-4 times) (%.2f%%).", data.size(), rarely_referenced_stringnames, rarely_referenced_stringnames / float(data.size()) * 100));
 	}
 #endif
 	int lost_strings = 0;
 	for (int i = 0; i < STRING_TABLE_LEN; i++) {
 		while (_table[i]) {
 			_Data *d = _table[i];
-			lost_strings++;
-			if (d->static_count.get() != d->refcount.get() && OS::get_singleton()->is_stdout_verbose()) {
-				if (d->cname) {
-					print_line("Orphan StringName: " + String(d->cname));
-				} else {
-					print_line("Orphan StringName: " + String(d->name));
+			if (d->static_count.get() != d->refcount.get()) {
+				lost_strings++;
+
+				if (OS::get_singleton()->is_stdout_verbose()) {
+					if (d->cname) {
+						print_line("Orphan StringName: " + String(d->cname));
+					} else {
+						print_line("Orphan StringName: " + String(d->name));
+					}
 				}
 			}
 
@@ -232,6 +247,7 @@ StringName::StringName(const char *p_name, bool p_static) {
 	_data->cname = nullptr;
 	_data->next = _table[idx];
 	_data->prev = nullptr;
+
 #ifdef DEBUG_ENABLED
 	if (unlikely(debug_stringname)) {
 		// Keep in memory, force static.
@@ -310,7 +326,7 @@ StringName::StringName(const String &p_name, bool p_static) {
 
 	ERR_FAIL_COND(!configured);
 
-	if (p_name == String()) {
+	if (p_name.is_empty()) {
 		return;
 	}
 
@@ -434,7 +450,7 @@ StringName StringName::search(const char32_t *p_name) {
 }
 
 StringName StringName::search(const String &p_name) {
-	ERR_FAIL_COND_V(p_name == "", StringName());
+	ERR_FAIL_COND_V(p_name.is_empty(), StringName());
 
 	MutexLock lock(mutex);
 
diff --git a/core/string/string_name.h b/core/string/string_name.h
index ce7988744b..ff4c41af94 100644
--- a/core/string/string_name.h
+++ b/core/string/string_name.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -35,6 +35,8 @@
 #include "core/string/ustring.h"
 #include "core/templates/safe_refcount.h"
 
+#define UNIQUE_NODE_PREFIX "%"
+
 class Main;
 
 struct StaticCString {
@@ -70,7 +72,7 @@ class StringName {
 	_Data *_data = nullptr;
 
 	union _HashUnion {
-		_Data *ptr;
+		_Data *ptr = nullptr;
 		uint32_t hash;
 	};
 
@@ -100,6 +102,17 @@ public:
 	bool operator==(const String &p_name) const;
 	bool operator==(const char *p_name) const;
 	bool operator!=(const String &p_name) const;
+
+	_FORCE_INLINE_ bool is_node_unique_name() const { // True when the name's first character equals UNIQUE_NODE_PREFIX[0].
+		if (!_data) { // No backing data, so this cannot be a unique node name.
+			return false;
+		}
+		if (_data->cname != nullptr) { // Check the first character of cname when it is set...
+			return (char32_t)_data->cname[0] == (char32_t)UNIQUE_NODE_PREFIX[0];
+		} else { // ...otherwise check the first character of the name member.
+			return (char32_t)_data->name[0] == (char32_t)UNIQUE_NODE_PREFIX[0];
+		}
+	}
 	_FORCE_INLINE_ bool operator<(const StringName &p_name) const {
 		return _data < p_name._data;
 	}
@@ -181,6 +194,18 @@ bool operator!=(const char *p_name, const StringName &p_string_name);
 
 StringName _scs_create(const char *p_chr, bool p_static = false);
 
+/*
+ * The SNAME macro is used to speed up StringName creation, as it allows caching it after the first usage in a very efficient way.
+ * It should NOT be used everywhere, but instead in places where high performance is required and the creation of a StringName
+ * can be costly. Places where it should be used are:
+ * - Control::get_theme_*(<name>) and Window::get_theme_*(<name>) functions.
+ * - emit_signal(<name>, ...) function.
+ * - call_deferred(<name>, ...) function.
+ * - Comparisons to a StringName in overridden _set and _get methods.
+ *
+ * Use in places that can be called hundreds of times per frame (or more) is recommended, but this situation is very rare. If in doubt, do not use.
+ */
+
 #define SNAME(m_arg) ([]() -> const StringName & { static StringName sname = _scs_create(m_arg, true); return sname; })()
 
 #endif // STRING_NAME_H
diff --git a/core/string/translation.cpp b/core/string/translation.cpp
index cf61467d08..b83b7c786f 100644
--- a/core/string/translation.cpp
+++ b/core/string/translation.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -33,793 +33,12 @@
 #include "core/config/project_settings.h"
 #include "core/io/resource_loader.h"
 #include "core/os/os.h"
+#include "core/string/locales.h"
 
 #ifdef TOOLS_ENABLED
 #include "main/main.h"
 #endif
 
-// ISO 639-1 language codes (and a couple of three-letter ISO 639-2 codes),
-// with the addition of glibc locales with their regional identifiers.
-// This list must match the language names (in English) of locale_names.
-//
-// References:
-// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
-// - https://lh.2xlibre.net/locales/
-// - https://iso639-3.sil.org/
-
-static const char *locale_list[] = {
-	"aa", //  Afar
-	"aa_DJ", //  Afar (Djibouti)
-	"aa_ER", //  Afar (Eritrea)
-	"aa_ET", //  Afar (Ethiopia)
-	"af", //  Afrikaans
-	"af_ZA", //  Afrikaans (South Africa)
-	"agr_PE", //  Aguaruna (Peru)
-	"ak_GH", //  Akan (Ghana)
-	"am_ET", //  Amharic (Ethiopia)
-	"an_ES", //  Aragonese (Spain)
-	"anp_IN", //  Angika (India)
-	"ar", //  Arabic
-	"ar_AE", //  Arabic (United Arab Emirates)
-	"ar_BH", //  Arabic (Bahrain)
-	"ar_DZ", //  Arabic (Algeria)
-	"ar_EG", //  Arabic (Egypt)
-	"ar_IN", //  Arabic (India)
-	"ar_IQ", //  Arabic (Iraq)
-	"ar_JO", //  Arabic (Jordan)
-	"ar_KW", //  Arabic (Kuwait)
-	"ar_LB", //  Arabic (Lebanon)
-	"ar_LY", //  Arabic (Libya)
-	"ar_MA", //  Arabic (Morocco)
-	"ar_OM", //  Arabic (Oman)
-	"ar_QA", //  Arabic (Qatar)
-	"ar_SA", //  Arabic (Saudi Arabia)
-	"ar_SD", //  Arabic (Sudan)
-	"ar_SS", //  Arabic (South Soudan)
-	"ar_SY", //  Arabic (Syria)
-	"ar_TN", //  Arabic (Tunisia)
-	"ar_YE", //  Arabic (Yemen)
-	"as_IN", //  Assamese (India)
-	"ast_ES", //  Asturian (Spain)
-	"ayc_PE", //  Southern Aymara (Peru)
-	"ay_PE", //  Aymara (Peru)
-	"az", //  Azerbaijani
-	"az_AZ", //  Azerbaijani (Azerbaijan)
-	"be", //  Belarusian
-	"be_BY", //  Belarusian (Belarus)
-	"bem_ZM", //  Bemba (Zambia)
-	"ber_DZ", //  Berber languages (Algeria)
-	"ber_MA", //  Berber languages (Morocco)
-	"bg", //  Bulgarian
-	"bg_BG", //  Bulgarian (Bulgaria)
-	"bhb_IN", //  Bhili (India)
-	"bho_IN", //  Bhojpuri (India)
-	"bi_TV", //  Bislama (Tuvalu)
-	"bn", //  Bengali
-	"bn_BD", //  Bengali (Bangladesh)
-	"bn_IN", //  Bengali (India)
-	"bo", //  Tibetan
-	"bo_CN", //  Tibetan (China)
-	"bo_IN", //  Tibetan (India)
-	"br", //  Breton
-	"br_FR", //  Breton (France)
-	"brx_IN", //  Bodo (India)
-	"bs_BA", //  Bosnian (Bosnia and Herzegovina)
-	"byn_ER", //  Bilin (Eritrea)
-	"ca", //  Catalan
-	"ca_AD", //  Catalan (Andorra)
-	"ca_ES", //  Catalan (Spain)
-	"ca_FR", //  Catalan (France)
-	"ca_IT", //  Catalan (Italy)
-	"ce_RU", //  Chechen (Russia)
-	"chr_US", //  Cherokee (United States)
-	"cmn_TW", //  Mandarin Chinese (Taiwan)
-	"crh_UA", //  Crimean Tatar (Ukraine)
-	"csb_PL", //  Kashubian (Poland)
-	"cs", //  Czech
-	"cs_CZ", //  Czech (Czech Republic)
-	"cv_RU", //  Chuvash (Russia)
-	"cy_GB", //  Welsh (United Kingdom)
-	"da", //  Danish
-	"da_DK", //  Danish (Denmark)
-	"de", //  German
-	"de_AT", //  German (Austria)
-	"de_BE", //  German (Belgium)
-	"de_CH", //  German (Switzerland)
-	"de_DE", //  German (Germany)
-	"de_IT", //  German (Italy)
-	"de_LU", //  German (Luxembourg)
-	"doi_IN", //  Dogri (India)
-	"dv_MV", //  Dhivehi (Maldives)
-	"dz_BT", //  Dzongkha (Bhutan)
-	"el", //  Greek
-	"el_CY", //  Greek (Cyprus)
-	"el_GR", //  Greek (Greece)
-	"en", //  English
-	"en_AG", //  English (Antigua and Barbuda)
-	"en_AU", //  English (Australia)
-	"en_BW", //  English (Botswana)
-	"en_CA", //  English (Canada)
-	"en_DK", //  English (Denmark)
-	"en_GB", //  English (United Kingdom)
-	"en_HK", //  English (Hong Kong)
-	"en_IE", //  English (Ireland)
-	"en_IL", //  English (Israel)
-	"en_IN", //  English (India)
-	"en_NG", //  English (Nigeria)
-	"en_NZ", //  English (New Zealand)
-	"en_PH", //  English (Philippines)
-	"en_SG", //  English (Singapore)
-	"en_US", //  English (United States)
-	"en_ZA", //  English (South Africa)
-	"en_ZM", //  English (Zambia)
-	"en_ZW", //  English (Zimbabwe)
-	"eo", //  Esperanto
-	"es", //  Spanish
-	"es_AR", //  Spanish (Argentina)
-	"es_BO", //  Spanish (Bolivia)
-	"es_CL", //  Spanish (Chile)
-	"es_CO", //  Spanish (Colombia)
-	"es_CR", //  Spanish (Costa Rica)
-	"es_CU", //  Spanish (Cuba)
-	"es_DO", //  Spanish (Dominican Republic)
-	"es_EC", //  Spanish (Ecuador)
-	"es_ES", //  Spanish (Spain)
-	"es_GT", //  Spanish (Guatemala)
-	"es_HN", //  Spanish (Honduras)
-	"es_MX", //  Spanish (Mexico)
-	"es_NI", //  Spanish (Nicaragua)
-	"es_PA", //  Spanish (Panama)
-	"es_PE", //  Spanish (Peru)
-	"es_PR", //  Spanish (Puerto Rico)
-	"es_PY", //  Spanish (Paraguay)
-	"es_SV", //  Spanish (El Salvador)
-	"es_US", //  Spanish (United States)
-	"es_UY", //  Spanish (Uruguay)
-	"es_VE", //  Spanish (Venezuela)
-	"et", //  Estonian
-	"et_EE", //  Estonian (Estonia)
-	"eu", //  Basque
-	"eu_ES", //  Basque (Spain)
-	"fa", //  Persian
-	"fa_IR", //  Persian (Iran)
-	"ff_SN", //  Fulah (Senegal)
-	"fi", //  Finnish
-	"fi_FI", //  Finnish (Finland)
-	"fil", //  Filipino
-	"fil_PH", //  Filipino (Philippines)
-	"fo_FO", //  Faroese (Faroe Islands)
-	"fr", //  French
-	"fr_BE", //  French (Belgium)
-	"fr_CA", //  French (Canada)
-	"fr_CH", //  French (Switzerland)
-	"fr_FR", //  French (France)
-	"fr_LU", //  French (Luxembourg)
-	"fur_IT", //  Friulian (Italy)
-	"fy_DE", //  Western Frisian (Germany)
-	"fy_NL", //  Western Frisian (Netherlands)
-	"ga", //  Irish
-	"ga_IE", //  Irish (Ireland)
-	"gd_GB", //  Scottish Gaelic (United Kingdom)
-	"gez_ER", //  Geez (Eritrea)
-	"gez_ET", //  Geez (Ethiopia)
-	"gl", //  Galician
-	"gl_ES", //  Galician (Spain)
-	"gu_IN", //  Gujarati (India)
-	"gv_GB", //  Manx (United Kingdom)
-	"hak_TW", //  Hakka Chinese (Taiwan)
-	"ha_NG", //  Hausa (Nigeria)
-	"he", //  Hebrew
-	"he_IL", //  Hebrew (Israel)
-	"hi", //  Hindi
-	"hi_IN", //  Hindi (India)
-	"hne_IN", //  Chhattisgarhi (India)
-	"hr", //  Croatian
-	"hr_HR", //  Croatian (Croatia)
-	"hsb_DE", //  Upper Sorbian (Germany)
-	"ht_HT", //  Haitian (Haiti)
-	"hu", //  Hungarian
-	"hu_HU", //  Hungarian (Hungary)
-	"hus_MX", //  Huastec (Mexico)
-	"hy_AM", //  Armenian (Armenia)
-	"ia_FR", //  Interlingua (France)
-	"id", //  Indonesian
-	"id_ID", //  Indonesian (Indonesia)
-	"ig_NG", //  Igbo (Nigeria)
-	"ik_CA", //  Inupiaq (Canada)
-	"is", //  Icelandic
-	"is_IS", //  Icelandic (Iceland)
-	"it", //  Italian
-	"it_CH", //  Italian (Switzerland)
-	"it_IT", //  Italian (Italy)
-	"iu_CA", //  Inuktitut (Canada)
-	"ja", //  Japanese
-	"ja_JP", //  Japanese (Japan)
-	"kab_DZ", //  Kabyle (Algeria)
-	"ka", //  Georgian
-	"ka_GE", //  Georgian (Georgia)
-	"kk_KZ", //  Kazakh (Kazakhstan)
-	"kl_GL", //  Kalaallisut (Greenland)
-	"km", //  Central Khmer
-	"km_KH", //  Central Khmer (Cambodia)
-	"kn_IN", //  Kannada (India)
-	"kok_IN", //  Konkani (India)
-	"ko", //  Korean
-	"ko_KR", //  Korean (South Korea)
-	"ks_IN", //  Kashmiri (India)
-	"ku", //  Kurdish
-	"ku_TR", //  Kurdish (Turkey)
-	"kw_GB", //  Cornish (United Kingdom)
-	"ky_KG", //  Kirghiz (Kyrgyzstan)
-	"lb_LU", //  Luxembourgish (Luxembourg)
-	"lg_UG", //  Ganda (Uganda)
-	"li_BE", //  Limburgan (Belgium)
-	"li_NL", //  Limburgan (Netherlands)
-	"lij_IT", //  Ligurian (Italy)
-	"ln_CD", //  Lingala (Congo)
-	"lo_LA", //  Lao (Laos)
-	"lt", //  Lithuanian
-	"lt_LT", //  Lithuanian (Lithuania)
-	"lv", //  Latvian
-	"lv_LV", //  Latvian (Latvia)
-	"lzh_TW", //  Literary Chinese (Taiwan)
-	"mag_IN", //  Magahi (India)
-	"mai_IN", //  Maithili (India)
-	"mg_MG", //  Malagasy (Madagascar)
-	"mh_MH", //  Marshallese (Marshall Islands)
-	"mhr_RU", //  Eastern Mari (Russia)
-	"mi", //  Māori
-	"mi_NZ", //  Māori (New Zealand)
-	"miq_NI", //  Mískito (Nicaragua)
-	"mk", //  Macedonian
-	"mk_MK", //  Macedonian (Macedonia)
-	"ml", //  Malayalam
-	"ml_IN", //  Malayalam (India)
-	"mni_IN", //  Manipuri (India)
-	"mn_MN", //  Mongolian (Mongolia)
-	"mr", //  Marathi
-	"mr_IN", //  Marathi (India)
-	"ms", //  Malay
-	"ms_MY", //  Malay (Malaysia)
-	"mt", //  Maltese
-	"mt_MT", //  Maltese (Malta)
-	"my_MM", //  Burmese (Myanmar)
-	"myv_RU", //  Erzya (Russia)
-	"nah_MX", //  Nahuatl languages (Mexico)
-	"nan_TW", //  Min Nan Chinese (Taiwan)
-	"nb", //  Norwegian Bokmål
-	"nb_NO", //  Norwegian Bokmål (Norway)
-	"nds_DE", //  Low German (Germany)
-	"nds_NL", //  Low German (Netherlands)
-	"ne_NP", //  Nepali (Nepal)
-	"nhn_MX", //  Central Nahuatl (Mexico)
-	"niu_NU", //  Niuean (Niue)
-	"niu_NZ", //  Niuean (New Zealand)
-	"nl", //  Dutch
-	"nl_AW", //  Dutch (Aruba)
-	"nl_BE", //  Dutch (Belgium)
-	"nl_NL", //  Dutch (Netherlands)
-	"nn", //  Norwegian Nynorsk
-	"nn_NO", //  Norwegian Nynorsk (Norway)
-	"nr_ZA", //  South Ndebele (South Africa)
-	"nso_ZA", //  Pedi (South Africa)
-	"oc_FR", //  Occitan (France)
-	"om", //  Oromo
-	"om_ET", //  Oromo (Ethiopia)
-	"om_KE", //  Oromo (Kenya)
-	"or", //  Oriya
-	"or_IN", //  Oriya (India)
-	"os_RU", //  Ossetian (Russia)
-	"pa_IN", //  Panjabi (India)
-	"pap", //  Papiamento
-	"pap_AN", //  Papiamento (Netherlands Antilles)
-	"pap_AW", //  Papiamento (Aruba)
-	"pap_CW", //  Papiamento (Curaçao)
-	"pa_PK", //  Panjabi (Pakistan)
-	"pl", //  Polish
-	"pl_PL", //  Polish (Poland)
-	"pr", //  Pirate
-	"ps_AF", //  Pushto (Afghanistan)
-	"pt", //  Portuguese
-	"pt_BR", //  Portuguese (Brazil)
-	"pt_PT", //  Portuguese (Portugal)
-	"quy_PE", //  Ayacucho Quechua (Peru)
-	"quz_PE", //  Cusco Quechua (Peru)
-	"raj_IN", //  Rajasthani (India)
-	"ro", //  Romanian
-	"ro_RO", //  Romanian (Romania)
-	"ru", //  Russian
-	"ru_RU", //  Russian (Russia)
-	"ru_UA", //  Russian (Ukraine)
-	"rw_RW", //  Kinyarwanda (Rwanda)
-	"sa_IN", //  Sanskrit (India)
-	"sat_IN", //  Santali (India)
-	"sc_IT", //  Sardinian (Italy)
-	"sco", //  Scots
-	"sd_IN", //  Sindhi (India)
-	"se_NO", //  Northern Sami (Norway)
-	"sgs_LT", //  Samogitian (Lithuania)
-	"shs_CA", //  Shuswap (Canada)
-	"sid_ET", //  Sidamo (Ethiopia)
-	"si", //  Sinhala
-	"si_LK", //  Sinhala (Sri Lanka)
-	"sk", //  Slovak
-	"sk_SK", //  Slovak (Slovakia)
-	"sl", //  Slovenian
-	"sl_SI", //  Slovenian (Slovenia)
-	"so", //  Somali
-	"so_DJ", //  Somali (Djibouti)
-	"so_ET", //  Somali (Ethiopia)
-	"so_KE", //  Somali (Kenya)
-	"so_SO", //  Somali (Somalia)
-	"son_ML", //  Songhai languages (Mali)
-	"sq", //  Albanian
-	"sq_AL", //  Albanian (Albania)
-	"sq_KV", //  Albanian (Kosovo)
-	"sq_MK", //  Albanian (Macedonia)
-	"sr", //  Serbian
-	"sr_Cyrl", //  Serbian (Cyrillic)
-	"sr_Latn", //  Serbian (Latin)
-	"sr_ME", //  Serbian (Montenegro)
-	"sr_RS", //  Serbian (Serbia)
-	"ss_ZA", //  Swati (South Africa)
-	"st_ZA", //  Southern Sotho (South Africa)
-	"sv", //  Swedish
-	"sv_FI", //  Swedish (Finland)
-	"sv_SE", //  Swedish (Sweden)
-	"sw_KE", //  Swahili (Kenya)
-	"sw_TZ", //  Swahili (Tanzania)
-	"szl_PL", //  Silesian (Poland)
-	"ta", //  Tamil
-	"ta_IN", //  Tamil (India)
-	"ta_LK", //  Tamil (Sri Lanka)
-	"tcy_IN", //  Tulu (India)
-	"te", //  Telugu
-	"te_IN", //  Telugu (India)
-	"tg_TJ", //  Tajik (Tajikistan)
-	"the_NP", //  Chitwania Tharu (Nepal)
-	"th", //  Thai
-	"th_TH", //  Thai (Thailand)
-	"ti", //  Tigrinya
-	"ti_ER", //  Tigrinya (Eritrea)
-	"ti_ET", //  Tigrinya (Ethiopia)
-	"tig_ER", //  Tigre (Eritrea)
-	"tk_TM", //  Turkmen (Turkmenistan)
-	"tl_PH", //  Tagalog (Philippines)
-	"tn_ZA", //  Tswana (South Africa)
-	"tr", //  Turkish
-	"tr_CY", //  Turkish (Cyprus)
-	"tr_TR", //  Turkish (Turkey)
-	"ts_ZA", //  Tsonga (South Africa)
-	"tt", //  Tatar
-	"tt_RU", //  Tatar (Russia)
-	"tzm", // Central Atlas Tamazight
-	"tzm_MA", // Central Atlas Tamazight (Marrocos)
-	"ug_CN", //  Uighur (China)
-	"uk", //  Ukrainian
-	"uk_UA", //  Ukrainian (Ukraine)
-	"unm_US", //  Unami (United States)
-	"ur", //  Urdu
-	"ur_IN", //  Urdu (India)
-	"ur_PK", //  Urdu (Pakistan)
-	"uz", //  Uzbek
-	"uz_UZ", //  Uzbek (Uzbekistan)
-	"ve_ZA", //  Venda (South Africa)
-	"vi", //  Vietnamese
-	"vi_VN", //  Vietnamese (Vietnam)
-	"wa_BE", //  Walloon (Belgium)
-	"wae_CH", //  Walser (Switzerland)
-	"wal_ET", //  Wolaytta (Ethiopia)
-	"wo_SN", //  Wolof (Senegal)
-	"xh_ZA", //  Xhosa (South Africa)
-	"yi_US", //  Yiddish (United States)
-	"yo_NG", //  Yoruba (Nigeria)
-	"yue_HK", //  Yue Chinese (Hong Kong)
-	"zh", //  Chinese
-	"zh_CN", //  Chinese (China)
-	"zh_HK", //  Chinese (Hong Kong)
-	"zh_SG", //  Chinese (Singapore)
-	"zh_TW", //  Chinese (Taiwan)
-	"zu_ZA", //  Zulu (South Africa)
-	nullptr
-};
-
-static const char *locale_names[] = {
-	"Afar",
-	"Afar (Djibouti)",
-	"Afar (Eritrea)",
-	"Afar (Ethiopia)",
-	"Afrikaans",
-	"Afrikaans (South Africa)",
-	"Aguaruna (Peru)",
-	"Akan (Ghana)",
-	"Amharic (Ethiopia)",
-	"Aragonese (Spain)",
-	"Angika (India)",
-	"Arabic",
-	"Arabic (United Arab Emirates)",
-	"Arabic (Bahrain)",
-	"Arabic (Algeria)",
-	"Arabic (Egypt)",
-	"Arabic (India)",
-	"Arabic (Iraq)",
-	"Arabic (Jordan)",
-	"Arabic (Kuwait)",
-	"Arabic (Lebanon)",
-	"Arabic (Libya)",
-	"Arabic (Morocco)",
-	"Arabic (Oman)",
-	"Arabic (Qatar)",
-	"Arabic (Saudi Arabia)",
-	"Arabic (Sudan)",
-	"Arabic (South Soudan)",
-	"Arabic (Syria)",
-	"Arabic (Tunisia)",
-	"Arabic (Yemen)",
-	"Assamese (India)",
-	"Asturian (Spain)",
-	"Southern Aymara (Peru)",
-	"Aymara (Peru)",
-	"Azerbaijani",
-	"Azerbaijani (Azerbaijan)",
-	"Belarusian",
-	"Belarusian (Belarus)",
-	"Bemba (Zambia)",
-	"Berber languages (Algeria)",
-	"Berber languages (Morocco)",
-	"Bulgarian",
-	"Bulgarian (Bulgaria)",
-	"Bhili (India)",
-	"Bhojpuri (India)",
-	"Bislama (Tuvalu)",
-	"Bengali",
-	"Bengali (Bangladesh)",
-	"Bengali (India)",
-	"Tibetan",
-	"Tibetan (China)",
-	"Tibetan (India)",
-	"Breton",
-	"Breton (France)",
-	"Bodo (India)",
-	"Bosnian (Bosnia and Herzegovina)",
-	"Bilin (Eritrea)",
-	"Catalan",
-	"Catalan (Andorra)",
-	"Catalan (Spain)",
-	"Catalan (France)",
-	"Catalan (Italy)",
-	"Chechen (Russia)",
-	"Cherokee (United States)",
-	"Mandarin Chinese (Taiwan)",
-	"Crimean Tatar (Ukraine)",
-	"Kashubian (Poland)",
-	"Czech",
-	"Czech (Czech Republic)",
-	"Chuvash (Russia)",
-	"Welsh (United Kingdom)",
-	"Danish",
-	"Danish (Denmark)",
-	"German",
-	"German (Austria)",
-	"German (Belgium)",
-	"German (Switzerland)",
-	"German (Germany)",
-	"German (Italy)",
-	"German (Luxembourg)",
-	"Dogri (India)",
-	"Dhivehi (Maldives)",
-	"Dzongkha (Bhutan)",
-	"Greek",
-	"Greek (Cyprus)",
-	"Greek (Greece)",
-	"English",
-	"English (Antigua and Barbuda)",
-	"English (Australia)",
-	"English (Botswana)",
-	"English (Canada)",
-	"English (Denmark)",
-	"English (United Kingdom)",
-	"English (Hong Kong)",
-	"English (Ireland)",
-	"English (Israel)",
-	"English (India)",
-	"English (Nigeria)",
-	"English (New Zealand)",
-	"English (Philippines)",
-	"English (Singapore)",
-	"English (United States)",
-	"English (South Africa)",
-	"English (Zambia)",
-	"English (Zimbabwe)",
-	"Esperanto",
-	"Spanish",
-	"Spanish (Argentina)",
-	"Spanish (Bolivia)",
-	"Spanish (Chile)",
-	"Spanish (Colombia)",
-	"Spanish (Costa Rica)",
-	"Spanish (Cuba)",
-	"Spanish (Dominican Republic)",
-	"Spanish (Ecuador)",
-	"Spanish (Spain)",
-	"Spanish (Guatemala)",
-	"Spanish (Honduras)",
-	"Spanish (Mexico)",
-	"Spanish (Nicaragua)",
-	"Spanish (Panama)",
-	"Spanish (Peru)",
-	"Spanish (Puerto Rico)",
-	"Spanish (Paraguay)",
-	"Spanish (El Salvador)",
-	"Spanish (United States)",
-	"Spanish (Uruguay)",
-	"Spanish (Venezuela)",
-	"Estonian",
-	"Estonian (Estonia)",
-	"Basque",
-	"Basque (Spain)",
-	"Persian",
-	"Persian (Iran)",
-	"Fulah (Senegal)",
-	"Finnish",
-	"Finnish (Finland)",
-	"Filipino",
-	"Filipino (Philippines)",
-	"Faroese (Faroe Islands)",
-	"French",
-	"French (Belgium)",
-	"French (Canada)",
-	"French (Switzerland)",
-	"French (France)",
-	"French (Luxembourg)",
-	"Friulian (Italy)",
-	"Western Frisian (Germany)",
-	"Western Frisian (Netherlands)",
-	"Irish",
-	"Irish (Ireland)",
-	"Scottish Gaelic (United Kingdom)",
-	"Geez (Eritrea)",
-	"Geez (Ethiopia)",
-	"Galician",
-	"Galician (Spain)",
-	"Gujarati (India)",
-	"Manx (United Kingdom)",
-	"Hakka Chinese (Taiwan)",
-	"Hausa (Nigeria)",
-	"Hebrew",
-	"Hebrew (Israel)",
-	"Hindi",
-	"Hindi (India)",
-	"Chhattisgarhi (India)",
-	"Croatian",
-	"Croatian (Croatia)",
-	"Upper Sorbian (Germany)",
-	"Haitian (Haiti)",
-	"Hungarian",
-	"Hungarian (Hungary)",
-	"Huastec (Mexico)",
-	"Armenian (Armenia)",
-	"Interlingua (France)",
-	"Indonesian",
-	"Indonesian (Indonesia)",
-	"Igbo (Nigeria)",
-	"Inupiaq (Canada)",
-	"Icelandic",
-	"Icelandic (Iceland)",
-	"Italian",
-	"Italian (Switzerland)",
-	"Italian (Italy)",
-	"Inuktitut (Canada)",
-	"Japanese",
-	"Japanese (Japan)",
-	"Kabyle (Algeria)",
-	"Georgian",
-	"Georgian (Georgia)",
-	"Kazakh (Kazakhstan)",
-	"Kalaallisut (Greenland)",
-	"Central Khmer",
-	"Central Khmer (Cambodia)",
-	"Kannada (India)",
-	"Konkani (India)",
-	"Korean",
-	"Korean (South Korea)",
-	"Kashmiri (India)",
-	"Kurdish",
-	"Kurdish (Turkey)",
-	"Cornish (United Kingdom)",
-	"Kirghiz (Kyrgyzstan)",
-	"Luxembourgish (Luxembourg)",
-	"Ganda (Uganda)",
-	"Limburgan (Belgium)",
-	"Limburgan (Netherlands)",
-	"Ligurian (Italy)",
-	"Lingala (Congo)",
-	"Lao (Laos)",
-	"Lithuanian",
-	"Lithuanian (Lithuania)",
-	"Latvian",
-	"Latvian (Latvia)",
-	"Literary Chinese (Taiwan)",
-	"Magahi (India)",
-	"Maithili (India)",
-	"Malagasy (Madagascar)",
-	"Marshallese (Marshall Islands)",
-	"Eastern Mari (Russia)",
-	"Māori",
-	"Māori (New Zealand)",
-	"Mískito (Nicaragua)",
-	"Macedonian",
-	"Macedonian (Macedonia)",
-	"Malayalam",
-	"Malayalam (India)",
-	"Manipuri (India)",
-	"Mongolian (Mongolia)",
-	"Marathi",
-	"Marathi (India)",
-	"Malay",
-	"Malay (Malaysia)",
-	"Maltese",
-	"Maltese (Malta)",
-	"Burmese (Myanmar)",
-	"Erzya (Russia)",
-	"Nahuatl languages (Mexico)",
-	"Min Nan Chinese (Taiwan)",
-	"Norwegian Bokmål",
-	"Norwegian Bokmål (Norway)",
-	"Low German (Germany)",
-	"Low German (Netherlands)",
-	"Nepali (Nepal)",
-	"Central Nahuatl (Mexico)",
-	"Niuean (Niue)",
-	"Niuean (New Zealand)",
-	"Dutch",
-	"Dutch (Aruba)",
-	"Dutch (Belgium)",
-	"Dutch (Netherlands)",
-	"Norwegian Nynorsk",
-	"Norwegian Nynorsk (Norway)",
-	"South Ndebele (South Africa)",
-	"Pedi (South Africa)",
-	"Occitan (France)",
-	"Oromo",
-	"Oromo (Ethiopia)",
-	"Oromo (Kenya)",
-	"Oriya",
-	"Oriya (India)",
-	"Ossetian (Russia)",
-	"Panjabi (India)",
-	"Papiamento",
-	"Papiamento (Netherlands Antilles)",
-	"Papiamento (Aruba)",
-	"Papiamento (Curaçao)",
-	"Panjabi (Pakistan)",
-	"Polish",
-	"Polish (Poland)",
-	"Pirate",
-	"Pushto (Afghanistan)",
-	"Portuguese",
-	"Portuguese (Brazil)",
-	"Portuguese (Portugal)",
-	"Ayacucho Quechua (Peru)",
-	"Cusco Quechua (Peru)",
-	"Rajasthani (India)",
-	"Romanian",
-	"Romanian (Romania)",
-	"Russian",
-	"Russian (Russia)",
-	"Russian (Ukraine)",
-	"Kinyarwanda (Rwanda)",
-	"Sanskrit (India)",
-	"Santali (India)",
-	"Sardinian (Italy)",
-	"Scots (Scotland)",
-	"Sindhi (India)",
-	"Northern Sami (Norway)",
-	"Samogitian (Lithuania)",
-	"Shuswap (Canada)",
-	"Sidamo (Ethiopia)",
-	"Sinhala",
-	"Sinhala (Sri Lanka)",
-	"Slovak",
-	"Slovak (Slovakia)",
-	"Slovenian",
-	"Slovenian (Slovenia)",
-	"Somali",
-	"Somali (Djibouti)",
-	"Somali (Ethiopia)",
-	"Somali (Kenya)",
-	"Somali (Somalia)",
-	"Songhai languages (Mali)",
-	"Albanian",
-	"Albanian (Albania)",
-	"Albanian (Kosovo)",
-	"Albanian (Macedonia)",
-	"Serbian",
-	"Serbian (Cyrillic)",
-	"Serbian (Latin)",
-	"Serbian (Montenegro)",
-	"Serbian (Serbia)",
-	"Swati (South Africa)",
-	"Southern Sotho (South Africa)",
-	"Swedish",
-	"Swedish (Finland)",
-	"Swedish (Sweden)",
-	"Swahili (Kenya)",
-	"Swahili (Tanzania)",
-	"Silesian (Poland)",
-	"Tamil",
-	"Tamil (India)",
-	"Tamil (Sri Lanka)",
-	"Tulu (India)",
-	"Telugu",
-	"Telugu (India)",
-	"Tajik (Tajikistan)",
-	"Chitwania Tharu (Nepal)",
-	"Thai",
-	"Thai (Thailand)",
-	"Tigrinya",
-	"Tigrinya (Eritrea)",
-	"Tigrinya (Ethiopia)",
-	"Tigre (Eritrea)",
-	"Turkmen (Turkmenistan)",
-	"Tagalog (Philippines)",
-	"Tswana (South Africa)",
-	"Turkish",
-	"Turkish (Cyprus)",
-	"Turkish (Turkey)",
-	"Tsonga (South Africa)",
-	"Tatar",
-	"Tatar (Russia)",
-	"Central Atlas Tamazight",
-	"Central Atlas Tamazight (Marrocos)",
-	"Uighur (China)",
-	"Ukrainian",
-	"Ukrainian (Ukraine)",
-	"Unami (United States)",
-	"Urdu",
-	"Urdu (India)",
-	"Urdu (Pakistan)",
-	"Uzbek",
-	"Uzbek (Uzbekistan)",
-	"Venda (South Africa)",
-	"Vietnamese",
-	"Vietnamese (Vietnam)",
-	"Walloon (Belgium)",
-	"Walser (Switzerland)",
-	"Wolaytta (Ethiopia)",
-	"Wolof (Senegal)",
-	"Xhosa (South Africa)",
-	"Yiddish (United States)",
-	"Yoruba (Nigeria)",
-	"Yue Chinese (Hong Kong)",
-	"Chinese",
-	"Chinese (China)",
-	"Chinese (Hong Kong)",
-	"Chinese (Singapore)",
-	"Chinese (Taiwan)",
-	"Zulu (South Africa)",
-	nullptr
-};
-
-// Windows has some weird locale identifiers which do not honor the ISO 639-1
-// standardized nomenclature. Whenever those don't conflict with existing ISO
-// identifiers, we override them.
-//
-// Reference:
-// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx
-
-static const char *locale_renames[][2] = {
-	{ "in", "id" }, // Indonesian
-	{ "iw", "he" }, // Hebrew
-	{ "no", "nb" }, // Norwegian Bokmål
-	{ "C", "en" }, // "C" is the simple/default/untranslated Computer locale.
-	// ASCII-only, English, no currency symbols. Godot treats this as "en".
-	// See https://unix.stackexchange.com/a/87763/164141 "The C locale is"...
-	{ nullptr, nullptr }
-};
-
-///////////////////////////////////////////////
-
 Dictionary Translation::_get_messages() const {
 	Dictionary d;
 	for (const KeyValue<StringName, StringName> &E : translation_map) {
@@ -849,17 +68,7 @@ void Translation::_set_messages(const Dictionary &p_messages) {
 }
 
 void Translation::set_locale(const String &p_locale) {
-	String univ_locale = TranslationServer::standardize_locale(p_locale);
-
-	if (!TranslationServer::is_locale_valid(univ_locale)) {
-		String trimmed_locale = TranslationServer::get_language_code(univ_locale);
-
-		ERR_FAIL_COND_MSG(!TranslationServer::is_locale_valid(trimmed_locale), "Invalid locale: " + trimmed_locale + ".");
-
-		locale = trimmed_locale;
-	} else {
-		locale = univ_locale;
-	}
+	locale = TranslationServer::get_singleton()->standardize_locale(p_locale);
 
 	if (OS::get_singleton()->get_main_loop() && TranslationServer::get_singleton()->get_loaded_locales().has(this)) {
 		OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED);
@@ -886,12 +95,12 @@ StringName Translation::get_message(const StringName &p_src_text, const StringNa
 		WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class");
 	}
 
-	const Map<StringName, StringName>::Element *E = translation_map.find(p_src_text);
+	HashMap<StringName, StringName>::ConstIterator E = translation_map.find(p_src_text);
 	if (!E) {
 		return StringName();
 	}
 
-	return E->get();
+	return E->value;
 }
 
 StringName Translation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const {
@@ -932,13 +141,13 @@ void Translation::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("erase_message", "src_message", "context"), &Translation::erase_message, DEFVAL(""));
 	ClassDB::bind_method(D_METHOD("get_message_list"), &Translation::_get_message_list);
 	ClassDB::bind_method(D_METHOD("get_message_count"), &Translation::get_message_count);
-	ClassDB::bind_method(D_METHOD("_set_messages"), &Translation::_set_messages);
+	ClassDB::bind_method(D_METHOD("_set_messages", "messages"), &Translation::_set_messages);
 	ClassDB::bind_method(D_METHOD("_get_messages"), &Translation::_get_messages);
 
 	GDVIRTUAL_BIND(_get_plural_message, "src_message", "src_plural_message", "n", "context");
 	GDVIRTUAL_BIND(_get_message, "src_message", "context");
 
-	ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages");
+	ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages");
 	ADD_PROPERTY(PropertyInfo(Variant::STRING, "locale"), "set_locale", "get_locale");
 }
 
@@ -1004,121 +213,306 @@ static _character_accent_pair _character_to_accented[] = {
 	{ 'z', U"ź" },
 };
 
-bool TranslationServer::is_locale_valid(const String &p_locale) {
-	const char **ptr = locale_list;
+Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info;
 
-	while (*ptr) {
-		if (*ptr == p_locale) {
-			return true;
+HashMap<String, String> TranslationServer::language_map;
+HashMap<String, String> TranslationServer::script_map;
+HashMap<String, String> TranslationServer::locale_rename_map;
+HashMap<String, String> TranslationServer::country_name_map;
+HashMap<String, String> TranslationServer::variant_map;
+HashMap<String, String> TranslationServer::country_rename_map;
+
+void TranslationServer::init_locale_info() {
+	// Init locale info.
+	language_map.clear();
+	int idx = 0;
+	while (language_list[idx][0] != nullptr) {
+		language_map[language_list[idx][0]] = String::utf8(language_list[idx][1]);
+		idx++;
+	}
+
+	// Init locale-script map.
+	locale_script_info.clear();
+	idx = 0;
+	while (locale_scripts[idx][0] != nullptr) {
+		LocaleScriptInfo info;
+		info.name = locale_scripts[idx][0];
+		info.script = locale_scripts[idx][1];
+		info.default_country = locale_scripts[idx][2];
+		Vector<String> supported_countries = String(locale_scripts[idx][3]).split(",", false);
+		for (int i = 0; i < supported_countries.size(); i++) {
+			info.supported_countries.insert(supported_countries[i]);
 		}
-		ptr++;
+		locale_script_info.push_back(info);
+		idx++;
 	}
 
-	return false;
-}
+	// Init supported script list.
+	script_map.clear();
+	idx = 0;
+	while (script_list[idx][0] != nullptr) {
+		script_map[script_list[idx][1]] = String::utf8(script_list[idx][0]);
+		idx++;
+	}
 
-String TranslationServer::standardize_locale(const String &p_locale) {
-	// Replaces '-' with '_' for macOS Sierra-style locales
-	String univ_locale = p_locale.replace("-", "_");
+	// Init regional variant map.
+	variant_map.clear();
+	idx = 0;
+	while (locale_variants[idx][0] != nullptr) {
+		variant_map[locale_variants[idx][0]] = locale_variants[idx][1];
+		idx++;
+	}
 
-	// Handles known non-ISO locale names used e.g. on Windows
-	int idx = 0;
+	// Init locale renames.
+	locale_rename_map.clear();
+	idx = 0;
 	while (locale_renames[idx][0] != nullptr) {
-		if (locale_renames[idx][0] == univ_locale) {
-			univ_locale = locale_renames[idx][1];
-			break;
+		if (!String(locale_renames[idx][1]).is_empty()) {
+			locale_rename_map[locale_renames[idx][0]] = locale_renames[idx][1];
 		}
 		idx++;
 	}
 
-	return univ_locale;
+	// Init country names.
+	country_name_map.clear();
+	idx = 0;
+	while (country_names[idx][0] != nullptr) {
+		country_name_map[String(country_names[idx][0])] = String::utf8(country_names[idx][1]);
+		idx++;
+	}
+
+	// Init country renames.
+	country_rename_map.clear();
+	idx = 0;
+	while (country_renames[idx][0] != nullptr) {
+		if (!String(country_renames[idx][1]).is_empty()) {
+			country_rename_map[country_renames[idx][0]] = country_renames[idx][1];
+		}
+		idx++;
+	}
 }
 
-String TranslationServer::get_language_code(const String &p_locale) {
-	ERR_FAIL_COND_V_MSG(p_locale.length() < 2, p_locale, "Invalid locale '" + p_locale + "'.");
-	// Most language codes are two letters, but some are three,
-	// so we have to look for a regional code separator ('_' or '-')
-	// to extract the left part.
-	// For example we get 'nah_MX' as input and should return 'nah'.
-	int split = p_locale.find("_");
-	if (split == -1) {
-		split = p_locale.find("-");
+String TranslationServer::standardize_locale(const String &p_locale) const {
+	// Replaces '-' with '_' for macOS style locales.
+	String univ_locale = p_locale.replace("-", "_");
+
+	// Extract locale elements.
+	String lang, script, country, variant;
+	Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
+	lang = locale_elements[0];
+	if (locale_elements.size() >= 2) {
+		if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
+			script = locale_elements[1];
+		}
+		if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
+			country = locale_elements[1];
+		}
 	}
-	if (split == -1) { // No separator, so the locale is already only a language code.
-		return p_locale;
+	if (locale_elements.size() >= 3) {
+		if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
+			country = locale_elements[2];
+		} else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) {
+			variant = locale_elements[2].to_lower();
+		}
+	}
+	if (locale_elements.size() >= 4) {
+		if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang) {
+			variant = locale_elements[3].to_lower();
+		}
 	}
-	return p_locale.left(split);
-}
 
-void TranslationServer::set_locale(const String &p_locale) {
-	String univ_locale = standardize_locale(p_locale);
+	// Try to extract script and variant from the extra part.
+	Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";");
+	for (int i = 0; i < script_extra.size(); i++) {
+		if (script_extra[i].to_lower() == "cyrillic") {
+			script = "Cyrl";
+			break;
+		} else if (script_extra[i].to_lower() == "latin") {
+			script = "Latn";
+			break;
+		} else if (script_extra[i].to_lower() == "devanagari") {
+			script = "Deva";
+			break;
+		} else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang) {
+			variant = script_extra[i].to_lower();
+		}
+	}
 
-	if (!is_locale_valid(univ_locale)) {
-		String trimmed_locale = get_language_code(univ_locale);
-		print_verbose(vformat("Unsupported locale '%s', falling back to '%s'.", p_locale, trimmed_locale));
+	// Handles known non-ISO language names used e.g. on Windows.
+	if (locale_rename_map.has(lang)) {
+		lang = locale_rename_map[lang];
+	}
 
-		if (!is_locale_valid(trimmed_locale)) {
-			ERR_PRINT(vformat("Unsupported locale '%s', falling back to 'en'.", trimmed_locale));
-			locale = "en";
-		} else {
-			locale = trimmed_locale;
+	// Handle country renames.
+	if (country_rename_map.has(country)) {
+		country = country_rename_map[country];
+	}
+
+	// Remove unsupported script codes.
+	if (!script_map.has(script)) {
+		script = "";
+	}
+
+	// Add script code based on language and country codes for some ambiguous cases.
+	if (script.is_empty()) {
+		for (int i = 0; i < locale_script_info.size(); i++) {
+			const LocaleScriptInfo &info = locale_script_info[i];
+			if (info.name == lang) {
+				if (country.is_empty() || info.supported_countries.has(country)) {
+					script = info.script;
+					break;
+				}
+			}
+		}
+	}
+	if (!script.is_empty() && country.is_empty()) {
+		// Add country code based on script for some ambiguous cases.
+		for (int i = 0; i < locale_script_info.size(); i++) {
+			const LocaleScriptInfo &info = locale_script_info[i];
+			if (info.name == lang && info.script == script) {
+				country = info.default_country;
+				break;
+			}
+		}
+	}
+
+	// Combine results.
+	String locale = lang;
+	if (!script.is_empty()) {
+		locale = locale + "_" + script;
+	}
+	if (!country.is_empty()) {
+		locale = locale + "_" + country;
+	}
+	if (!variant.is_empty()) {
+		locale = locale + "_" + variant;
+	}
+	return locale;
+}
+
+int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const {
+	String locale_a = standardize_locale(p_locale_a);
+	String locale_b = standardize_locale(p_locale_b);
+
+	if (locale_a == locale_b) {
+		// Exact match.
+		return 10;
+	}
+
+	Vector<String> locale_a_elements = locale_a.split("_");
+	Vector<String> locale_b_elements = locale_b.split("_");
+	if (locale_a_elements[0] == locale_b_elements[0]) {
+		// Matching language, both locales have extra parts.
+		// Return number of matching elements.
+		int matching_elements = 1;
+		for (int i = 1; i < locale_a_elements.size(); i++) {
+			for (int j = 1; j < locale_b_elements.size(); j++) {
+				if (locale_a_elements[i] == locale_b_elements[j]) {
+					matching_elements++;
+				}
+			}
 		}
+		return matching_elements;
 	} else {
-		locale = univ_locale;
+		// No match.
+		return 0;
+	}
+}
+
+String TranslationServer::get_locale_name(const String &p_locale) const {
+	String locale = standardize_locale(p_locale);
+
+	String lang, script, country;
+	Vector<String> locale_elements = locale.split("_");
+	lang = locale_elements[0];
+	if (locale_elements.size() >= 2) {
+		if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
+			script = locale_elements[1];
+		}
+		if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
+			country = locale_elements[1];
+		}
+	}
+	if (locale_elements.size() >= 3) {
+		if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
+			country = locale_elements[2];
+		}
 	}
 
-	if (OS::get_singleton()->get_main_loop()) {
-		OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED);
+	String name = language_map[lang];
+	if (!script.is_empty()) {
+		name = name + " (" + script_map[script] + ")";
+	}
+	if (!country.is_empty()) {
+		name = name + ", " + country_name_map[country];
 	}
+	return name;
+}
 
-	ResourceLoader::reload_translation_remaps();
+Vector<String> TranslationServer::get_all_languages() const {
+	Vector<String> languages;
+
+	for (const KeyValue<String, String> &E : language_map) {
+		languages.push_back(E.key);
+	}
+
+	return languages;
 }
 
-String TranslationServer::get_locale() const {
-	return locale;
+String TranslationServer::get_language_name(const String &p_language) const {
+	return language_map[p_language];
 }
 
-String TranslationServer::get_locale_name(const String &p_locale) const {
-	if (!locale_name_map.has(p_locale)) {
-		return String();
+Vector<String> TranslationServer::get_all_scripts() const {
+	Vector<String> scripts;
+
+	for (const KeyValue<String, String> &E : script_map) {
+		scripts.push_back(E.key);
 	}
-	return locale_name_map[p_locale];
+
+	return scripts;
 }
 
-Array TranslationServer::get_loaded_locales() const {
-	Array locales;
-	for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) {
-		const Ref<Translation> &t = E->get();
-		ERR_FAIL_COND_V(t.is_null(), Array());
-		String l = t->get_locale();
+String TranslationServer::get_script_name(const String &p_script) const {
+	return script_map[p_script];
+}
 
-		locales.push_back(l);
+Vector<String> TranslationServer::get_all_countries() const {
+	Vector<String> countries;
+
+	for (const KeyValue<String, String> &E : country_name_map) {
+		countries.push_back(E.key);
 	}
 
-	return locales;
+	return countries;
 }
 
-Vector<String> TranslationServer::get_all_locales() {
-	Vector<String> locales;
+String TranslationServer::get_country_name(const String &p_country) const {
+	return country_name_map[p_country];
+}
 
-	const char **ptr = locale_list;
+void TranslationServer::set_locale(const String &p_locale) {
+	locale = standardize_locale(p_locale);
 
-	while (*ptr) {
-		locales.push_back(*ptr);
-		ptr++;
+	if (OS::get_singleton()->get_main_loop()) {
+		OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED);
 	}
 
-	return locales;
+	ResourceLoader::reload_translation_remaps();
 }
 
-Vector<String> TranslationServer::get_all_locale_names() {
-	Vector<String> locales;
+String TranslationServer::get_locale() const {
+	return locale;
+}
 
-	const char **ptr = locale_names;
+Array TranslationServer::get_loaded_locales() const {
+	Array locales;
+	for (const Ref<Translation> &E : translations) {
+		const Ref<Translation> &t = E;
+		ERR_FAIL_COND_V(t.is_null(), Array());
+		String l = t->get_locale();
 
-	while (*ptr) {
-		locales.push_back(String::utf8(*ptr));
-		ptr++;
+		locales.push_back(l);
 	}
 
 	return locales;
@@ -1134,23 +528,20 @@ void TranslationServer::remove_translation(const Ref<Translation> &p_translation
 
 Ref<Translation> TranslationServer::get_translation_object(const String &p_locale) {
 	Ref<Translation> res;
-	String lang = get_language_code(p_locale);
-	bool near_match_found = false;
+	int best_score = 0;
 
-	for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) {
-		const Ref<Translation> &t = E->get();
+	for (const Ref<Translation> &E : translations) {
+		const Ref<Translation> &t = E;
 		ERR_FAIL_COND_V(t.is_null(), nullptr);
 		String l = t->get_locale();
 
-		// Exact match.
-		if (l == p_locale) {
-			return t;
-		}
-
-		// If near match found, keep that match, but keep looking to try to look for perfect match.
-		if (get_language_code(l) == lang && !near_match_found) {
+		int score = compare_locales(p_locale, l);
+		if (score > 0 && score >= best_score) {
 			res = t;
-			near_match_found = true;
+			best_score = score;
+			if (score == 10) {
+				break; // Exact match, skip the rest.
+			}
 		}
 	}
 	return res;
@@ -1167,8 +558,6 @@ StringName TranslationServer::translate(const StringName &p_message, const Strin
 		return p_message;
 	}
 
-	ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid.");
-
 	StringName res = _get_message_from_translations(p_message, p_context, locale, false);
 
 	if (!res && fallback.length() >= 2) {
@@ -1190,8 +579,6 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons
 		return p_message_plural;
 	}
 
-	ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid.");
-
 	StringName res = _get_message_from_translations(p_message, p_context, locale, true, p_message_plural, p_n);
 
 	if (!res && fallback.length() >= 2) {
@@ -1209,51 +596,30 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons
 }
 
 StringName TranslationServer::_get_message_from_translations(const StringName &p_message, const StringName &p_context, const String &p_locale, bool plural, const String &p_message_plural, int p_n) const {
-	// Locale can be of the form 'll_CC', i.e. language code and regional code,
-	// e.g. 'en_US', 'en_GB', etc. It might also be simply 'll', e.g. 'en'.
-	// To find the relevant translation, we look for those with locale starting
-	// with the language code, and then if any is an exact match for the long
-	// form. If not found, we fall back to a near match (another locale with
-	// same language code).
-
-	// Note: ResourceLoader::_path_remap reproduces this locale near matching
-	// logic, so be sure to propagate changes there when changing things here.
-
 	StringName res;
-	String lang = get_language_code(p_locale);
-	bool near_match = false;
+	int best_score = 0;
 
-	for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) {
-		const Ref<Translation> &t = E->get();
+	for (const Ref<Translation> &E : translations) {
+		const Ref<Translation> &t = E;
 		ERR_FAIL_COND_V(t.is_null(), p_message);
 		String l = t->get_locale();
 
-		bool exact_match = (l == p_locale);
-		if (!exact_match) {
-			if (near_match) {
-				continue; // Only near-match once, but keep looking for exact matches.
+		int score = compare_locales(p_locale, l);
+		if (score > 0 && score >= best_score) {
+			StringName r;
+			if (!plural) {
+				r = t->get_message(p_message, p_context);
+			} else {
+				r = t->get_plural_message(p_message, p_message_plural, p_n, p_context);
 			}
-			if (get_language_code(l) != lang) {
-				continue; // Language code does not match.
+			if (!r) {
+				continue;
+			}
+			res = r;
+			best_score = score;
+			if (score == 10) {
+				break; // Exact match, skip the rest.
 			}
-		}
-
-		StringName r;
-		if (!plural) {
-			r = t->get_message(p_message, p_context);
-		} else {
-			r = t->get_plural_message(p_message, p_message_plural, p_n, p_context);
-		}
-
-		if (!r) {
-			continue;
-		}
-		res = r;
-
-		if (exact_match) {
-			break;
-		} else {
-			near_match = true;
 		}
 	}
 
@@ -1287,7 +653,7 @@ bool TranslationServer::_load_translations(const String &p_from) {
 void TranslationServer::setup() {
 	String test = GLOBAL_DEF("internationalization/locale/test", "");
 	test = test.strip_edges();
-	if (test != "") {
+	if (!test.is_empty()) {
 		set_locale(test);
 	} else {
 		set_locale(OS::get_singleton()->get_locale());
@@ -1305,18 +671,7 @@ void TranslationServer::setup() {
 	pseudolocalization_skip_placeholders_enabled = GLOBAL_DEF("internationalization/pseudolocalization/skip_placeholders", true);
 
 #ifdef TOOLS_ENABLED
-	{
-		String options = "";
-		int idx = 0;
-		while (locale_list[idx]) {
-			if (idx > 0) {
-				options += ",";
-			}
-			options += locale_list[idx];
-			idx++;
-		}
-		ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_ENUM, options));
-	}
+	ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_LOCALE_ID, ""));
 #endif
 }
 
@@ -1330,8 +685,12 @@ Ref<Translation> TranslationServer::get_tool_translation() const {
 
 String TranslationServer::get_tool_locale() {
 #ifdef TOOLS_ENABLED
-	if (TranslationServer::get_singleton()->get_tool_translation().is_valid() && (Engine::get_singleton()->is_editor_hint() || Main::is_project_manager())) {
-		return tool_translation->get_locale();
+	if (Engine::get_singleton()->is_editor_hint() || Engine::get_singleton()->is_project_manager_hint()) {
+		if (TranslationServer::get_singleton()->get_tool_translation().is_valid()) {
+			return tool_translation->get_locale();
+		} else {
+			return "en";
+		}
 	} else {
 #else
 	{
@@ -1532,7 +891,7 @@ String TranslationServer::wrap_with_fakebidi_characters(String &p_message) const
 	return res;
 }
 
-String TranslationServer::add_padding(String &p_message, int p_length) const {
+String TranslationServer::add_padding(const String &p_message, int p_length) const {
 	String res;
 	String prefix = pseudolocalization_prefix;
 	String suffix;
@@ -1548,7 +907,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const {
 }
 
 const char32_t *TranslationServer::get_accented_version(char32_t p_character) const {
-	if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) {
+	if (!is_ascii_char(p_character)) {
 		return nullptr;
 	}
 
@@ -1562,14 +921,27 @@ const char32_t *TranslationServer::get_accented_version(char32_t p_character) co
 }
 
 bool TranslationServer::is_placeholder(String &p_message, int p_index) const {
-	return p_message[p_index] == '%' && p_index < p_message.size() - 1 &&
-		   (p_message[p_index + 1] == 's' || p_message[p_index + 1] == 'c' || p_message[p_index + 1] == 'd' ||
-				   p_message[p_index + 1] == 'o' || p_message[p_index + 1] == 'x' || p_message[p_index + 1] == 'X' || p_message[p_index + 1] == 'f');
+	return p_index < p_message.size() - 1 && p_message[p_index] == '%' &&
+			(p_message[p_index + 1] == 's' || p_message[p_index + 1] == 'c' || p_message[p_index + 1] == 'd' ||
+					p_message[p_index + 1] == 'o' || p_message[p_index + 1] == 'x' || p_message[p_index + 1] == 'X' || p_message[p_index + 1] == 'f');
 }
 
 void TranslationServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_locale", "locale"), &TranslationServer::set_locale);
 	ClassDB::bind_method(D_METHOD("get_locale"), &TranslationServer::get_locale);
+	ClassDB::bind_method(D_METHOD("get_tool_locale"), &TranslationServer::get_tool_locale);
+
+	ClassDB::bind_method(D_METHOD("compare_locales", "locale_a", "locale_b"), &TranslationServer::compare_locales);
+	ClassDB::bind_method(D_METHOD("standardize_locale", "locale"), &TranslationServer::standardize_locale);
+
+	ClassDB::bind_method(D_METHOD("get_all_languages"), &TranslationServer::get_all_languages);
+	ClassDB::bind_method(D_METHOD("get_language_name", "language"), &TranslationServer::get_language_name);
+
+	ClassDB::bind_method(D_METHOD("get_all_scripts"), &TranslationServer::get_all_scripts);
+	ClassDB::bind_method(D_METHOD("get_script_name", "script"), &TranslationServer::get_script_name);
+
+	ClassDB::bind_method(D_METHOD("get_all_countries"), &TranslationServer::get_all_countries);
+	ClassDB::bind_method(D_METHOD("get_country_name", "country"), &TranslationServer::get_country_name);
 
 	ClassDB::bind_method(D_METHOD("get_locale_name", "locale"), &TranslationServer::get_locale_name);
 
@@ -1603,8 +975,5 @@ void TranslationServer::load_translations() {
 
 TranslationServer::TranslationServer() {
 	singleton = this;
-
-	for (int i = 0; locale_list[i]; ++i) {
-		locale_name_map.insert(locale_list[i], String::utf8(locale_names[i]));
-	}
+	init_locale_info();
 }
diff --git a/core/string/translation.h b/core/string/translation.h
index 6aec0bb8ea..20c6ebd5a5 100644
--- a/core/string/translation.h
+++ b/core/string/translation.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -41,7 +41,7 @@ class Translation : public Resource {
 	RES_BASE_EXTENSION("translation");
 
 	String locale = "en";
-	Map<StringName, StringName> translation_map;
+	HashMap<StringName, StringName> translation_map;
 
 	virtual Vector<String> _get_message_list() const;
 	virtual Dictionary _get_messages() const;
@@ -74,12 +74,10 @@ class TranslationServer : public Object {
 	String locale = "en";
 	String fallback;
 
-	Set<Ref<Translation>> translations;
+	HashSet<Ref<Translation>> translations;
 	Ref<Translation> tool_translation;
 	Ref<Translation> doc_translation;
 
-	Map<String, String> locale_name_map;
-
 	bool enabled = true;
 
 	bool pseudolocalization_enabled = false;
@@ -98,7 +96,7 @@ class TranslationServer : public Object {
 	String double_vowels(String &p_message) const;
 	String replace_with_accented_string(String &p_message) const;
 	String wrap_with_fakebidi_characters(String &p_message) const;
-	String add_padding(String &p_message, int p_length) const;
+	String add_padding(const String &p_message, int p_length) const;
 	const char32_t *get_accented_version(char32_t p_character) const;
 	bool is_placeholder(String &p_message, int p_index) const;
 
@@ -109,6 +107,23 @@ class TranslationServer : public Object {
 
 	static void _bind_methods();
 
+	struct LocaleScriptInfo {
+		String name;
+		String script;
+		String default_country;
+		HashSet<String> supported_countries;
+	};
+	static Vector<LocaleScriptInfo> locale_script_info;
+
+	static HashMap<String, String> language_map;
+	static HashMap<String, String> script_map;
+	static HashMap<String, String> locale_rename_map;
+	static HashMap<String, String> country_name_map;
+	static HashMap<String, String> country_rename_map;
+	static HashMap<String, String> variant_map;
+
+	void init_locale_info();
+
 public:
 	_FORCE_INLINE_ static TranslationServer *get_singleton() { return singleton; }
 
@@ -119,6 +134,15 @@ public:
 	String get_locale() const;
 	Ref<Translation> get_translation_object(const String &p_locale);
 
+	Vector<String> get_all_languages() const;
+	String get_language_name(const String &p_language) const;
+
+	Vector<String> get_all_scripts() const;
+	String get_script_name(const String &p_script) const;
+
+	Vector<String> get_all_countries() const;
+	String get_country_name(const String &p_country) const;
+
 	String get_locale_name(const String &p_locale) const;
 
 	Array get_loaded_locales() const;
@@ -136,11 +160,9 @@ public:
 	void set_editor_pseudolocalization(bool p_enabled);
 	void reload_pseudolocalization();
 
-	static Vector<String> get_all_locales();
-	static Vector<String> get_all_locale_names();
-	static bool is_locale_valid(const String &p_locale);
-	static String standardize_locale(const String &p_locale);
-	static String get_language_code(const String &p_locale);
+	String standardize_locale(const String &p_locale) const;
+
+	int compare_locales(const String &p_locale_a, const String &p_locale_b) const;
 
 	String get_tool_locale();
 	void set_tool_translation(const Ref<Translation> &p_translation);
diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp
index 1da00aa54b..fa656b634d 100644
--- a/core/string/translation_po.cpp
+++ b/core/string/translation_po.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -35,7 +35,7 @@
 #ifdef DEBUG_TRANSLATION_PO
 void TranslationPO::print_translation_map() {
 	Error err;
-	FileAccess *file = FileAccess::open("translation_map_print_test.txt", FileAccess::WRITE, &err);
+	Ref<FileAccess> file = FileAccess::open("translation_map_print_test.txt", FileAccess::WRITE, &err);
 	if (err != OK) {
 		ERR_PRINT("Failed to open translation_map_print_test.txt");
 		return;
@@ -62,7 +62,6 @@ void TranslationPO::print_translation_map() {
 			file->store_line("");
 		}
 	}
-	file->close();
 }
 #endif
 
@@ -71,21 +70,14 @@ Dictionary TranslationPO::_get_messages() const {
 
 	Dictionary d;
 
-	List<StringName> context_l;
-	translation_map.get_key_list(&context_l);
-	for (const StringName &ctx : context_l) {
-		const HashMap<StringName, Vector<StringName>> &id_str_map = translation_map[ctx];
-
+	for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) {
 		Dictionary d2;
-		List<StringName> id_l;
-		id_str_map.get_key_list(&id_l);
-		// Save list of id and strs associated with a context in a temporary dictionary.
-		for (List<StringName>::Element *E2 = id_l.front(); E2; E2 = E2->next()) {
-			StringName id = E2->get();
-			d2[id] = id_str_map[id];
+
+		for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) {
+			d2[E2.key] = E2.value;
 		}
 
-		d[ctx] = d2;
+		d[E.key] = d2;
 	}
 
 	return d;
@@ -275,31 +267,24 @@ void TranslationPO::get_message_list(List<StringName> *r_messages) const {
 	// OptimizedTranslation uses this function to get the list of msgid.
 	// Return all the keys of translation_map under "" context.
 
-	List<StringName> context_l;
-	translation_map.get_key_list(&context_l);
-
-	for (const StringName &E : context_l) {
-		if (String(E) != "") {
+	for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) {
+		if (E.key != StringName()) {
 			continue;
 		}
 
-		List<StringName> msgid_l;
-		translation_map[E].get_key_list(&msgid_l);
-
-		for (List<StringName>::Element *E2 = msgid_l.front(); E2; E2 = E2->next()) {
-			r_messages->push_back(E2->get());
+		for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) {
+			r_messages->push_back(E2.key);
 		}
 	}
 }
 
 int TranslationPO::get_message_count() const {
-	List<StringName> context_l;
-	translation_map.get_key_list(&context_l);
-
 	int count = 0;
-	for (const StringName &E : context_l) {
-		count += translation_map[E].size();
+
+	for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) {
+		count += E.value.size();
 	}
+
 	return count;
 }
 
diff --git a/core/string/translation_po.h b/core/string/translation_po.h
index 0e1d03d6ca..7d63af2246 100644
--- a/core/string/translation_po.h
+++ b/core/string/translation_po.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
diff --git a/core/string/ucaps.h b/core/string/ucaps.h
index b785ac7879..357d36e703 100644
--- a/core/string/ucaps.h
+++ b/core/string/ucaps.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 397743fb6e..c02be9e5b7 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -35,9 +35,11 @@
 #include "core/math/math_funcs.h"
 #include "core/os/memory.h"
 #include "core/string/print_string.h"
+#include "core/string/string_name.h"
 #include "core/string/translation.h"
 #include "core/string/ucaps.h"
 #include "core/variant/variant.h"
+#include "core/version_generated.gen.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -53,34 +55,14 @@
 
 static const int MAX_DECIMALS = 32;
 
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
-	return (c >= '0' && c <= '9');
-}
-
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
-	return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
-	return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
-	return (c >= 'a' && c <= 'z');
-}
-
 static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
-	return (is_upper_case(c) ? (c + ('a' - 'A')) : c);
+	return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
 }
 
 const char CharString::_null = 0;
 const char16_t Char16String::_null = 0;
 const char32_t String::_null = 0;
 
-bool is_symbol(char32_t c) {
-	return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
-}
-
 bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
 	const String &s = p_s;
 	int beg = CLAMP(p_col, 0, s.length());
@@ -122,16 +104,18 @@ bool Char16String::operator<(const Char16String &p_right) const {
 }
 
 Char16String &Char16String::operator+=(char16_t p_char) {
-	resize(size() ? size() + 1 : 2);
-	set(length(), 0);
-	set(length() - 1, p_char);
+	const int lhs_len = length();
+	resize(lhs_len + 2);
+
+	char16_t *dst = ptrw();
+	dst[lhs_len] = p_char;
+	dst[lhs_len + 1] = 0;
 
 	return *this;
 }
 
-Char16String &Char16String::operator=(const char16_t *p_cstr) {
+void Char16String::operator=(const char16_t *p_cstr) {
 	copy_from(p_cstr);
-	return *this;
 }
 
 const char16_t *Char16String::get_data() const {
@@ -178,16 +162,18 @@ bool CharString::operator<(const CharString &p_right) const {
 }
 
 CharString &CharString::operator+=(char p_char) {
-	resize(size() ? size() + 1 : 2);
-	set(length(), 0);
-	set(length() - 1, p_char);
+	const int lhs_len = length();
+	resize(lhs_len + 2);
+
+	char *dst = ptrw();
+	dst[lhs_len] = p_char;
+	dst[lhs_len + 1] = 0;
 
 	return *this;
 }
 
-CharString &CharString::operator=(const char *p_cstr) {
+void CharString::operator=(const char *p_cstr) {
 	copy_from(p_cstr);
-	return *this;
 }
 
 const char *CharString::get_data() const {
@@ -325,11 +311,7 @@ void String::copy_from(const char *p_cstr) {
 		return;
 	}
 
-	int len = 0;
-	const char *ptr = p_cstr;
-	while (*(ptr++) != 0) {
-		len++;
-	}
+	const size_t len = strlen(p_cstr);
 
 	if (len == 0) {
 		resize(0);
@@ -340,8 +322,14 @@ void String::copy_from(const char *p_cstr) {
 
 	char32_t *dst = this->ptrw();
 
-	for (int i = 0; i < len + 1; i++) {
-		dst[i] = p_cstr[i];
+	for (size_t i = 0; i <= len; i++) {
+		uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+		if (c == 0 && i < len) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+		} else {
+			dst[i] = c;
+		}
 	}
 }
 
@@ -368,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
 	char32_t *dst = this->ptrw();
 
 	for (int i = 0; i < len; i++) {
-		dst[i] = p_cstr[i];
+		uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+		if (c == 0) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+		} else {
+			dst[i] = c;
+		}
 	}
 	dst[len] = 0;
 }
@@ -394,14 +388,22 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
 }
 
 void String::copy_from(const char32_t &p_char) {
-	resize(2);
-	if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
-		print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
-		set(0, 0xfffd);
-	} else {
-		set(0, p_char);
+	if (p_char == 0) {
+		print_unicode_error("NUL character", true);
+		return;
+	}
+	if ((p_char & 0xfffff800) == 0xd800) {
+		print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+	}
+	if (p_char > 0x10ffff) {
+		print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
 	}
-	set(1, 0);
+
+	resize(2);
+
+	char32_t *dst = ptrw();
+	dst[0] = p_char;
+	dst[1] = 0;
 }
 
 void String::copy_from(const char32_t *p_cstr) {
@@ -450,17 +452,22 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
 // p_length <= p_char strlen
 void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
 	resize(p_length + 1);
-	set(p_length, 0);
-
 	char32_t *dst = ptrw();
+	dst[p_length] = 0;
 
 	for (int i = 0; i < p_length; i++) {
-		if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
-			dst[i] = 0xfffd;
-		} else {
-			dst[i] = p_char[i];
+		if (p_char[i] == 0) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+			continue;
+		}
+		if ((p_char[i] & 0xfffff800) == 0xd800) {
+			print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
 		}
+		if (p_char[i] > 0x10ffff) {
+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
+		}
+		dst[i] = p_char[i];
 	}
 }
 
@@ -482,6 +489,12 @@ String String::operator+(const String &p_str) const {
 	return res;
 }
 
+String String::operator+(char32_t p_char) const {
+	String res = *this;
+	res += p_char;
+	return res;
+}
+
 String operator+(const char *p_chr, const String &p_str) {
 	String tmp = p_chr;
 	tmp += p_str;
@@ -493,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) {
 	// wchar_t is 16-bit
 	String tmp = String::utf16((const char16_t *)p_chr);
 #else
-	// wchar_t is 32-bi
+	// wchar_t is 32-bit
 	String tmp = (const char32_t *)p_chr;
 #endif
 	tmp += p_str;
@@ -505,27 +518,25 @@ String operator+(char32_t p_chr, const String &p_str) {
 }
 
 String &String::operator+=(const String &p_str) {
-	if (is_empty()) {
+	const int lhs_len = length();
+	if (lhs_len == 0) {
 		*this = p_str;
 		return *this;
 	}
 
-	if (p_str.is_empty()) {
+	const int rhs_len = p_str.length();
+	if (rhs_len == 0) {
 		return *this;
 	}
 
-	int from = length();
-
-	resize(length() + p_str.size());
+	resize(lhs_len + rhs_len + 1);
 
-	const char32_t *src = p_str.get_data();
-	char32_t *dst = ptrw();
+	const char32_t *src = p_str.ptr();
+	char32_t *dst = ptrw() + lhs_len;
 
-	set(length(), 0);
-
-	for (int i = 0; i < p_str.length(); i++) {
-		dst[from + i] = src[i];
-	}
+	// Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination).
+	memcpy(dst, src, rhs_len * sizeof(char32_t));
+	*(dst + rhs_len) = _null;
 
 	return *this;
 }
@@ -535,22 +546,21 @@ String &String::operator+=(const char *p_str) {
 		return *this;
 	}
 
-	int src_len = 0;
-	const char *ptr = p_str;
-	while (*(ptr++) != 0) {
-		src_len++;
-	}
-
-	int from = length();
+	const int lhs_len = length();
+	const size_t rhs_len = strlen(p_str);
 
-	resize(from + src_len + 1);
-
-	char32_t *dst = ptrw();
+	resize(lhs_len + rhs_len + 1);
 
-	set(length(), 0);
+	char32_t *dst = ptrw() + lhs_len;
 
-	for (int i = 0; i < src_len; i++) {
-		dst[from + i] = p_str[i];
+	for (size_t i = 0; i <= rhs_len; i++) {
+		uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
+		if (c == 0 && i < rhs_len) {
+			print_unicode_error("NUL character", true);
+			dst[i] = 0x20;
+		} else {
+			dst[i] = c;
+		}
 	}
 
 	return *this;
@@ -573,15 +583,23 @@ String &String::operator+=(const char32_t *p_str) {
 }
 
 String &String::operator+=(char32_t p_char) {
-	resize(size() ? size() + 1 : 2);
-	set(length(), 0);
-	if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
-		print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
-		set(length() - 1, 0xfffd);
-	} else {
-		set(length() - 1, p_char);
+	if (p_char == 0) {
+		print_unicode_error("NUL character", true);
+		return *this;
+	}
+	if ((p_char & 0xfffff800) == 0xd800) {
+		print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+	}
+	if (p_char > 0x10ffff) {
+		print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
 	}
 
+	const int lhs_len = length();
+	resize(lhs_len + 2);
+	char32_t *dst = ptrw();
+	dst[lhs_len] = p_char;
+	dst[lhs_len + 1] = 0;
+
 	return *this;
 }
 
@@ -952,10 +970,6 @@ const char32_t *String::get_data() const {
 	return size() ? &operator[](0) : &zero;
 }
 
-void String::erase(int p_pos, int p_chars) {
-	*this = left(MAX(p_pos, 0)) + substr(p_pos + p_chars, length() - ((p_pos + p_chars)));
-}
-
 String String::capitalize() const {
 	String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges();
 	String cap;
@@ -979,21 +993,21 @@ String String::camelcase_to_underscore(bool lowercase) const {
 	int start_index = 0;
 
 	for (int i = 1; i < this->size(); i++) {
-		bool is_upper = is_upper_case(cstr[i]);
+		bool is_upper = is_ascii_upper_case(cstr[i]);
 		bool is_number = is_digit(cstr[i]);
 
 		bool are_next_2_lower = false;
 		bool is_next_lower = false;
 		bool is_next_number = false;
-		bool was_precedent_upper = is_upper_case(cstr[i - 1]);
+		bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
 		bool was_precedent_number = is_digit(cstr[i - 1]);
 
 		if (i + 2 < this->size()) {
-			are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]);
+			are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
 		}
 
 		if (i + 1 < this->size()) {
-			is_next_lower = is_lower_case(cstr[i + 1]);
+			is_next_lower = is_ascii_lower_case(cstr[i + 1]);
 			is_next_number = is_digit(cstr[i + 1]);
 		}
 
@@ -1532,115 +1546,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
 }
 
 String String::num_real(double p_num, bool p_trailing) {
-	if (Math::is_nan(p_num)) {
-		return "nan";
-	}
-
-	if (Math::is_inf(p_num)) {
-		if (signbit(p_num)) {
-			return "-inf";
+	if (p_num == (double)(int64_t)p_num) {
+		if (p_trailing) {
+			return num_int64((int64_t)p_num) + ".0";
 		} else {
-			return "inf";
+			return num_int64((int64_t)p_num);
 		}
 	}
-
-	String s;
-	String sd;
-
-	// Integer part.
-
-	bool neg = p_num < 0;
-	p_num = ABS(p_num);
-	int64_t intn = (int64_t)p_num;
-
-	// Decimal part.
-
-	if (intn != p_num) {
-		double dec = p_num - (double)intn;
-
-		int digit = 0;
-
 #ifdef REAL_T_IS_DOUBLE
-		int decimals = 14;
-		double tolerance = 1e-14;
+	int decimals = 14;
 #else
-		int decimals = 6;
-		double tolerance = 1e-6;
+	int decimals = 6;
 #endif
-		// We want to align the digits to the above sane default, so we only
-		// need to subtract log10 for numbers with a positive power of ten.
-		if (p_num > 10) {
-			decimals -= (int)floor(log10(p_num));
-		}
-
-		if (decimals > MAX_DECIMALS) {
-			decimals = MAX_DECIMALS;
-		}
-
-		// In case the value ends up ending in "99999", we want to add a
-		// tiny bit to the value we're checking when deciding when to stop,
-		// so we multiply by slightly above 1 (1 + 1e-7 or 1e-15).
-		double check_multiplier = 1 + tolerance / 10;
-
-		int64_t dec_int = 0;
-		int64_t dec_max = 0;
-
-		while (true) {
-			dec *= 10.0;
-			dec_int = dec_int * 10 + (int64_t)dec % 10;
-			dec_max = dec_max * 10 + 9;
-			digit++;
-
-			if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) {
-				break;
-			}
-
-			if (digit == decimals) {
-				break;
-			}
-		}
-
-		dec *= 10;
-		int last = (int64_t)dec % 10;
-
-		if (last > 5) {
-			if (dec_int == dec_max) {
-				dec_int = 0;
-				intn++;
-			} else {
-				dec_int++;
-			}
-		}
-
-		String decimal;
-		for (int i = 0; i < digit; i++) {
-			char num[2] = { 0, 0 };
-			num[0] = '0' + dec_int % 10;
-			decimal = num + decimal;
-			dec_int /= 10;
-		}
-		sd = '.' + decimal;
-	} else if (p_trailing) {
-		sd = ".0";
-	} else {
-		sd = "";
-	}
-
-	if (intn == 0) {
-		s = "0";
-	} else {
-		while (intn) {
-			char32_t num = '0' + (intn % 10);
-			intn /= 10;
-			s = num + s;
-		}
-	}
-
-	s = s + sd;
-	if (neg) {
-		s = "-" + s;
+	// Align the digits to the above sane default: subtract log10 of the magnitude
+	// (not the signed value) so that negative numbers are trimmed like positive ones.
+	if (ABS(p_num) > 10) {
+		decimals -= (int)floor(log10(ABS(p_num)));
 	}
-	return s;
+	return num(p_num, decimals);
 }
 
 String String::num_scientific(double p_num) {
@@ -1699,6 +1622,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
 	return ret;
 }
 
+void String::print_unicode_error(const String &p_message, bool p_critical) const {
+	if (p_critical) {
+		print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
+	} else {
+		print_error(vformat("Unicode parsing error: %s", p_message));
+	}
+}
+
 CharString String::ascii(bool p_allow_extended) const {
 	if (!length()) {
 		return CharString();
@@ -1712,7 +1643,7 @@ CharString String::ascii(bool p_allow_extended) const {
 		if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
 			cs[i] = c;
 		} else {
-			print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
 			cs[i] = 0x20;
 		}
 	}
@@ -1727,11 +1658,9 @@ String String::utf8(const char *p_utf8, int p_len) {
 	return ret;
 }
 
-bool String::parse_utf8(const char *p_utf8, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
-
+Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
 	if (!p_utf8) {
-		return true;
+		return ERR_INVALID_DATA;
 	}
 
 	String aux;
@@ -1751,14 +1680,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 		}
 	}
 
+	bool decode_error = false;
+	bool decode_failed = false;
 	{
 		const char *ptrtmp = p_utf8;
 		const char *ptrtmp_limit = &p_utf8[p_len];
 		int skip = 0;
+		uint8_t c_start = 0;
 		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
-			if (skip == 0) {
-				uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+			uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
 
+			if (skip == 0) {
+				if (p_skip_cr && c == '\r') {
+					ptrtmp++;
+					continue;
+				}
 				/* Determine the number of characters in sequence */
 				if ((c & 0x80) == 0) {
 					skip = 0;
@@ -1768,20 +1704,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 					skip = 2;
 				} else if ((c & 0xf8) == 0xf0) {
 					skip = 3;
+				} else if ((c & 0xfc) == 0xf8) {
+					skip = 4;
+				} else if ((c & 0xfe) == 0xfc) {
+					skip = 5;
 				} else {
-					_UNICERROR("invalid skip at " + num_int64(cstr_size));
-					return true; //invalid utf8
+					skip = 0;
+					print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
+					decode_failed = true;
 				}
+				c_start = c;
 
 				if (skip == 1 && (c & 0x1e) == 0) {
-					_UNICERROR("overlong rejected at " + num_int64(cstr_size));
-					return true; //reject overlong
+					print_unicode_error(vformat("Overlong encoding (%x ...)", c));
+					decode_error = true;
 				}
-
 				str_size++;
-
 			} else {
-				--skip;
+				if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
+					print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
+					decode_error = true;
+				}
+				if (c < 0x80 || c > 0xbf) {
+					print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
+					decode_failed = true;
+					skip = 0;
+				} else {
+					--skip;
+				}
 			}
 
 			cstr_size++;
@@ -1789,80 +1739,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 		}
 
 		if (skip) {
-			_UNICERROR("no space left");
-			return true; //not enough space
+			print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
+			decode_failed = true;
 		}
 	}
 
 	if (str_size == 0) {
 		clear();
-		return false;
+		return OK; // empty string
 	}
 
 	resize(str_size + 1);
 	char32_t *dst = ptrw();
 	dst[str_size] = 0;
 
+	int skip = 0;
+	uint32_t unichar = 0;
 	while (cstr_size) {
-		int len = 0;
-
-		/* Determine the number of characters in sequence */
-		if ((*p_utf8 & 0x80) == 0) {
-			len = 1;
-		} else if ((*p_utf8 & 0xe0) == 0xc0) {
-			len = 2;
-		} else if ((*p_utf8 & 0xf0) == 0xe0) {
-			len = 3;
-		} else if ((*p_utf8 & 0xf8) == 0xf0) {
-			len = 4;
-		} else {
-			_UNICERROR("invalid len");
-			return true; //invalid UTF8
-		}
-
-		if (len > cstr_size) {
-			_UNICERROR("no space left");
-			return true; //not enough space
-		}
+		uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
 
-		if (len == 2 && (*p_utf8 & 0x1E) == 0) {
-			_UNICERROR("no space left");
-			return true; //reject overlong
-		}
-
-		/* Convert the first character */
-
-		uint32_t unichar = 0;
-
-		if (len == 1) {
-			unichar = *p_utf8;
+		if (skip == 0) {
+			if (p_skip_cr && c == '\r') {
+				p_utf8++;
+				continue;
+			}
+			/* Determine the number of characters in sequence */
+			if ((c & 0x80) == 0) {
+				*(dst++) = c;
+				unichar = 0;
+				skip = 0;
+			} else if ((c & 0xe0) == 0xc0) {
+				unichar = (0xff >> 3) & c;
+				skip = 1;
+			} else if ((c & 0xf0) == 0xe0) {
+				unichar = (0xff >> 4) & c;
+				skip = 2;
+			} else if ((c & 0xf8) == 0xf0) {
+				unichar = (0xff >> 5) & c;
+				skip = 3;
+			} else if ((c & 0xfc) == 0xf8) {
+				unichar = (0xff >> 6) & c;
+				skip = 4;
+			} else if ((c & 0xfe) == 0xfc) {
+				unichar = (0xff >> 7) & c;
+				skip = 5;
+			} else {
+				*(dst++) = 0x20;
+				unichar = 0;
+				skip = 0;
+			}
 		} else {
-			unichar = (0xff >> (len + 1)) & *p_utf8;
-
-			for (int i = 1; i < len; i++) {
-				if ((p_utf8[i] & 0xc0) != 0x80) {
-					_UNICERROR("invalid utf8");
-					return true; //invalid utf8
-				}
-				if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
-					_UNICERROR("invalid utf8 overlong");
-					return true; //no overlong
+			if (c < 0x80 || c > 0xbf) {
+				*(dst++) = 0x20;
+				skip = 0;
+			} else {
+				unichar = (unichar << 6) | (c & 0x3f);
+				--skip;
+				if (skip == 0) {
+					if (unichar == 0) {
+						print_unicode_error("NUL character", true);
+						decode_failed = true;
+						unichar = 0x20;
+					}
+					if ((unichar & 0xfffff800) == 0xd800) {
+						print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
+						decode_error = true;
+					}
+					if (unichar > 0x10ffff) {
+						print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
+						decode_error = true;
+					}
+					*(dst++) = unichar;
 				}
-				unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
 			}
 		}
-		if (unichar >= 0xd800 && unichar <= 0xdfff) {
-			_UNICERROR("invalid code point");
-			return CharString();
-		}
 
-		*(dst++) = unichar;
-		cstr_size -= len;
-		p_utf8 += len;
+		cstr_size--;
+		p_utf8++;
+	}
+	if (skip) {
+		*(dst++) = 0x20;
 	}
 
-	return false;
-#undef _UNICERROR
+	if (decode_failed) {
+		return ERR_INVALID_DATA;
+	} else if (decode_error) {
+		return ERR_PARSE_ERROR;
+	} else {
+		return OK;
+	}
 }
 
 CharString String::utf8() const {
@@ -1881,15 +1846,17 @@ CharString String::utf8() const {
 			fl += 2;
 		} else if (c <= 0xffff) { // 16 bits
 			fl += 3;
-		} else if (c <= 0x0010ffff) { // 21 bits
+		} else if (c <= 0x001fffff) { // 21 bits
 			fl += 4;
+		} else if (c <= 0x03ffffff) { // 26 bits
+			fl += 5;
+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
+		} else if (c <= 0x7fffffff) { // 31 bits
+			fl += 6;
+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
 		} else {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return CharString();
-		}
-		if (c >= 0xd800 && c <= 0xdfff) {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return CharString();
+			fl += 1;
+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
 		}
 	}
 
@@ -1915,11 +1882,26 @@ CharString String::utf8() const {
 			APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
 			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
 			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
-		} else { // 21 bits
+		} else if (c <= 0x001fffff) { // 21 bits
 			APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
 			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
 			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
 			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+		} else if (c <= 0x03ffffff) { // 26 bits
+			APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+		} else if (c <= 0x7fffffff) { // 31 bits
+			APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
+			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+		} else {
+			APPEND_CHAR(0x20);
 		}
 	}
 #undef APPEND_CHAR
@@ -1935,11 +1917,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) {
 	return ret;
 }
 
-bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
-
+Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
 	if (!p_utf16) {
-		return true;
+		return ERR_INVALID_DATA;
 	}
 
 	String aux;
@@ -1966,80 +1946,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
 		}
 	}
 
+	bool decode_error = false;
 	{
 		const char16_t *ptrtmp = p_utf16;
 		const char16_t *ptrtmp_limit = &p_utf16[p_len];
-		int skip = 0;
+		uint32_t c_prev = 0;
+		bool skip = false;
 		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
 			uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
-			if (skip == 0) {
-				if ((c & 0xfffffc00) == 0xd800) {
-					skip = 1; // lead surrogate
-				} else if ((c & 0xfffffc00) == 0xdc00) {
-					_UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
-					return true; // invalid UTF16
-				} else {
-					skip = 0;
-				}
-				str_size++;
-			} else {
-				if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
-					--skip;
+
+			if (skip && (c & 0xfffffc00) != 0xdc00) { // pending lead surrogate is not followed by a trail
+				print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
+				decode_error = true;
+			}
+			if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+				skip = true;
+			} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+				if (skip) {
+					str_size--;
 				} else {
-					_UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
-					return true; // invalid UTF16
+					print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
+					decode_error = true;
 				}
+				skip = false;
+			} else {
+				skip = false;
 			}
 
+			c_prev = c;
+			str_size++;
 			cstr_size++;
 			ptrtmp++;
 		}
 
 		if (skip) {
-			_UNICERROR("no space left");
-			return true; // not enough space
+			print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
+			decode_error = true;
 		}
 	}
 
 	if (str_size == 0) {
 		clear();
-		return false;
+		return OK; // empty string
 	}
 
 	resize(str_size + 1);
 	char32_t *dst = ptrw();
 	dst[str_size] = 0;
 
+	bool skip = false;
+	uint32_t c_prev = 0;
 	while (cstr_size) {
-		int len = 0;
 		uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
 
-		if ((c & 0xfffffc00) == 0xd800) {
-			len = 2;
+		if (skip && (c & 0xfffffc00) != 0xdc00) { // pending lead surrogate is not followed by a trail
+			*(dst++) = c_prev; // unpaired, store as is (keeps the output in sync with str_size)
+		}
+		if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+			skip = true;
+		} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+			if (skip) {
+				*(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
+			} else {
+				*(dst++) = c; // unpaired, store as is
+			}
+			skip = false;
 		} else {
-			len = 1;
-		}
-
-		if (len > cstr_size) {
-			_UNICERROR("no space left");
-			return true; //not enough space
+			*(dst++) = c;
+			skip = false;
 		}
 
-		uint32_t unichar = 0;
-		if (len == 1) {
-			unichar = c;
-		} else {
-			uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
-			unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
-		}
+		cstr_size--;
+		p_utf16++;
+		c_prev = c;
+	}
 
-		*(dst++) = unichar;
-		cstr_size -= len;
-		p_utf16 += len;
+	if (skip) {
+		*(dst++) = c_prev;
 	}
 
-	return false;
-#undef _UNICERROR
+	if (decode_error) {
+		return ERR_PARSE_ERROR;
+	} else {
+		return OK;
+	}
 }
 
 Char16String String::utf16() const {
@@ -2054,15 +2044,14 @@ Char16String String::utf16() const {
 		uint32_t c = d[i];
 		if (c <= 0xffff) { // 16 bits.
 			fl += 1;
+			if ((c & 0xfffff800) == 0xd800) {
+				print_unicode_error(vformat("Unpaired surrogate (%x)", c));
+			}
 		} else if (c <= 0x10ffff) { // 32 bits.
 			fl += 2;
 		} else {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return Char16String();
-		}
-		if (c >= 0xd800 && c <= 0xdfff) {
-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
-			return Char16String();
+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
+			fl += 1;
 		}
 	}
 
@@ -2081,9 +2070,11 @@ Char16String String::utf16() const {
 
 		if (c <= 0xffff) { // 16 bits.
 			APPEND_CHAR(c);
-		} else { // 32 bits.
+		} else if (c <= 0x10ffff) { // 32 bits.
 			APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
 			APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
+		} else {
+			APPEND_CHAR(0x20);
 		}
 	}
 #undef APPEND_CHAR
@@ -2155,7 +2146,7 @@ int64_t String::hex_to_int() const {
 		}
 		// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
 		bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0');
-		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 		hex *= 16;
 		hex += n;
 		s++;
@@ -2194,7 +2185,7 @@ int64_t String::bin_to_int() const {
 		}
 		// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
 		bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0');
-		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 		binary *= 2;
 		binary += n;
 		s++;
@@ -2217,7 +2208,7 @@ int64_t String::to_int() const {
 		char32_t c = operator[](i);
 		if (is_digit(c)) {
 			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 			integer *= 10;
 			integer += c - '0';
 
@@ -2246,7 +2237,7 @@ int64_t String::to_int(const char *p_str, int p_len) {
 		char c = p_str[i];
 		if (is_digit(c)) {
 			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 			integer *= 10;
 			integer += c - '0';
 
@@ -2277,7 +2268,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) {
 		wchar_t c = p_str[i];
 		if (is_digit(c)) {
 			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 			integer *= 10;
 			integer += c - '0';
 
@@ -2308,7 +2299,7 @@ bool String::is_numeric() const {
 				return false;
 			}
 			dot = true;
-		} else if (c < '0' || c > '9') {
+		} else if (!is_digit(c)) {
 			return false;
 		}
 	}
@@ -2317,28 +2308,33 @@ bool String::is_numeric() const {
 }
 
 template <class C>
-static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number,
-				 * optionally preceded by white space. Must
-				 * have form "-I.FE-X", where I is the integer
-				 * part of the mantissa, F is the fractional
-				 * part of the mantissa, and X is the
-				 * exponent. Either of the signs may be "+",
-				 * "-", or omitted. Either I or F may be
-				 * omitted, or both. The decimal point isn't
-				 * necessary unless F is present. The "E" may
-				 * actually be an "e". E and X may both be
-				 * omitted (but not just one). */
-		C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's
-				 * address here. */
-{
-	static const int maxExponent = 511; /* Largest possible base 10 exponent.  Any
-					 * exponent larger than this will already
-					 * produce underflow or overflow, so there's
-					 * no need to worry about additional digits.
-					 */
-	static const double powersOf10[] = { /* Table giving binary powers of 10.  Entry */
-		10., /* is 10^2^i.  Used to convert decimal */
-		100., /* exponents into floating-point numbers. */
+static double built_in_strtod(
+		/* A decimal ASCII floating-point number,
+		 * optionally preceded by white space. Must
+		 * have form "-I.FE-X", where I is the integer
+		 * part of the mantissa, F is the fractional
+		 * part of the mantissa, and X is the
+		 * exponent. Either of the signs may be "+",
+		 * "-", or omitted. Either I or F may be
+		 * omitted, or both. The decimal point isn't
+		 * necessary unless F is present. The "E" may
+		 * actually be an "e". E and X may both be
+		 * omitted (but not just one). */
+		const C *string,
+		/* If non-nullptr, store the terminating character's
+		 * address here. */
+		C **endPtr = nullptr) {
+	/* Largest possible base 10 exponent. Any
+	 * exponent larger than this will already
+	 * produce underflow or overflow, so there's
+	 * no need to worry about additional digits. */
+	static const int maxExponent = 511;
+	/* Table giving binary powers of 10. Entry
+	 * is 10^2^i. Used to convert decimal
+	 * exponents into floating-point numbers. */
+	static const double powersOf10[] = {
+		10.,
+		100.,
 		1.0e4,
 		1.0e8,
 		1.0e16,
@@ -2353,25 +2349,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	const double *d;
 	const C *p;
 	int c;
-	int exp = 0; /* Exponent read from "EX" field. */
-	int fracExp = 0; /* Exponent that derives from the fractional
-				 * part. Under normal circumstances, it is
-				 * the negative of the number of digits in F.
-				 * However, if I is very long, the last digits
-				 * of I get dropped (otherwise a long I with a
-				 * large negative exponent could cause an
-				 * unnecessary overflow on I alone). In this
-				 * case, fracExp is incremented one for each
-				 * dropped digit. */
-	int mantSize; /* Number of digits in mantissa. */
-	int decPt; /* Number of mantissa digits BEFORE decimal
-				 * point. */
-	const C *pExp; /* Temporarily holds location of exponent in
-				 * string. */
+	/* Exponent read from "EX" field. */
+	int exp = 0;
+	/* Exponent that derives from the fractional
+	 * part. Under normal circumstances, it is
+	 * the negative of the number of digits in F.
+	 * However, if I is very long, the last digits
+	 * of I get dropped (otherwise a long I with a
+	 * large negative exponent could cause an
+	 * unnecessary overflow on I alone). In this
+	 * case, fracExp is incremented one for each
+	 * dropped digit. */
+	int fracExp = 0;
+	/* Number of digits in mantissa. */
+	int mantSize;
+	/* Number of mantissa digits BEFORE decimal point. */
+	int decPt;
+	/* Temporarily holds location of exponent in string. */
+	const C *pExp;
 
 	/*
-     * Strip off leading blanks and check for a sign.
-     */
+	 * Strip off leading blanks and check for a sign.
+	 */
 
 	p = string;
 	while (*p == ' ' || *p == '\t' || *p == '\n') {
@@ -2388,9 +2387,9 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Count the number of digits in the mantissa (including the decimal
-     * point), and also locate the decimal point.
-     */
+	 * Count the number of digits in the mantissa (including the decimal
+	 * point), and also locate the decimal point.
+	 */
 
 	decPt = -1;
 	for (mantSize = 0;; mantSize += 1) {
@@ -2405,11 +2404,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Now suck up the digits in the mantissa. Use two integers to collect 9
-     * digits each (this is faster than using floating-point). If the mantissa
-     * has more than 18 digits, ignore the extras, since they can't affect the
-     * value anyway.
-     */
+	 * Now suck up the digits in the mantissa. Use two integers to collect 9
+	 * digits each (this is faster than using floating-point). If the mantissa
+	 * has more than 18 digits, ignore the extras, since they can't affect the
+	 * value anyway.
+	 */
 
 	pExp = p;
 	p -= mantSize;
@@ -2455,8 +2454,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Skim off the exponent.
-     */
+	 * Skim off the exponent.
+	 */
 
 	p = pExp;
 	if ((*p == 'E') || (*p == 'e')) {
@@ -2486,10 +2485,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
 	}
 
 	/*
-     * Generate a floating-point number that represents the exponent. Do this
-     * by processing the exponent one bit at a time to combine many powers of
-     * 2 of 10. Then combine the exponent with the fraction.
-     */
+	 * Generate a floating-point number that represents the exponent. Do this
+	 * by processing the exponent one bit at a time to combine many powers of
+	 * 2 of 10. Then combine the exponent with the fraction.
+	 */
 
 	if (exp < 0) {
 		expSign = true;
@@ -2591,7 +2590,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
 								return INT64_MIN;
 							}
 						} else {
-							ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+							ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
 						}
 					}
 					integer *= 10;
@@ -3168,7 +3167,7 @@ bool String::is_subsequence_of(const String &p_string) const {
 	return _base_is_subsequence_of(p_string, false);
 }
 
-bool String::is_subsequence_ofi(const String &p_string) const {
+bool String::is_subsequence_ofn(const String &p_string) const {
 	return _base_is_subsequence_of(p_string, true);
 }
 
@@ -3452,51 +3451,52 @@ String String::replacen(const String &p_key, const String &p_with) const {
 String String::repeat(int p_count) const {
 	ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
 
-	String new_string;
-	const char32_t *src = this->get_data();
-
-	new_string.resize(length() * p_count + 1);
-	new_string[length() * p_count] = 0;
-
-	for (int i = 0; i < p_count; i++) {
-		for (int j = 0; j < length(); j++) {
-			new_string[i * length() + j] = src[j];
-		}
-	}
-
+	int len = length();
+	String new_string = *this; // Start from one copy of the source text.
+	new_string.resize(p_count * len + 1); // Room for p_count copies plus the terminator.
+
+	char32_t *dst = new_string.ptrw();
+	int offset = 1; // Number of copies already present in dst.
+	int stride = 1; // Number of copies appended by the next memcpy.
+	while (offset < p_count) {
+		memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t)); // Duplicate the leading `stride` copies.
+		offset += stride;
+		stride = MIN(stride * 2, p_count - offset); // Double the batch, capped at the copies still missing.
+	}
+	dst[p_count * len] = _null;
 	return new_string;
 }
 
-String String::left(int p_pos) const {
-	if (p_pos < 0) {
-		p_pos = length() + p_pos;
+String String::left(int p_len) const {
+	if (p_len < 0) { // A negative length is taken relative to the end of the string.
+		p_len = length() + p_len;
 	}
 
-	if (p_pos <= 0) {
+	if (p_len <= 0) { // Nothing to return.
 		return "";
 	}
 
-	if (p_pos >= length()) {
+	if (p_len >= length()) { // The request covers the whole string.
 		return *this;
 	}
 
-	return substr(0, p_pos);
+	return substr(0, p_len); // First p_len characters.
 }
 
-String String::right(int p_pos) const {
-	if (p_pos < 0) {
-		p_pos = length() + p_pos;
+String String::right(int p_len) const {
+	if (p_len < 0) { // A negative length is taken relative to the end of the string.
+		p_len = length() + p_len;
 	}
 
-	if (p_pos <= 0) {
+	if (p_len <= 0) { // Nothing to return.
 		return "";
 	}
 
-	if (p_pos >= length()) {
+	if (p_len >= length()) { // The request covers the whole string.
 		return *this;
 	}
 
-	return substr(length() - p_pos);
+	return substr(length() - p_len); // Start p_len characters before the end.
 }
 
 char32_t String::unicode_at(int p_idx) const {
@@ -3504,6 +3504,27 @@ char32_t String::unicode_at(int p_idx) const {
 	return operator[](p_idx);
 }
 
+String String::indent(const String &p_prefix) const { // Prepends p_prefix to every non-empty line.
+	String new_string;
+	int line_start = 0; // Index of the first character of the line being scanned.
+
+	for (int i = 0; i < length(); i++) {
+		const char32_t c = operator[](i);
+		if (c == '\n') {
+			if (i == line_start) {
+				new_string += c; // Leave empty lines empty.
+			} else {
+				new_string += p_prefix + substr(line_start, i - line_start + 1); // Line text including its '\n'.
+			}
+			line_start = i + 1;
+		}
+	}
+	if (line_start != length()) { // Trailing text without a final newline.
+		new_string += p_prefix + substr(line_start);
+	}
+	return new_string;
+}
+
 String String::dedent() const {
 	String new_string;
 	String indent;
@@ -3625,6 +3646,10 @@ String String::rstrip(const String &p_chars) const {
 	return substr(0, end + 1);
 }
 
+bool String::is_network_share_path() const { // True when the path starts with two forward slashes or two backslashes.
+	return begins_with("//") || begins_with("\\\\");
+}
+
 String String::simplify_path() const {
 	String s = *this;
 	String drive;
@@ -3637,6 +3662,9 @@ String String::simplify_path() const {
 	} else if (s.begins_with("user://")) {
 		drive = "user://";
 		s = s.substr(7, s.length());
+	} else if (is_network_share_path()) {
+		drive = s.substr(0, 2);
+		s = s.substr(2, s.length() - 2);
 	} else if (s.begins_with("/") || s.begins_with("\\")) {
 		drive = s.substr(0, 1);
 		s = s.substr(1, s.length() - 1);
@@ -3665,15 +3693,15 @@ String String::simplify_path() const {
 	for (int i = 0; i < dirs.size(); i++) {
 		String d = dirs[i];
 		if (d == ".") {
-			dirs.remove(i);
+			dirs.remove_at(i);
 			i--;
 		} else if (d == "..") {
 			if (i == 0) {
-				dirs.remove(i);
+				dirs.remove_at(i);
 				i--;
 			} else {
-				dirs.remove(i);
-				dirs.remove(i - 1);
+				dirs.remove_at(i);
+				dirs.remove_at(i - 1);
 				i -= 2;
 			}
 		}
@@ -3735,6 +3763,31 @@ bool String::is_absolute_path() const {
 	}
 }
 
+static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) { // Checks one character of a candidate identifier.
+	if (p_index == 0 && is_digit(p_char)) {
+		return false; // An identifier must not start with a digit.
+	}
+	return is_ascii_identifier_char(p_char); // NOTE(review): presumably ASCII letters, digits and '_' - confirm in char_utils.h.
+}
+
+String String::validate_identifier() const { // Returns a copy with every invalid identifier character replaced by '_'.
+	if (is_empty()) {
+		return "_"; // Empty string is not a valid identifier.
+	}
+
+	String result = *this;
+	int len = result.length();
+	char32_t *buffer = result.ptrw(); // Edit the copy in place.
+
+	for (int i = 0; i < len; i++) {
+		if (!_is_valid_identifier_bit(i, buffer[i])) {
+			buffer[i] = '_';
+		}
+	}
+
+	return result;
+}
+
 bool String::is_valid_identifier() const {
 	int len = length();
 
@@ -3745,15 +3798,7 @@ bool String::is_valid_identifier() const {
 	const char32_t *str = &operator[](0);
 
 	for (int i = 0; i < len; i++) {
-		if (i == 0) {
-			if (is_digit(str[0])) {
-				return false; // no start with number plz
-			}
-		}
-
-		bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_';
-
-		if (!valid_char) {
+		if (!_is_valid_identifier_bit(i, str[i])) {
 			return false;
 		}
 	}
@@ -3775,18 +3820,15 @@ String String::uri_encode() const {
 	const CharString temp = utf8();
 	String res;
 	for (int i = 0; i < temp.length(); ++i) {
-		char ord = temp[i];
-		if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) {
+		uint8_t ord = temp[i];
+		if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
 			res += ord;
 		} else {
-			char h_Val[3];
-#if defined(__GNUC__) || defined(_MSC_VER)
-			snprintf(h_Val, 3, "%02hhX", ord);
-#else
-			sprintf(h_Val, "%02hhX", ord);
-#endif
-			res += "%";
-			res += h_Val;
+			char p[4] = { '%', 0, 0, 0 };
+			static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+			p[1] = hex[ord >> 4];
+			p[2] = hex[ord & 0xF];
+			res += p;
 		}
 	}
 	return res;
@@ -3798,9 +3840,9 @@ String String::uri_decode() const {
 	for (int i = 0; i < src.length(); ++i) {
 		if (src[i] == '%' && i + 2 < src.length()) {
 			char ord1 = src[i + 1];
-			if (is_digit(ord1) || is_upper_case(ord1)) {
+			if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
 				char ord2 = src[i + 2];
-				if (is_digit(ord2) || is_upper_case(ord2)) {
+				if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
 					char bytes[3] = { (char)ord1, (char)ord2, 0 };
 					res += (char)strtol(bytes, nullptr, 16);
 					i += 2;
@@ -3927,7 +3969,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
 					for (int i = 2; i < p_src_len; i++) {
 						eat = i + 1;
 						char32_t ct = p_src[i];
-						if (ct == ';' || ct < '0' || ct > '9') {
+						if (ct == ';' || !is_digit(ct)) {
 							break;
 						}
 					}
@@ -4057,7 +4099,7 @@ String String::pad_zeros(int p_digits) const {
 
 	int begin = 0;
 
-	while (begin < end && (s[begin] < '0' || s[begin] > '9')) {
+	while (begin < end && !is_digit(s[begin])) {
 		begin++;
 	}
 
@@ -4102,7 +4144,7 @@ bool String::is_valid_int() const {
 	}
 
 	for (int i = from; i < len; i++) {
-		if (operator[](i) < '0' || operator[](i) > '9') {
+		if (!is_digit(operator[](i))) {
 			return false; // no start with number plz
 		}
 	}
@@ -4204,15 +4246,11 @@ String String::path_to(const String &p_path) const {
 		dst += "/";
 	}
 
-	String base;
-
 	if (src.begins_with("res://") && dst.begins_with("res://")) {
-		base = "res:/";
 		src = src.replace("res://", "/");
 		dst = dst.replace("res://", "/");
 
 	} else if (src.begins_with("user://") && dst.begins_with("user://")) {
-		base = "user:/";
 		src = src.replace("user://", "/");
 		dst = dst.replace("user://", "/");
 
@@ -4227,7 +4265,6 @@ String String::path_to(const String &p_path) const {
 			return p_path; //impossible to do this
 		}
 
-		base = src_begin;
 		src = src.substr(src_begin.length(), src.length());
 		dst = dst.substr(dst_begin.length(), dst.length());
 	}
@@ -4280,7 +4317,7 @@ bool String::is_valid_filename() const {
 		return false;
 	}
 
-	if (stripped == String()) {
+	if (stripped.is_empty()) {
 		return false;
 	}
 
@@ -4338,13 +4375,13 @@ bool String::is_relative_path() const {
 String String::get_base_dir() const {
 	int end = 0;
 
-	// url scheme style base
+	// URL scheme style base.
 	int basepos = find("://");
 	if (basepos != -1) {
 		end = basepos + 3;
 	}
 
-	// windows top level directory base
+	// Windows top level directory base.
 	if (end == 0) {
 		basepos = find(":/");
 		if (basepos == -1) {
@@ -4355,7 +4392,24 @@ String String::get_base_dir() const {
 		}
 	}
 
-	// unix root directory base
+	// Windows UNC network share path.
+	if (end == 0) {
+		if (is_network_share_path()) {
+			basepos = find("/", 2);
+			if (basepos == -1) {
+				basepos = find("\\", 2);
+			}
+			int servpos = find("/", basepos + 1);
+			if (servpos == -1) {
+				servpos = find("\\", basepos + 1);
+			}
+			if (servpos != -1) {
+				end = servpos + 1;
+			}
+		}
+	}
+
+	// Unix root directory base.
 	if (end == 0) {
 		if (begins_with("/")) {
 			end = 1;
@@ -4412,7 +4466,7 @@ String String::property_name_encode() const {
 	// as well as '"', '=' or ' ' (32)
 	const char32_t *cstr = get_data();
 	for (int i = 0; cstr[i]; i++) {
-		if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) {
+		if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) {
 			return "\"" + c_escape_multiline() + "\"";
 		}
 	}
@@ -4421,7 +4475,7 @@ String String::property_name_encode() const {
 }
 
 // Changes made to the set of invalid characters must also be reflected in the String documentation.
-const String String::invalid_node_name_characters = ". : @ / \"";
+const String String::invalid_node_name_characters = ". : @ / \" " UNIQUE_NODE_PREFIX;
 
 String String::validate_node_name() const {
 	Vector<String> chars = String::invalid_node_name_characters.split(" ");
@@ -4495,7 +4549,7 @@ String String::sprintf(const Array &values, bool *error) const {
 	int min_chars = 0;
 	int min_decimals = 0;
 	bool in_decimals = false;
-	bool pad_with_zeroes = false;
+	bool pad_with_zeros = false;
 	bool left_justified = false;
 	bool show_sign = false;
 
@@ -4548,7 +4602,7 @@ String String::sprintf(const Array &values, bool *error) const {
 
 					// Padding.
 					int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars;
-					String pad_char = pad_with_zeroes ? String("0") : String(" ");
+					String pad_char = pad_with_zeros ? String("0") : String(" ");
 					if (left_justified) {
 						str = str.rpad(pad_chars_count, pad_char);
 					} else {
@@ -4556,10 +4610,13 @@ String String::sprintf(const Array &values, bool *error) const {
 					}
 
 					// Sign.
-					if (show_sign && value >= 0) {
-						str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+");
-					} else if (value < 0) {
-						str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-");
+					if (show_sign || value < 0) {
+						String sign_char = value < 0 ? "-" : "+";
+						if (left_justified) {
+							str = str.insert(0, sign_char);
+						} else {
+							str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char);
+						}
 					}
 
 					formatted += str;
@@ -4588,13 +4645,9 @@ String String::sprintf(const Array &values, bool *error) const {
 
 					// Padding. Leave room for sign later if required.
 					int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
-					String pad_char = pad_with_zeroes ? String("0") : String(" ");
+					String pad_char = pad_with_zeros ? String("0") : String(" ");
 					if (left_justified) {
-						if (pad_with_zeroes) {
-							return "left justification cannot be used with zeros as the padding";
-						} else {
-							str = str.rpad(pad_chars_count, pad_char);
-						}
+						str = str.rpad(pad_chars_count, pad_char);
 					} else {
 						str = str.lpad(pad_chars_count, pad_char);
 					}
@@ -4605,7 +4658,7 @@ String String::sprintf(const Array &values, bool *error) const {
 						if (left_justified) {
 							str = str.insert(0, sign_char);
 						} else {
-							str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char);
+							str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char);
 						}
 					}
 
@@ -4694,7 +4747,11 @@ String String::sprintf(const Array &values, bool *error) const {
 						min_decimals += n;
 					} else {
 						if (c == '0' && min_chars == 0) {
-							pad_with_zeroes = true;
+							if (left_justified) {
+								WARN_PRINT("'0' flag ignored with '-' flag in string format");
+							} else {
+								pad_with_zeros = true;
+							}
 						} else {
 							min_chars *= 10;
 							min_chars += n;
@@ -4743,7 +4800,7 @@ String String::sprintf(const Array &values, bool *error) const {
 					// Back to defaults:
 					min_chars = 0;
 					min_decimals = 6;
-					pad_with_zeroes = false;
+					pad_with_zeros = false;
 					left_justified = false;
 					show_sign = false;
 					in_decimals = false;
@@ -4844,6 +4901,17 @@ Vector<uint8_t> String::to_utf32_buffer() const {
 }
 
 #ifdef TOOLS_ENABLED
+/**
+ * "Tools TRanslate". Performs string replacement for internationalization
+ * within the editor. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `TTRN()` instead of `TTR()`.
+ *
+ * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTR()` instead.
+ */
 String TTR(const String &p_text, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		return TranslationServer::get_singleton()->tool_translate(p_text, p_context);
@@ -4852,6 +4920,18 @@ String TTR(const String &p_text, const String &p_context) {
 	return p_text;
 }
 
+/**
+ * "Tools TRanslate for N items". Performs string replacement for
+ * internationalization within the editor. A translation context can optionally
+ * be specified to disambiguate between identical source strings in
+ * translations. Use `TTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTRN()` instead.
+ */
 String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
@@ -4864,37 +4944,62 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St
 	return p_text_plural;
 }
 
+/**
+ * "Docs TRanslate". Used for the editor class reference documentation,
+ * handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * so that the URL is not hardcoded in the XML and is substituted consistently.
+ */
 String DTR(const String &p_text, const String &p_context) {
 	// Comes straight from the XML, so remove indentation and any trailing whitespace.
 	const String text = p_text.dedent().strip_edges();
 
 	if (TranslationServer::get_singleton()) {
-		return TranslationServer::get_singleton()->doc_translate(text, p_context);
+		return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
 	}
 
-	return text;
+	return text.replace("$DOCS_URL", VERSION_DOCS_URL); // No translation server: still expand the placeholder.
 }
 
+/**
+ * "Docs TRanslate for N items". Used for the editor class reference documentation
+ * (with support for plurals), handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * so that the URL is not hardcoded in the XML and is substituted consistently.
+ */
 String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
 	const String text = p_text.dedent().strip_edges();
 	const String text_plural = p_text_plural.dedent().strip_edges();
 
 	if (TranslationServer::get_singleton()) {
-		return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context);
+		return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
 	}
 
 	// Return message based on English plural rule if translation is not possible.
 	if (p_n == 1) {
-		return text;
+		return text.replace("$DOCS_URL", VERSION_DOCS_URL); // No translation server: still expand the placeholder.
 	}
-	return text_plural;
+	return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL);
 }
 #endif
 
+/**
+ * "Run-time TRanslate". Performs string replacement for internationalization
+ * within a running project. The translation string must be supplied by the
+ * project, as Godot does not provide built-in translations for `RTR()` strings
+ * to keep binary size low. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `RTRN()` instead of `RTR()`.
+ *
+ * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTR()` instead.
+ */
 String RTR(const String &p_text, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context);
-		if (rtr == String() || rtr == p_text) {
+		if (rtr.is_empty() || rtr == p_text) {
 			return TranslationServer::get_singleton()->translate(p_text, p_context);
 		} else {
 			return rtr;
@@ -4904,10 +5009,24 @@ String RTR(const String &p_text, const String &p_context) {
 	return p_text;
 }
 
+/**
+ * "Run-time TRanslate for N items". Performs string replacement for
+ * internationalization within a running project. The translation string must be
+ * supplied by the project, as Godot does not provide built-in translations for
+ * `RTRN()` strings to keep binary size low. A translation context can
+ * optionally be specified to disambiguate between identical source strings in
+ * translations. Use `RTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTRN()` instead.
+ */
 String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
 	if (TranslationServer::get_singleton()) {
 		String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
-		if (rtr == String() || rtr == p_text || rtr == p_text_plural) {
+		if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) {
 			return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context);
 		} else {
 			return rtr;
diff --git a/core/string/ustring.h b/core/string/ustring.h
index 1d80ccf58d..6c3169f136 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -5,8 +5,8 @@
 /*                           GODOT ENGINE                                */
 /*                      https://godotengine.org                          */
 /*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
 /*                                                                       */
 /* Permission is hereby granted, free of charge, to any person obtaining */
 /* a copy of this software and associated documentation files (the       */
@@ -28,10 +28,12 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
+// Note: _GODOT suffix added to avoid conflict with ICU header with the same guard.
+
 #ifndef USTRING_GODOT_H
 #define USTRING_GODOT_H
-// Note: Renamed to avoid conflict with ICU header with the same name.
 
+#include "core/string/char_utils.h"
 #include "core/templates/cowdata.h"
 #include "core/templates/vector.h"
 #include "core/typedefs.h"
@@ -108,13 +110,10 @@ public:
 
 	_FORCE_INLINE_ Char16String() {}
 	_FORCE_INLINE_ Char16String(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); }
-	_FORCE_INLINE_ Char16String &operator=(const Char16String &p_str) {
-		_cowdata._ref(p_str._cowdata);
-		return *this;
-	}
+	_FORCE_INLINE_ void operator=(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); }
 	_FORCE_INLINE_ Char16String(const char16_t *p_cstr) { copy_from(p_cstr); }
 
-	Char16String &operator=(const char16_t *p_cstr);
+	void operator=(const char16_t *p_cstr);
 	bool operator<(const Char16String &p_right) const;
 	Char16String &operator+=(char16_t p_char);
 	int length() const { return size() ? size() - 1 : 0; }
@@ -152,13 +151,10 @@ public:
 
 	_FORCE_INLINE_ CharString() {}
 	_FORCE_INLINE_ CharString(const CharString &p_str) { _cowdata._ref(p_str._cowdata); }
-	_FORCE_INLINE_ CharString &operator=(const CharString &p_str) {
-		_cowdata._ref(p_str._cowdata);
-		return *this;
-	}
+	_FORCE_INLINE_ void operator=(const CharString &p_str) { _cowdata._ref(p_str._cowdata); }
 	_FORCE_INLINE_ CharString(const char *p_cstr) { copy_from(p_cstr); }
 
-	CharString &operator=(const char *p_cstr);
+	void operator=(const char *p_cstr);
 	bool operator<(const CharString &p_right) const;
 	CharString &operator+=(char p_char);
 	int length() const { return size() ? size() - 1 : 0; }
@@ -209,7 +205,7 @@ public:
 	_FORCE_INLINE_ char32_t *ptrw() { return _cowdata.ptrw(); }
 	_FORCE_INLINE_ const char32_t *ptr() const { return _cowdata.ptr(); }
 
-	void remove(int p_index) { _cowdata.remove(p_index); }
+	void remove_at(int p_index) { _cowdata.remove_at(p_index); }
 
 	_FORCE_INLINE_ void clear() { resize(0); }
 
@@ -230,6 +226,7 @@ public:
 	bool operator==(const String &p_str) const;
 	bool operator!=(const String &p_str) const;
 	String operator+(const String &p_str) const;
+	String operator+(char32_t p_char) const;
 
 	String &operator+=(const String &);
 	String &operator+=(char32_t p_char);
@@ -275,6 +272,9 @@ public:
 
 	bool is_valid_string() const;
 
+	/* debug, error messages */
+	void print_unicode_error(const String &p_message, bool p_critical = false) const;
+
 	/* complex helpers */
 	String substr(int p_from, int p_chars = -1) const;
 	int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed
@@ -291,7 +291,7 @@ public:
 	bool ends_with(const String &p_string) const;
 	bool is_enclosed_in(const String &p_string) const;
 	bool is_subsequence_of(const String &p_string) const;
-	bool is_subsequence_ofi(const String &p_string) const;
+	bool is_subsequence_ofn(const String &p_string) const;
 	bool is_quoted() const;
 	Vector<String> bigrams() const;
 	float similarity(const String &p_string) const;
@@ -360,8 +360,9 @@ public:
 	int count(const String &p_string, int p_from = 0, int p_to = 0) const;
 	int countn(const String &p_string, int p_from = 0, int p_to = 0) const;
 
-	String left(int p_pos) const;
-	String right(int p_pos) const;
+	String left(int p_len) const;
+	String right(int p_len) const;
+	String indent(const String &p_prefix) const;
 	String dedent() const;
 	String strip_edges(bool left = true, bool right = true) const;
 	String strip_escapes() const;
@@ -376,11 +377,11 @@ public:
 
 	CharString ascii(bool p_allow_extended = false) const;
 	CharString utf8() const;
-	bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error
+	Error parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false);
 	static String utf8(const char *p_utf8, int p_len = -1);
 
 	Char16String utf16() const;
-	bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error
+	Error parse_utf16(const char16_t *p_utf16, int p_len = -1);
 	static String utf16(const char16_t *p_utf16, int p_len = -1);
 
 	static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
@@ -399,6 +400,8 @@ public:
 	Vector<uint8_t> sha256_buffer() const;
 
 	_FORCE_INLINE_ bool is_empty() const { return length() == 0; }
+	_FORCE_INLINE_ bool contains(const char *p_str) const { return find(p_str) != -1; }
+	_FORCE_INLINE_ bool contains(const String &p_str) const { return find(p_str) != -1; }
 
 	// path functions
 	bool is_absolute_path() const;
@@ -410,6 +413,7 @@ public:
 	String get_file() const;
 	static String humanize_size(uint64_t p_size);
 	String simplify_path() const;
+	bool is_network_share_path() const;
 
 	String xml_escape(bool p_escape_quotes = false) const;
 	String xml_unescape() const;
@@ -427,6 +431,7 @@ public:
 	// node functions
 	static const String invalid_node_name_characters;
 	String validate_node_name() const;
+	String validate_identifier() const;
 
 	bool is_valid_identifier() const;
 	bool is_valid_int() const;
@@ -442,11 +447,7 @@ public:
 
 	_FORCE_INLINE_ String() {}
 	_FORCE_INLINE_ String(const String &p_str) { _cowdata._ref(p_str._cowdata); }
-
-	String &operator=(const String &p_str) {
-		_cowdata._ref(p_str._cowdata);
-		return *this;
-	}
+	_FORCE_INLINE_ void operator=(const String &p_str) { _cowdata._ref(p_str._cowdata); }
 
 	Vector<uint8_t> to_ascii_buffer() const;
 	Vector<uint8_t> to_utf8_buffer() const;
@@ -527,19 +528,24 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St
 #define TTRGET(m_value) TTR(m_value)
 
 #else
-#define TTR(m_value) String()
-#define TTRN(m_value) String()
-#define DTR(m_value) String()
-#define DTRN(m_value) String()
 #define TTRC(m_value) (m_value)
 #define TTRGET(m_value) (m_value)
 #endif
 
+// Use this to mark property names for editor translation.
+// Often for dynamic properties defined in _get_property_list().
+// Property names defined directly inside EDITOR_DEF, GLOBAL_DEF, and ADD_PROPERTY macros don't need this.
+#define PNAME(m_value) (m_value)
+
+// Similar to PNAME, but to mark groups, i.e. properties with PROPERTY_USAGE_GROUP.
+// Groups defined directly inside ADD_GROUP macros don't need this.
+// The arguments are the same as ADD_GROUP. m_prefix is only used for extraction.
+#define GNAME(m_value, m_prefix) (m_value)
+
 // Runtime translate for the public node API.
 String RTR(const String &p_text, const String &p_context = "");
 String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = "");
 
-bool is_symbol(char32_t c);
 bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end);
 
 _FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) {