summaryrefslogtreecommitdiff
path: root/core/string
diff options
context:
space:
mode:
Diffstat (limited to 'core/string')
-rw-r--r--core/string/char_range.inc1456
-rw-r--r--core/string/char_utils.h112
-rw-r--r--core/string/locales.h1197
-rw-r--r--core/string/node_path.cpp27
-rw-r--r--core/string/node_path.h6
-rw-r--r--core/string/optimized_translation.cpp14
-rw-r--r--core/string/optimized_translation.h6
-rw-r--r--core/string/print_string.cpp114
-rw-r--r--core/string/print_string.h40
-rw-r--r--core/string/string_buffer.h4
-rw-r--r--core/string/string_builder.cpp6
-rw-r--r--core/string/string_builder.h4
-rw-r--r--core/string/string_name.cpp40
-rw-r--r--core/string/string_name.h31
-rw-r--r--core/string/translation.cpp1265
-rw-r--r--core/string/translation.h46
-rw-r--r--core/string/translation_po.cpp47
-rw-r--r--core/string/translation_po.h4
-rw-r--r--core/string/ucaps.h4
-rw-r--r--core/string/ustring.cpp1059
-rw-r--r--core/string/ustring.h64
21 files changed, 3994 insertions, 1552 deletions
diff --git a/core/string/char_range.inc b/core/string/char_range.inc
new file mode 100644
index 0000000000..c0be9016ad
--- /dev/null
+++ b/core/string/char_range.inc
@@ -0,0 +1,1456 @@
+/*************************************************************************/
+/* char_range.inc */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#ifndef CHAR_RANGE_INC
+#define CHAR_RANGE_INC
+
+#include "core/typedefs.h"
+
+struct CharRange {
+ char32_t start;
+ char32_t end;
+};
+
+static CharRange xid_start[] = {
+ { 0x41, 0x5a },
+ { 0x5f, 0x5f },
+ { 0x61, 0x7a },
+ { 0xaa, 0xaa },
+ { 0xb5, 0xb5 },
+ { 0xba, 0xba },
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xf6 },
+ { 0xf8, 0x2c1 },
+ { 0x2c6, 0x2d1 },
+ { 0x2e0, 0x2e4 },
+ { 0x2ec, 0x2ec },
+ { 0x2ee, 0x2ee },
+ { 0x370, 0x374 },
+ { 0x376, 0x377 },
+ { 0x37a, 0x37d },
+ { 0x37f, 0x37f },
+ { 0x386, 0x386 },
+ { 0x388, 0x38a },
+ { 0x38c, 0x38c },
+ { 0x38e, 0x3a1 },
+ { 0x3a3, 0x3f5 },
+ { 0x3f7, 0x481 },
+ { 0x48a, 0x52f },
+ { 0x531, 0x556 },
+ { 0x559, 0x559 },
+ { 0x560, 0x588 },
+ { 0x5d0, 0x5ea },
+ { 0x5ef, 0x5f2 },
+ { 0x620, 0x64a },
+ { 0x66e, 0x66f },
+ { 0x671, 0x6d3 },
+ { 0x6d5, 0x6d5 },
+ { 0x6e5, 0x6e6 },
+ { 0x6ee, 0x6ef },
+ { 0x6fa, 0x6fc },
+ { 0x6ff, 0x6ff },
+ { 0x710, 0x710 },
+ { 0x712, 0x72f },
+ { 0x74d, 0x7a5 },
+ { 0x7b1, 0x7b1 },
+ { 0x7ca, 0x7ea },
+ { 0x7f4, 0x7f5 },
+ { 0x7fa, 0x7fa },
+ { 0x800, 0x815 },
+ { 0x81a, 0x81a },
+ { 0x824, 0x824 },
+ { 0x828, 0x828 },
+ { 0x840, 0x858 },
+ { 0x860, 0x86a },
+ { 0x870, 0x887 },
+ { 0x889, 0x88e },
+ { 0x8a0, 0x8c9 },
+ { 0x904, 0x939 },
+ { 0x93d, 0x93d },
+ { 0x950, 0x950 },
+ { 0x958, 0x961 },
+ { 0x971, 0x980 },
+ { 0x985, 0x98c },
+ { 0x98f, 0x990 },
+ { 0x993, 0x9a8 },
+ { 0x9aa, 0x9b0 },
+ { 0x9b2, 0x9b2 },
+ { 0x9b6, 0x9b9 },
+ { 0x9bd, 0x9bd },
+ { 0x9ce, 0x9ce },
+ { 0x9dc, 0x9dd },
+ { 0x9df, 0x9e1 },
+ { 0x9f0, 0x9f1 },
+ { 0x9fc, 0x9fc },
+ { 0xa05, 0xa0a },
+ { 0xa0f, 0xa10 },
+ { 0xa13, 0xa28 },
+ { 0xa2a, 0xa30 },
+ { 0xa32, 0xa33 },
+ { 0xa35, 0xa36 },
+ { 0xa38, 0xa39 },
+ { 0xa59, 0xa5c },
+ { 0xa5e, 0xa5e },
+ { 0xa72, 0xa74 },
+ { 0xa85, 0xa8d },
+ { 0xa8f, 0xa91 },
+ { 0xa93, 0xaa8 },
+ { 0xaaa, 0xab0 },
+ { 0xab2, 0xab3 },
+ { 0xab5, 0xab9 },
+ { 0xabd, 0xabd },
+ { 0xad0, 0xad0 },
+ { 0xae0, 0xae1 },
+ { 0xaf9, 0xaf9 },
+ { 0xb05, 0xb0c },
+ { 0xb0f, 0xb10 },
+ { 0xb13, 0xb28 },
+ { 0xb2a, 0xb30 },
+ { 0xb32, 0xb33 },
+ { 0xb35, 0xb39 },
+ { 0xb3d, 0xb3d },
+ { 0xb5c, 0xb5d },
+ { 0xb5f, 0xb61 },
+ { 0xb71, 0xb71 },
+ { 0xb83, 0xb83 },
+ { 0xb85, 0xb8a },
+ { 0xb8e, 0xb90 },
+ { 0xb92, 0xb95 },
+ { 0xb99, 0xb9a },
+ { 0xb9c, 0xb9c },
+ { 0xb9e, 0xb9f },
+ { 0xba3, 0xba4 },
+ { 0xba8, 0xbaa },
+ { 0xbae, 0xbb9 },
+ { 0xbd0, 0xbd0 },
+ { 0xc05, 0xc0c },
+ { 0xc0e, 0xc10 },
+ { 0xc12, 0xc28 },
+ { 0xc2a, 0xc39 },
+ { 0xc3d, 0xc3d },
+ { 0xc58, 0xc5a },
+ { 0xc5d, 0xc5d },
+ { 0xc60, 0xc61 },
+ { 0xc80, 0xc80 },
+ { 0xc85, 0xc8c },
+ { 0xc8e, 0xc90 },
+ { 0xc92, 0xca8 },
+ { 0xcaa, 0xcb3 },
+ { 0xcb5, 0xcb9 },
+ { 0xcbd, 0xcbd },
+ { 0xcdd, 0xcde },
+ { 0xce0, 0xce1 },
+ { 0xcf1, 0xcf2 },
+ { 0xd04, 0xd0c },
+ { 0xd0e, 0xd10 },
+ { 0xd12, 0xd3a },
+ { 0xd3d, 0xd3d },
+ { 0xd4e, 0xd4e },
+ { 0xd54, 0xd56 },
+ { 0xd5f, 0xd61 },
+ { 0xd7a, 0xd7f },
+ { 0xd85, 0xd96 },
+ { 0xd9a, 0xdb1 },
+ { 0xdb3, 0xdbb },
+ { 0xdbd, 0xdbd },
+ { 0xdc0, 0xdc6 },
+ { 0xe01, 0xe30 },
+ { 0xe32, 0xe33 },
+ { 0xe40, 0xe46 },
+ { 0xe81, 0xe82 },
+ { 0xe84, 0xe84 },
+ { 0xe86, 0xe8a },
+ { 0xe8c, 0xea3 },
+ { 0xea5, 0xea5 },
+ { 0xea7, 0xeb0 },
+ { 0xeb2, 0xeb3 },
+ { 0xebd, 0xebd },
+ { 0xec0, 0xec4 },
+ { 0xec6, 0xec6 },
+ { 0xedc, 0xedf },
+ { 0xf00, 0xf00 },
+ { 0xf40, 0xf47 },
+ { 0xf49, 0xf6c },
+ { 0xf88, 0xf8c },
+ { 0x1000, 0x102a },
+ { 0x103f, 0x103f },
+ { 0x1050, 0x1055 },
+ { 0x105a, 0x105d },
+ { 0x1061, 0x1061 },
+ { 0x1065, 0x1066 },
+ { 0x106e, 0x1070 },
+ { 0x1075, 0x1081 },
+ { 0x108e, 0x108e },
+ { 0x10a0, 0x10c5 },
+ { 0x10c7, 0x10c7 },
+ { 0x10cd, 0x10cd },
+ { 0x10d0, 0x10fa },
+ { 0x10fc, 0x1248 },
+ { 0x124a, 0x124d },
+ { 0x1250, 0x1256 },
+ { 0x1258, 0x1258 },
+ { 0x125a, 0x125d },
+ { 0x1260, 0x1288 },
+ { 0x128a, 0x128d },
+ { 0x1290, 0x12b0 },
+ { 0x12b2, 0x12b5 },
+ { 0x12b8, 0x12be },
+ { 0x12c0, 0x12c0 },
+ { 0x12c2, 0x12c5 },
+ { 0x12c8, 0x12d6 },
+ { 0x12d8, 0x1310 },
+ { 0x1312, 0x1315 },
+ { 0x1318, 0x135a },
+ { 0x1380, 0x138f },
+ { 0x13a0, 0x13f5 },
+ { 0x13f8, 0x13fd },
+ { 0x1401, 0x166c },
+ { 0x166f, 0x167f },
+ { 0x1681, 0x169a },
+ { 0x16a0, 0x16ea },
+ { 0x16ee, 0x16f8 },
+ { 0x1700, 0x1711 },
+ { 0x171f, 0x1731 },
+ { 0x1740, 0x1751 },
+ { 0x1760, 0x176c },
+ { 0x176e, 0x1770 },
+ { 0x1780, 0x17b3 },
+ { 0x17d7, 0x17d7 },
+ { 0x17dc, 0x17dc },
+ { 0x1820, 0x1878 },
+ { 0x1880, 0x1884 },
+ { 0x1887, 0x18a8 },
+ { 0x18aa, 0x18aa },
+ { 0x18b0, 0x18f5 },
+ { 0x1900, 0x191e },
+ { 0x1950, 0x196d },
+ { 0x1970, 0x1974 },
+ { 0x1980, 0x19ab },
+ { 0x19b0, 0x19c9 },
+ { 0x1a00, 0x1a16 },
+ { 0x1a20, 0x1a54 },
+ { 0x1aa7, 0x1aa7 },
+ { 0x1b05, 0x1b33 },
+ { 0x1b45, 0x1b4c },
+ { 0x1b83, 0x1ba0 },
+ { 0x1bae, 0x1baf },
+ { 0x1bba, 0x1be5 },
+ { 0x1c00, 0x1c23 },
+ { 0x1c4d, 0x1c4f },
+ { 0x1c5a, 0x1c7d },
+ { 0x1c80, 0x1c88 },
+ { 0x1c90, 0x1cba },
+ { 0x1cbd, 0x1cbf },
+ { 0x1ce9, 0x1cec },
+ { 0x1cee, 0x1cf3 },
+ { 0x1cf5, 0x1cf6 },
+ { 0x1cfa, 0x1cfa },
+ { 0x1d00, 0x1dbf },
+ { 0x1e00, 0x1f15 },
+ { 0x1f18, 0x1f1d },
+ { 0x1f20, 0x1f45 },
+ { 0x1f48, 0x1f4d },
+ { 0x1f50, 0x1f57 },
+ { 0x1f59, 0x1f59 },
+ { 0x1f5b, 0x1f5b },
+ { 0x1f5d, 0x1f5d },
+ { 0x1f5f, 0x1f7d },
+ { 0x1f80, 0x1fb4 },
+ { 0x1fb6, 0x1fbc },
+ { 0x1fbe, 0x1fbe },
+ { 0x1fc2, 0x1fc4 },
+ { 0x1fc6, 0x1fcc },
+ { 0x1fd0, 0x1fd3 },
+ { 0x1fd6, 0x1fdb },
+ { 0x1fe0, 0x1fec },
+ { 0x1ff2, 0x1ff4 },
+ { 0x1ff6, 0x1ffc },
+ { 0x2071, 0x2071 },
+ { 0x207f, 0x207f },
+ { 0x2090, 0x209c },
+ { 0x2102, 0x2102 },
+ { 0x2107, 0x2107 },
+ { 0x210a, 0x2113 },
+ { 0x2115, 0x2115 },
+ { 0x2118, 0x211d },
+ { 0x2124, 0x2124 },
+ { 0x2126, 0x2126 },
+ { 0x2128, 0x2128 },
+ { 0x212a, 0x2139 },
+ { 0x213c, 0x213f },
+ { 0x2145, 0x2149 },
+ { 0x214e, 0x214e },
+ { 0x2160, 0x2188 },
+ { 0x2c00, 0x2ce4 },
+ { 0x2ceb, 0x2cee },
+ { 0x2cf2, 0x2cf3 },
+ { 0x2d00, 0x2d25 },
+ { 0x2d27, 0x2d27 },
+ { 0x2d2d, 0x2d2d },
+ { 0x2d30, 0x2d67 },
+ { 0x2d6f, 0x2d6f },
+ { 0x2d80, 0x2d96 },
+ { 0x2da0, 0x2da6 },
+ { 0x2da8, 0x2dae },
+ { 0x2db0, 0x2db6 },
+ { 0x2db8, 0x2dbe },
+ { 0x2dc0, 0x2dc6 },
+ { 0x2dc8, 0x2dce },
+ { 0x2dd0, 0x2dd6 },
+ { 0x2dd8, 0x2dde },
+ { 0x3005, 0x3007 },
+ { 0x3021, 0x3029 },
+ { 0x3031, 0x3035 },
+ { 0x3038, 0x303c },
+ { 0x3041, 0x3096 },
+ { 0x309b, 0x309f },
+ { 0x30a1, 0x30fa },
+ { 0x30fc, 0x30ff },
+ { 0x3105, 0x312f },
+ { 0x3131, 0x318e },
+ { 0x31a0, 0x31bf },
+ { 0x31f0, 0x31ff },
+ { 0x3400, 0x4dbf },
+ { 0x4e00, 0xa48c },
+ { 0xa4d0, 0xa4fd },
+ { 0xa500, 0xa60c },
+ { 0xa610, 0xa61f },
+ { 0xa62a, 0xa62b },
+ { 0xa640, 0xa66e },
+ { 0xa67f, 0xa69d },
+ { 0xa6a0, 0xa6ef },
+ { 0xa717, 0xa71f },
+ { 0xa722, 0xa788 },
+ { 0xa78b, 0xa7ca },
+ { 0xa7d0, 0xa7d1 },
+ { 0xa7d3, 0xa7d3 },
+ { 0xa7d5, 0xa7d9 },
+ { 0xa7f2, 0xa801 },
+ { 0xa803, 0xa805 },
+ { 0xa807, 0xa80a },
+ { 0xa80c, 0xa822 },
+ { 0xa840, 0xa873 },
+ { 0xa882, 0xa8b3 },
+ { 0xa8f2, 0xa8f7 },
+ { 0xa8fb, 0xa8fb },
+ { 0xa8fd, 0xa8fe },
+ { 0xa90a, 0xa925 },
+ { 0xa930, 0xa946 },
+ { 0xa960, 0xa97c },
+ { 0xa984, 0xa9b2 },
+ { 0xa9cf, 0xa9cf },
+ { 0xa9e0, 0xa9e4 },
+ { 0xa9e6, 0xa9ef },
+ { 0xa9fa, 0xa9fe },
+ { 0xaa00, 0xaa28 },
+ { 0xaa40, 0xaa42 },
+ { 0xaa44, 0xaa4b },
+ { 0xaa60, 0xaa76 },
+ { 0xaa7a, 0xaa7a },
+ { 0xaa7e, 0xaaaf },
+ { 0xaab1, 0xaab1 },
+ { 0xaab5, 0xaab6 },
+ { 0xaab9, 0xaabd },
+ { 0xaac0, 0xaac0 },
+ { 0xaac2, 0xaac2 },
+ { 0xaadb, 0xaadd },
+ { 0xaae0, 0xaaea },
+ { 0xaaf2, 0xaaf4 },
+ { 0xab01, 0xab06 },
+ { 0xab09, 0xab0e },
+ { 0xab11, 0xab16 },
+ { 0xab20, 0xab26 },
+ { 0xab28, 0xab2e },
+ { 0xab30, 0xab5a },
+ { 0xab5c, 0xab69 },
+ { 0xab70, 0xabe2 },
+ { 0xac00, 0xd7a3 },
+ { 0xd7b0, 0xd7c6 },
+ { 0xd7cb, 0xd7fb },
+ { 0xf900, 0xfa6d },
+ { 0xfa70, 0xfad9 },
+ { 0xfb00, 0xfb06 },
+ { 0xfb13, 0xfb17 },
+ { 0xfb1d, 0xfb1d },
+ { 0xfb1f, 0xfb28 },
+ { 0xfb2a, 0xfb36 },
+ { 0xfb38, 0xfb3c },
+ { 0xfb3e, 0xfb3e },
+ { 0xfb40, 0xfb41 },
+ { 0xfb43, 0xfb44 },
+ { 0xfb46, 0xfbb1 },
+ { 0xfbd3, 0xfd3d },
+ { 0xfd50, 0xfd8f },
+ { 0xfd92, 0xfdc7 },
+ { 0xfdf0, 0xfdfb },
+ { 0xfe70, 0xfe74 },
+ { 0xfe76, 0xfefc },
+ { 0xff21, 0xff3a },
+ { 0xff41, 0xff5a },
+ { 0xff66, 0xffbe },
+ { 0xffc2, 0xffc7 },
+ { 0xffca, 0xffcf },
+ { 0xffd2, 0xffd7 },
+ { 0xffda, 0xffdc },
+ { 0x10000, 0x1000b },
+ { 0x1000d, 0x10026 },
+ { 0x10028, 0x1003a },
+ { 0x1003c, 0x1003d },
+ { 0x1003f, 0x1004d },
+ { 0x10050, 0x1005d },
+ { 0x10080, 0x100fa },
+ { 0x10140, 0x10174 },
+ { 0x10280, 0x1029c },
+ { 0x102a0, 0x102d0 },
+ { 0x10300, 0x1031f },
+ { 0x1032d, 0x1034a },
+ { 0x10350, 0x10375 },
+ { 0x10380, 0x1039d },
+ { 0x103a0, 0x103c3 },
+ { 0x103c8, 0x103cf },
+ { 0x103d1, 0x103d5 },
+ { 0x10400, 0x1049d },
+ { 0x104b0, 0x104d3 },
+ { 0x104d8, 0x104fb },
+ { 0x10500, 0x10527 },
+ { 0x10530, 0x10563 },
+ { 0x10570, 0x1057a },
+ { 0x1057c, 0x1058a },
+ { 0x1058c, 0x10592 },
+ { 0x10594, 0x10595 },
+ { 0x10597, 0x105a1 },
+ { 0x105a3, 0x105b1 },
+ { 0x105b3, 0x105b9 },
+ { 0x105bb, 0x105bc },
+ { 0x10600, 0x10736 },
+ { 0x10740, 0x10755 },
+ { 0x10760, 0x10767 },
+ { 0x10780, 0x10785 },
+ { 0x10787, 0x107b0 },
+ { 0x107b2, 0x107ba },
+ { 0x10800, 0x10805 },
+ { 0x10808, 0x10808 },
+ { 0x1080a, 0x10835 },
+ { 0x10837, 0x10838 },
+ { 0x1083c, 0x1083c },
+ { 0x1083f, 0x10855 },
+ { 0x10860, 0x10876 },
+ { 0x10880, 0x1089e },
+ { 0x108e0, 0x108f2 },
+ { 0x108f4, 0x108f5 },
+ { 0x10900, 0x10915 },
+ { 0x10920, 0x10939 },
+ { 0x10980, 0x109b7 },
+ { 0x109be, 0x109bf },
+ { 0x10a00, 0x10a00 },
+ { 0x10a10, 0x10a13 },
+ { 0x10a15, 0x10a17 },
+ { 0x10a19, 0x10a35 },
+ { 0x10a60, 0x10a7c },
+ { 0x10a80, 0x10a9c },
+ { 0x10ac0, 0x10ac7 },
+ { 0x10ac9, 0x10ae4 },
+ { 0x10b00, 0x10b35 },
+ { 0x10b40, 0x10b55 },
+ { 0x10b60, 0x10b72 },
+ { 0x10b80, 0x10b91 },
+ { 0x10c00, 0x10c48 },
+ { 0x10c80, 0x10cb2 },
+ { 0x10cc0, 0x10cf2 },
+ { 0x10d00, 0x10d23 },
+ { 0x10e80, 0x10ea9 },
+ { 0x10eb0, 0x10eb1 },
+ { 0x10f00, 0x10f1c },
+ { 0x10f27, 0x10f27 },
+ { 0x10f30, 0x10f45 },
+ { 0x10f70, 0x10f81 },
+ { 0x10fb0, 0x10fc4 },
+ { 0x10fe0, 0x10ff6 },
+ { 0x11003, 0x11037 },
+ { 0x11071, 0x11072 },
+ { 0x11075, 0x11075 },
+ { 0x11083, 0x110af },
+ { 0x110d0, 0x110e8 },
+ { 0x11103, 0x11126 },
+ { 0x11144, 0x11144 },
+ { 0x11147, 0x11147 },
+ { 0x11150, 0x11172 },
+ { 0x11176, 0x11176 },
+ { 0x11183, 0x111b2 },
+ { 0x111c1, 0x111c4 },
+ { 0x111da, 0x111da },
+ { 0x111dc, 0x111dc },
+ { 0x11200, 0x11211 },
+ { 0x11213, 0x1122b },
+ { 0x11280, 0x11286 },
+ { 0x11288, 0x11288 },
+ { 0x1128a, 0x1128d },
+ { 0x1128f, 0x1129d },
+ { 0x1129f, 0x112a8 },
+ { 0x112b0, 0x112de },
+ { 0x11305, 0x1130c },
+ { 0x1130f, 0x11310 },
+ { 0x11313, 0x11328 },
+ { 0x1132a, 0x11330 },
+ { 0x11332, 0x11333 },
+ { 0x11335, 0x11339 },
+ { 0x1133d, 0x1133d },
+ { 0x11350, 0x11350 },
+ { 0x1135d, 0x11361 },
+ { 0x11400, 0x11434 },
+ { 0x11447, 0x1144a },
+ { 0x1145f, 0x11461 },
+ { 0x11480, 0x114af },
+ { 0x114c4, 0x114c5 },
+ { 0x114c7, 0x114c7 },
+ { 0x11580, 0x115ae },
+ { 0x115d8, 0x115db },
+ { 0x11600, 0x1162f },
+ { 0x11644, 0x11644 },
+ { 0x11680, 0x116aa },
+ { 0x116b8, 0x116b8 },
+ { 0x11700, 0x1171a },
+ { 0x11740, 0x11746 },
+ { 0x11800, 0x1182b },
+ { 0x118a0, 0x118df },
+ { 0x118ff, 0x11906 },
+ { 0x11909, 0x11909 },
+ { 0x1190c, 0x11913 },
+ { 0x11915, 0x11916 },
+ { 0x11918, 0x1192f },
+ { 0x1193f, 0x1193f },
+ { 0x11941, 0x11941 },
+ { 0x119a0, 0x119a7 },
+ { 0x119aa, 0x119d0 },
+ { 0x119e1, 0x119e1 },
+ { 0x119e3, 0x119e3 },
+ { 0x11a00, 0x11a00 },
+ { 0x11a0b, 0x11a32 },
+ { 0x11a3a, 0x11a3a },
+ { 0x11a50, 0x11a50 },
+ { 0x11a5c, 0x11a89 },
+ { 0x11a9d, 0x11a9d },
+ { 0x11ab0, 0x11af8 },
+ { 0x11c00, 0x11c08 },
+ { 0x11c0a, 0x11c2e },
+ { 0x11c40, 0x11c40 },
+ { 0x11c72, 0x11c8f },
+ { 0x11d00, 0x11d06 },
+ { 0x11d08, 0x11d09 },
+ { 0x11d0b, 0x11d30 },
+ { 0x11d46, 0x11d46 },
+ { 0x11d60, 0x11d65 },
+ { 0x11d67, 0x11d68 },
+ { 0x11d6a, 0x11d89 },
+ { 0x11d98, 0x11d98 },
+ { 0x11ee0, 0x11ef2 },
+ { 0x11fb0, 0x11fb0 },
+ { 0x12000, 0x12399 },
+ { 0x12400, 0x1246e },
+ { 0x12480, 0x12543 },
+ { 0x12f90, 0x12ff0 },
+ { 0x13000, 0x1342e },
+ { 0x14400, 0x14646 },
+ { 0x16800, 0x16a38 },
+ { 0x16a40, 0x16a5e },
+ { 0x16a70, 0x16abe },
+ { 0x16ad0, 0x16aed },
+ { 0x16b00, 0x16b2f },
+ { 0x16b40, 0x16b43 },
+ { 0x16b63, 0x16b77 },
+ { 0x16b7d, 0x16b8f },
+ { 0x16e40, 0x16e7f },
+ { 0x16f00, 0x16f4a },
+ { 0x16f50, 0x16f50 },
+ { 0x16f93, 0x16f9f },
+ { 0x16fe0, 0x16fe1 },
+ { 0x16fe3, 0x16fe3 },
+ { 0x17000, 0x187f7 },
+ { 0x18800, 0x18cd5 },
+ { 0x18d00, 0x18d08 },
+ { 0x1aff0, 0x1aff3 },
+ { 0x1aff5, 0x1affb },
+ { 0x1affd, 0x1affe },
+ { 0x1b000, 0x1b122 },
+ { 0x1b150, 0x1b152 },
+ { 0x1b164, 0x1b167 },
+ { 0x1b170, 0x1b2fb },
+ { 0x1bc00, 0x1bc6a },
+ { 0x1bc70, 0x1bc7c },
+ { 0x1bc80, 0x1bc88 },
+ { 0x1bc90, 0x1bc99 },
+ { 0x1d400, 0x1d454 },
+ { 0x1d456, 0x1d49c },
+ { 0x1d49e, 0x1d49f },
+ { 0x1d4a2, 0x1d4a2 },
+ { 0x1d4a5, 0x1d4a6 },
+ { 0x1d4a9, 0x1d4ac },
+ { 0x1d4ae, 0x1d4b9 },
+ { 0x1d4bb, 0x1d4bb },
+ { 0x1d4bd, 0x1d4c3 },
+ { 0x1d4c5, 0x1d505 },
+ { 0x1d507, 0x1d50a },
+ { 0x1d50d, 0x1d514 },
+ { 0x1d516, 0x1d51c },
+ { 0x1d51e, 0x1d539 },
+ { 0x1d53b, 0x1d53e },
+ { 0x1d540, 0x1d544 },
+ { 0x1d546, 0x1d546 },
+ { 0x1d54a, 0x1d550 },
+ { 0x1d552, 0x1d6a5 },
+ { 0x1d6a8, 0x1d6c0 },
+ { 0x1d6c2, 0x1d6da },
+ { 0x1d6dc, 0x1d6fa },
+ { 0x1d6fc, 0x1d714 },
+ { 0x1d716, 0x1d734 },
+ { 0x1d736, 0x1d74e },
+ { 0x1d750, 0x1d76e },
+ { 0x1d770, 0x1d788 },
+ { 0x1d78a, 0x1d7a8 },
+ { 0x1d7aa, 0x1d7c2 },
+ { 0x1d7c4, 0x1d7cb },
+ { 0x1df00, 0x1df1e },
+ { 0x1e100, 0x1e12c },
+ { 0x1e137, 0x1e13d },
+ { 0x1e14e, 0x1e14e },
+ { 0x1e290, 0x1e2ad },
+ { 0x1e2c0, 0x1e2eb },
+ { 0x1e7e0, 0x1e7e6 },
+ { 0x1e7e8, 0x1e7eb },
+ { 0x1e7ed, 0x1e7ee },
+ { 0x1e7f0, 0x1e7fe },
+ { 0x1e800, 0x1e8c4 },
+ { 0x1e900, 0x1e943 },
+ { 0x1e94b, 0x1e94b },
+ { 0x1ee00, 0x1ee03 },
+ { 0x1ee05, 0x1ee1f },
+ { 0x1ee21, 0x1ee22 },
+ { 0x1ee24, 0x1ee24 },
+ { 0x1ee27, 0x1ee27 },
+ { 0x1ee29, 0x1ee32 },
+ { 0x1ee34, 0x1ee37 },
+ { 0x1ee39, 0x1ee39 },
+ { 0x1ee3b, 0x1ee3b },
+ { 0x1ee42, 0x1ee42 },
+ { 0x1ee47, 0x1ee47 },
+ { 0x1ee49, 0x1ee49 },
+ { 0x1ee4b, 0x1ee4b },
+ { 0x1ee4d, 0x1ee4f },
+ { 0x1ee51, 0x1ee52 },
+ { 0x1ee54, 0x1ee54 },
+ { 0x1ee57, 0x1ee57 },
+ { 0x1ee59, 0x1ee59 },
+ { 0x1ee5b, 0x1ee5b },
+ { 0x1ee5d, 0x1ee5d },
+ { 0x1ee5f, 0x1ee5f },
+ { 0x1ee61, 0x1ee62 },
+ { 0x1ee64, 0x1ee64 },
+ { 0x1ee67, 0x1ee6a },
+ { 0x1ee6c, 0x1ee72 },
+ { 0x1ee74, 0x1ee77 },
+ { 0x1ee79, 0x1ee7c },
+ { 0x1ee7e, 0x1ee7e },
+ { 0x1ee80, 0x1ee89 },
+ { 0x1ee8b, 0x1ee9b },
+ { 0x1eea1, 0x1eea3 },
+ { 0x1eea5, 0x1eea9 },
+ { 0x1eeab, 0x1eebb },
+ { 0x20000, 0x2a6df },
+ { 0x2a700, 0x2b738 },
+ { 0x2b740, 0x2b81d },
+ { 0x2b820, 0x2cea1 },
+ { 0x2ceb0, 0x2ebe0 },
+ { 0x2f800, 0x2fa1d },
+ { 0x30000, 0x3134a },
+ { 0x0, 0x0 },
+};
+
+static CharRange xid_continue[] = {
+ { 0x30, 0x39 },
+ { 0x41, 0x5a },
+ { 0x5f, 0x5f },
+ { 0x61, 0x7a },
+ { 0xaa, 0xaa },
+ { 0xb5, 0xb5 },
+ { 0xb7, 0xb7 },
+ { 0xba, 0xba },
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xf6 },
+ { 0xf8, 0x2c1 },
+ { 0x2c6, 0x2d1 },
+ { 0x2e0, 0x2e4 },
+ { 0x2ec, 0x2ec },
+ { 0x2ee, 0x2ee },
+ { 0x300, 0x374 },
+ { 0x376, 0x377 },
+ { 0x37a, 0x37d },
+ { 0x37f, 0x37f },
+ { 0x386, 0x38a },
+ { 0x38c, 0x38c },
+ { 0x38e, 0x3a1 },
+ { 0x3a3, 0x3f5 },
+ { 0x3f7, 0x481 },
+ { 0x483, 0x487 },
+ { 0x48a, 0x52f },
+ { 0x531, 0x556 },
+ { 0x559, 0x559 },
+ { 0x560, 0x588 },
+ { 0x591, 0x5bd },
+ { 0x5bf, 0x5bf },
+ { 0x5c1, 0x5c2 },
+ { 0x5c4, 0x5c5 },
+ { 0x5c7, 0x5c7 },
+ { 0x5d0, 0x5ea },
+ { 0x5ef, 0x5f2 },
+ { 0x610, 0x61a },
+ { 0x620, 0x669 },
+ { 0x66e, 0x6d3 },
+ { 0x6d5, 0x6dc },
+ { 0x6df, 0x6e8 },
+ { 0x6ea, 0x6fc },
+ { 0x6ff, 0x6ff },
+ { 0x710, 0x74a },
+ { 0x74d, 0x7b1 },
+ { 0x7c0, 0x7f5 },
+ { 0x7fa, 0x7fa },
+ { 0x7fd, 0x7fd },
+ { 0x800, 0x82d },
+ { 0x840, 0x85b },
+ { 0x860, 0x86a },
+ { 0x870, 0x887 },
+ { 0x889, 0x88e },
+ { 0x898, 0x8e1 },
+ { 0x8e3, 0x963 },
+ { 0x966, 0x96f },
+ { 0x971, 0x983 },
+ { 0x985, 0x98c },
+ { 0x98f, 0x990 },
+ { 0x993, 0x9a8 },
+ { 0x9aa, 0x9b0 },
+ { 0x9b2, 0x9b2 },
+ { 0x9b6, 0x9b9 },
+ { 0x9bc, 0x9c4 },
+ { 0x9c7, 0x9c8 },
+ { 0x9cb, 0x9ce },
+ { 0x9d7, 0x9d7 },
+ { 0x9dc, 0x9dd },
+ { 0x9df, 0x9e3 },
+ { 0x9e6, 0x9f1 },
+ { 0x9fc, 0x9fc },
+ { 0x9fe, 0x9fe },
+ { 0xa01, 0xa03 },
+ { 0xa05, 0xa0a },
+ { 0xa0f, 0xa10 },
+ { 0xa13, 0xa28 },
+ { 0xa2a, 0xa30 },
+ { 0xa32, 0xa33 },
+ { 0xa35, 0xa36 },
+ { 0xa38, 0xa39 },
+ { 0xa3c, 0xa3c },
+ { 0xa3e, 0xa42 },
+ { 0xa47, 0xa48 },
+ { 0xa4b, 0xa4d },
+ { 0xa51, 0xa51 },
+ { 0xa59, 0xa5c },
+ { 0xa5e, 0xa5e },
+ { 0xa66, 0xa75 },
+ { 0xa81, 0xa83 },
+ { 0xa85, 0xa8d },
+ { 0xa8f, 0xa91 },
+ { 0xa93, 0xaa8 },
+ { 0xaaa, 0xab0 },
+ { 0xab2, 0xab3 },
+ { 0xab5, 0xab9 },
+ { 0xabc, 0xac5 },
+ { 0xac7, 0xac9 },
+ { 0xacb, 0xacd },
+ { 0xad0, 0xad0 },
+ { 0xae0, 0xae3 },
+ { 0xae6, 0xaef },
+ { 0xaf9, 0xaff },
+ { 0xb01, 0xb03 },
+ { 0xb05, 0xb0c },
+ { 0xb0f, 0xb10 },
+ { 0xb13, 0xb28 },
+ { 0xb2a, 0xb30 },
+ { 0xb32, 0xb33 },
+ { 0xb35, 0xb39 },
+ { 0xb3c, 0xb44 },
+ { 0xb47, 0xb48 },
+ { 0xb4b, 0xb4d },
+ { 0xb55, 0xb57 },
+ { 0xb5c, 0xb5d },
+ { 0xb5f, 0xb63 },
+ { 0xb66, 0xb6f },
+ { 0xb71, 0xb71 },
+ { 0xb82, 0xb83 },
+ { 0xb85, 0xb8a },
+ { 0xb8e, 0xb90 },
+ { 0xb92, 0xb95 },
+ { 0xb99, 0xb9a },
+ { 0xb9c, 0xb9c },
+ { 0xb9e, 0xb9f },
+ { 0xba3, 0xba4 },
+ { 0xba8, 0xbaa },
+ { 0xbae, 0xbb9 },
+ { 0xbbe, 0xbc2 },
+ { 0xbc6, 0xbc8 },
+ { 0xbca, 0xbcd },
+ { 0xbd0, 0xbd0 },
+ { 0xbd7, 0xbd7 },
+ { 0xbe6, 0xbef },
+ { 0xc00, 0xc0c },
+ { 0xc0e, 0xc10 },
+ { 0xc12, 0xc28 },
+ { 0xc2a, 0xc39 },
+ { 0xc3c, 0xc44 },
+ { 0xc46, 0xc48 },
+ { 0xc4a, 0xc4d },
+ { 0xc55, 0xc56 },
+ { 0xc58, 0xc5a },
+ { 0xc5d, 0xc5d },
+ { 0xc60, 0xc63 },
+ { 0xc66, 0xc6f },
+ { 0xc80, 0xc83 },
+ { 0xc85, 0xc8c },
+ { 0xc8e, 0xc90 },
+ { 0xc92, 0xca8 },
+ { 0xcaa, 0xcb3 },
+ { 0xcb5, 0xcb9 },
+ { 0xcbc, 0xcc4 },
+ { 0xcc6, 0xcc8 },
+ { 0xcca, 0xccd },
+ { 0xcd5, 0xcd6 },
+ { 0xcdd, 0xcde },
+ { 0xce0, 0xce3 },
+ { 0xce6, 0xcef },
+ { 0xcf1, 0xcf2 },
+ { 0xd00, 0xd0c },
+ { 0xd0e, 0xd10 },
+ { 0xd12, 0xd44 },
+ { 0xd46, 0xd48 },
+ { 0xd4a, 0xd4e },
+ { 0xd54, 0xd57 },
+ { 0xd5f, 0xd63 },
+ { 0xd66, 0xd6f },
+ { 0xd7a, 0xd7f },
+ { 0xd81, 0xd83 },
+ { 0xd85, 0xd96 },
+ { 0xd9a, 0xdb1 },
+ { 0xdb3, 0xdbb },
+ { 0xdbd, 0xdbd },
+ { 0xdc0, 0xdc6 },
+ { 0xdca, 0xdca },
+ { 0xdcf, 0xdd4 },
+ { 0xdd6, 0xdd6 },
+ { 0xdd8, 0xddf },
+ { 0xde6, 0xdef },
+ { 0xdf2, 0xdf3 },
+ { 0xe01, 0xe3a },
+ { 0xe40, 0xe4e },
+ { 0xe50, 0xe59 },
+ { 0xe81, 0xe82 },
+ { 0xe84, 0xe84 },
+ { 0xe86, 0xe8a },
+ { 0xe8c, 0xea3 },
+ { 0xea5, 0xea5 },
+ { 0xea7, 0xebd },
+ { 0xec0, 0xec4 },
+ { 0xec6, 0xec6 },
+ { 0xec8, 0xecd },
+ { 0xed0, 0xed9 },
+ { 0xedc, 0xedf },
+ { 0xf00, 0xf00 },
+ { 0xf18, 0xf19 },
+ { 0xf20, 0xf29 },
+ { 0xf35, 0xf35 },
+ { 0xf37, 0xf37 },
+ { 0xf39, 0xf39 },
+ { 0xf3e, 0xf47 },
+ { 0xf49, 0xf6c },
+ { 0xf71, 0xf84 },
+ { 0xf86, 0xf97 },
+ { 0xf99, 0xfbc },
+ { 0xfc6, 0xfc6 },
+ { 0x1000, 0x1049 },
+ { 0x1050, 0x109d },
+ { 0x10a0, 0x10c5 },
+ { 0x10c7, 0x10c7 },
+ { 0x10cd, 0x10cd },
+ { 0x10d0, 0x10fa },
+ { 0x10fc, 0x1248 },
+ { 0x124a, 0x124d },
+ { 0x1250, 0x1256 },
+ { 0x1258, 0x1258 },
+ { 0x125a, 0x125d },
+ { 0x1260, 0x1288 },
+ { 0x128a, 0x128d },
+ { 0x1290, 0x12b0 },
+ { 0x12b2, 0x12b5 },
+ { 0x12b8, 0x12be },
+ { 0x12c0, 0x12c0 },
+ { 0x12c2, 0x12c5 },
+ { 0x12c8, 0x12d6 },
+ { 0x12d8, 0x1310 },
+ { 0x1312, 0x1315 },
+ { 0x1318, 0x135a },
+ { 0x135d, 0x135f },
+ { 0x1369, 0x1369 },
+ { 0x1371, 0x1371 },
+ { 0x1380, 0x138f },
+ { 0x13a0, 0x13f5 },
+ { 0x13f8, 0x13fd },
+ { 0x1401, 0x166c },
+ { 0x166f, 0x167f },
+ { 0x1681, 0x169a },
+ { 0x16a0, 0x16ea },
+ { 0x16ee, 0x16f8 },
+ { 0x1700, 0x1715 },
+ { 0x171f, 0x1734 },
+ { 0x1740, 0x1753 },
+ { 0x1760, 0x176c },
+ { 0x176e, 0x1770 },
+ { 0x1772, 0x1773 },
+ { 0x1780, 0x17d3 },
+ { 0x17d7, 0x17d7 },
+ { 0x17dc, 0x17dd },
+ { 0x17e0, 0x17e9 },
+ { 0x180b, 0x180d },
+ { 0x180f, 0x1819 },
+ { 0x1820, 0x1878 },
+ { 0x1880, 0x18aa },
+ { 0x18b0, 0x18f5 },
+ { 0x1900, 0x191e },
+ { 0x1920, 0x192b },
+ { 0x1930, 0x193b },
+ { 0x1946, 0x196d },
+ { 0x1970, 0x1974 },
+ { 0x1980, 0x19ab },
+ { 0x19b0, 0x19c9 },
+ { 0x19d0, 0x19da },
+ { 0x1a00, 0x1a1b },
+ { 0x1a20, 0x1a5e },
+ { 0x1a60, 0x1a7c },
+ { 0x1a7f, 0x1a89 },
+ { 0x1a90, 0x1a99 },
+ { 0x1aa7, 0x1aa7 },
+ { 0x1ab0, 0x1abd },
+ { 0x1abf, 0x1ace },
+ { 0x1b00, 0x1b4c },
+ { 0x1b50, 0x1b59 },
+ { 0x1b6b, 0x1b73 },
+ { 0x1b80, 0x1bf3 },
+ { 0x1c00, 0x1c37 },
+ { 0x1c40, 0x1c49 },
+ { 0x1c4d, 0x1c7d },
+ { 0x1c80, 0x1c88 },
+ { 0x1c90, 0x1cba },
+ { 0x1cbd, 0x1cbf },
+ { 0x1cd0, 0x1cd2 },
+ { 0x1cd4, 0x1cfa },
+ { 0x1d00, 0x1f15 },
+ { 0x1f18, 0x1f1d },
+ { 0x1f20, 0x1f45 },
+ { 0x1f48, 0x1f4d },
+ { 0x1f50, 0x1f57 },
+ { 0x1f59, 0x1f59 },
+ { 0x1f5b, 0x1f5b },
+ { 0x1f5d, 0x1f5d },
+ { 0x1f5f, 0x1f7d },
+ { 0x1f80, 0x1fb4 },
+ { 0x1fb6, 0x1fbc },
+ { 0x1fbe, 0x1fbe },
+ { 0x1fc2, 0x1fc4 },
+ { 0x1fc6, 0x1fcc },
+ { 0x1fd0, 0x1fd3 },
+ { 0x1fd6, 0x1fdb },
+ { 0x1fe0, 0x1fec },
+ { 0x1ff2, 0x1ff4 },
+ { 0x1ff6, 0x1ffc },
+ { 0x203f, 0x2040 },
+ { 0x2054, 0x2054 },
+ { 0x2071, 0x2071 },
+ { 0x207f, 0x207f },
+ { 0x2090, 0x209c },
+ { 0x20d0, 0x20dc },
+ { 0x20e1, 0x20e1 },
+ { 0x20e5, 0x20f0 },
+ { 0x2102, 0x2102 },
+ { 0x2107, 0x2107 },
+ { 0x210a, 0x2113 },
+ { 0x2115, 0x2115 },
+ { 0x2118, 0x211d },
+ { 0x2124, 0x2124 },
+ { 0x2126, 0x2126 },
+ { 0x2128, 0x2128 },
+ { 0x212a, 0x2139 },
+ { 0x213c, 0x213f },
+ { 0x2145, 0x2149 },
+ { 0x214e, 0x214e },
+ { 0x2160, 0x2188 },
+ { 0x2c00, 0x2ce4 },
+ { 0x2ceb, 0x2cf3 },
+ { 0x2d00, 0x2d25 },
+ { 0x2d27, 0x2d27 },
+ { 0x2d2d, 0x2d2d },
+ { 0x2d30, 0x2d67 },
+ { 0x2d6f, 0x2d6f },
+ { 0x2d7f, 0x2d96 },
+ { 0x2da0, 0x2da6 },
+ { 0x2da8, 0x2dae },
+ { 0x2db0, 0x2db6 },
+ { 0x2db8, 0x2dbe },
+ { 0x2dc0, 0x2dc6 },
+ { 0x2dc8, 0x2dce },
+ { 0x2dd0, 0x2dd6 },
+ { 0x2dd8, 0x2dde },
+ { 0x2de0, 0x2dff },
+ { 0x3005, 0x3007 },
+ { 0x3021, 0x302f },
+ { 0x3031, 0x3035 },
+ { 0x3038, 0x303c },
+ { 0x3041, 0x3096 },
+ { 0x3099, 0x309f },
+ { 0x30a1, 0x30fa },
+ { 0x30fc, 0x30ff },
+ { 0x3105, 0x312f },
+ { 0x3131, 0x318e },
+ { 0x31a0, 0x31bf },
+ { 0x31f0, 0x31ff },
+ { 0x3400, 0x4dbf },
+ { 0x4e00, 0xa48c },
+ { 0xa4d0, 0xa4fd },
+ { 0xa500, 0xa60c },
+ { 0xa610, 0xa62b },
+ { 0xa640, 0xa66f },
+ { 0xa674, 0xa67d },
+ { 0xa67f, 0xa6f1 },
+ { 0xa717, 0xa71f },
+ { 0xa722, 0xa788 },
+ { 0xa78b, 0xa7ca },
+ { 0xa7d0, 0xa7d1 },
+ { 0xa7d3, 0xa7d3 },
+ { 0xa7d5, 0xa7d9 },
+ { 0xa7f2, 0xa827 },
+ { 0xa82c, 0xa82c },
+ { 0xa840, 0xa873 },
+ { 0xa880, 0xa8c5 },
+ { 0xa8d0, 0xa8d9 },
+ { 0xa8e0, 0xa8f7 },
+ { 0xa8fb, 0xa8fb },
+ { 0xa8fd, 0xa92d },
+ { 0xa930, 0xa953 },
+ { 0xa960, 0xa97c },
+ { 0xa980, 0xa9c0 },
+ { 0xa9cf, 0xa9d9 },
+ { 0xa9e0, 0xa9fe },
+ { 0xaa00, 0xaa36 },
+ { 0xaa40, 0xaa4d },
+ { 0xaa50, 0xaa59 },
+ { 0xaa60, 0xaa76 },
+ { 0xaa7a, 0xaac2 },
+ { 0xaadb, 0xaadd },
+ { 0xaae0, 0xaaef },
+ { 0xaaf2, 0xaaf6 },
+ { 0xab01, 0xab06 },
+ { 0xab09, 0xab0e },
+ { 0xab11, 0xab16 },
+ { 0xab20, 0xab26 },
+ { 0xab28, 0xab2e },
+ { 0xab30, 0xab5a },
+ { 0xab5c, 0xab69 },
+ { 0xab70, 0xabea },
+ { 0xabec, 0xabed },
+ { 0xabf0, 0xabf9 },
+ { 0xac00, 0xd7a3 },
+ { 0xd7b0, 0xd7c6 },
+ { 0xd7cb, 0xd7fb },
+ { 0xf900, 0xfa6d },
+ { 0xfa70, 0xfad9 },
+ { 0xfb00, 0xfb06 },
+ { 0xfb13, 0xfb17 },
+ { 0xfb1d, 0xfb28 },
+ { 0xfb2a, 0xfb36 },
+ { 0xfb38, 0xfb3c },
+ { 0xfb3e, 0xfb3e },
+ { 0xfb40, 0xfb41 },
+ { 0xfb43, 0xfb44 },
+ { 0xfb46, 0xfbb1 },
+ { 0xfbd3, 0xfd3d },
+ { 0xfd50, 0xfd8f },
+ { 0xfd92, 0xfdc7 },
+ { 0xfdf0, 0xfdfb },
+ { 0xfe00, 0xfe0f },
+ { 0xfe20, 0xfe2f },
+ { 0xfe33, 0xfe34 },
+ { 0xfe4d, 0xfe4f },
+ { 0xfe70, 0xfe74 },
+ { 0xfe76, 0xfefc },
+ { 0xff10, 0xff19 },
+ { 0xff21, 0xff3a },
+ { 0xff3f, 0xff3f },
+ { 0xff41, 0xff5a },
+ { 0xff66, 0xffbe },
+ { 0xffc2, 0xffc7 },
+ { 0xffca, 0xffcf },
+ { 0xffd2, 0xffd7 },
+ { 0xffda, 0xffdc },
+ { 0x10000, 0x1000b },
+ { 0x1000d, 0x10026 },
+ { 0x10028, 0x1003a },
+ { 0x1003c, 0x1003d },
+ { 0x1003f, 0x1004d },
+ { 0x10050, 0x1005d },
+ { 0x10080, 0x100fa },
+ { 0x10140, 0x10174 },
+ { 0x101fd, 0x101fd },
+ { 0x10280, 0x1029c },
+ { 0x102a0, 0x102d0 },
+ { 0x102e0, 0x102e0 },
+ { 0x10300, 0x1031f },
+ { 0x1032d, 0x1034a },
+ { 0x10350, 0x1037a },
+ { 0x10380, 0x1039d },
+ { 0x103a0, 0x103c3 },
+ { 0x103c8, 0x103cf },
+ { 0x103d1, 0x103d5 },
+ { 0x10400, 0x1049d },
+ { 0x104a0, 0x104a9 },
+ { 0x104b0, 0x104d3 },
+ { 0x104d8, 0x104fb },
+ { 0x10500, 0x10527 },
+ { 0x10530, 0x10563 },
+ { 0x10570, 0x1057a },
+ { 0x1057c, 0x1058a },
+ { 0x1058c, 0x10592 },
+ { 0x10594, 0x10595 },
+ { 0x10597, 0x105a1 },
+ { 0x105a3, 0x105b1 },
+ { 0x105b3, 0x105b9 },
+ { 0x105bb, 0x105bc },
+ { 0x10600, 0x10736 },
+ { 0x10740, 0x10755 },
+ { 0x10760, 0x10767 },
+ { 0x10780, 0x10785 },
+ { 0x10787, 0x107b0 },
+ { 0x107b2, 0x107ba },
+ { 0x10800, 0x10805 },
+ { 0x10808, 0x10808 },
+ { 0x1080a, 0x10835 },
+ { 0x10837, 0x10838 },
+ { 0x1083c, 0x1083c },
+ { 0x1083f, 0x10855 },
+ { 0x10860, 0x10876 },
+ { 0x10880, 0x1089e },
+ { 0x108e0, 0x108f2 },
+ { 0x108f4, 0x108f5 },
+ { 0x10900, 0x10915 },
+ { 0x10920, 0x10939 },
+ { 0x10980, 0x109b7 },
+ { 0x109be, 0x109bf },
+ { 0x10a00, 0x10a03 },
+ { 0x10a05, 0x10a06 },
+ { 0x10a0c, 0x10a13 },
+ { 0x10a15, 0x10a17 },
+ { 0x10a19, 0x10a35 },
+ { 0x10a38, 0x10a3a },
+ { 0x10a3f, 0x10a3f },
+ { 0x10a60, 0x10a7c },
+ { 0x10a80, 0x10a9c },
+ { 0x10ac0, 0x10ac7 },
+ { 0x10ac9, 0x10ae6 },
+ { 0x10b00, 0x10b35 },
+ { 0x10b40, 0x10b55 },
+ { 0x10b60, 0x10b72 },
+ { 0x10b80, 0x10b91 },
+ { 0x10c00, 0x10c48 },
+ { 0x10c80, 0x10cb2 },
+ { 0x10cc0, 0x10cf2 },
+ { 0x10d00, 0x10d27 },
+ { 0x10d30, 0x10d39 },
+ { 0x10e80, 0x10ea9 },
+ { 0x10eab, 0x10eac },
+ { 0x10eb0, 0x10eb1 },
+ { 0x10f00, 0x10f1c },
+ { 0x10f27, 0x10f27 },
+ { 0x10f30, 0x10f50 },
+ { 0x10f70, 0x10f85 },
+ { 0x10fb0, 0x10fc4 },
+ { 0x10fe0, 0x10ff6 },
+ { 0x11000, 0x11046 },
+ { 0x11066, 0x11075 },
+ { 0x1107f, 0x110ba },
+ { 0x110c2, 0x110c2 },
+ { 0x110d0, 0x110e8 },
+ { 0x110f0, 0x110f9 },
+ { 0x11100, 0x11134 },
+ { 0x11136, 0x1113f },
+ { 0x11144, 0x11147 },
+ { 0x11150, 0x11173 },
+ { 0x11176, 0x11176 },
+ { 0x11180, 0x111c4 },
+ { 0x111c9, 0x111cc },
+ { 0x111ce, 0x111da },
+ { 0x111dc, 0x111dc },
+ { 0x11200, 0x11211 },
+ { 0x11213, 0x11237 },
+ { 0x1123e, 0x1123e },
+ { 0x11280, 0x11286 },
+ { 0x11288, 0x11288 },
+ { 0x1128a, 0x1128d },
+ { 0x1128f, 0x1129d },
+ { 0x1129f, 0x112a8 },
+ { 0x112b0, 0x112ea },
+ { 0x112f0, 0x112f9 },
+ { 0x11300, 0x11303 },
+ { 0x11305, 0x1130c },
+ { 0x1130f, 0x11310 },
+ { 0x11313, 0x11328 },
+ { 0x1132a, 0x11330 },
+ { 0x11332, 0x11333 },
+ { 0x11335, 0x11339 },
+ { 0x1133b, 0x11344 },
+ { 0x11347, 0x11348 },
+ { 0x1134b, 0x1134d },
+ { 0x11350, 0x11350 },
+ { 0x11357, 0x11357 },
+ { 0x1135d, 0x11363 },
+ { 0x11366, 0x1136c },
+ { 0x11370, 0x11374 },
+ { 0x11400, 0x1144a },
+ { 0x11450, 0x11459 },
+ { 0x1145e, 0x11461 },
+ { 0x11480, 0x114c5 },
+ { 0x114c7, 0x114c7 },
+ { 0x114d0, 0x114d9 },
+ { 0x11580, 0x115b5 },
+ { 0x115b8, 0x115c0 },
+ { 0x115d8, 0x115dd },
+ { 0x11600, 0x11640 },
+ { 0x11644, 0x11644 },
+ { 0x11650, 0x11659 },
+ { 0x11680, 0x116b8 },
+ { 0x116c0, 0x116c9 },
+ { 0x11700, 0x1171a },
+ { 0x1171d, 0x1172b },
+ { 0x11730, 0x11739 },
+ { 0x11740, 0x11746 },
+ { 0x11800, 0x1183a },
+ { 0x118a0, 0x118e9 },
+ { 0x118ff, 0x11906 },
+ { 0x11909, 0x11909 },
+ { 0x1190c, 0x11913 },
+ { 0x11915, 0x11916 },
+ { 0x11918, 0x11935 },
+ { 0x11937, 0x11938 },
+ { 0x1193b, 0x11943 },
+ { 0x11950, 0x11959 },
+ { 0x119a0, 0x119a7 },
+ { 0x119aa, 0x119d7 },
+ { 0x119da, 0x119e1 },
+ { 0x119e3, 0x119e4 },
+ { 0x11a00, 0x11a3e },
+ { 0x11a47, 0x11a47 },
+ { 0x11a50, 0x11a99 },
+ { 0x11a9d, 0x11a9d },
+ { 0x11ab0, 0x11af8 },
+ { 0x11c00, 0x11c08 },
+ { 0x11c0a, 0x11c36 },
+ { 0x11c38, 0x11c40 },
+ { 0x11c50, 0x11c59 },
+ { 0x11c72, 0x11c8f },
+ { 0x11c92, 0x11ca7 },
+ { 0x11ca9, 0x11cb6 },
+ { 0x11d00, 0x11d06 },
+ { 0x11d08, 0x11d09 },
+ { 0x11d0b, 0x11d36 },
+ { 0x11d3a, 0x11d3a },
+ { 0x11d3c, 0x11d3d },
+ { 0x11d3f, 0x11d47 },
+ { 0x11d50, 0x11d59 },
+ { 0x11d60, 0x11d65 },
+ { 0x11d67, 0x11d68 },
+ { 0x11d6a, 0x11d8e },
+ { 0x11d90, 0x11d91 },
+ { 0x11d93, 0x11d98 },
+ { 0x11da0, 0x11da9 },
+ { 0x11ee0, 0x11ef6 },
+ { 0x11fb0, 0x11fb0 },
+ { 0x12000, 0x12399 },
+ { 0x12400, 0x1246e },
+ { 0x12480, 0x12543 },
+ { 0x12f90, 0x12ff0 },
+ { 0x13000, 0x1342e },
+ { 0x14400, 0x14646 },
+ { 0x16800, 0x16a38 },
+ { 0x16a40, 0x16a5e },
+ { 0x16a60, 0x16a69 },
+ { 0x16a70, 0x16abe },
+ { 0x16ac0, 0x16ac9 },
+ { 0x16ad0, 0x16aed },
+ { 0x16af0, 0x16af4 },
+ { 0x16b00, 0x16b36 },
+ { 0x16b40, 0x16b43 },
+ { 0x16b50, 0x16b59 },
+ { 0x16b63, 0x16b77 },
+ { 0x16b7d, 0x16b8f },
+ { 0x16e40, 0x16e7f },
+ { 0x16f00, 0x16f4a },
+ { 0x16f4f, 0x16f87 },
+ { 0x16f8f, 0x16f9f },
+ { 0x16fe0, 0x16fe1 },
+ { 0x16fe3, 0x16fe4 },
+ { 0x16ff0, 0x16ff1 },
+ { 0x17000, 0x187f7 },
+ { 0x18800, 0x18cd5 },
+ { 0x18d00, 0x18d08 },
+ { 0x1aff0, 0x1aff3 },
+ { 0x1aff5, 0x1affb },
+ { 0x1affd, 0x1affe },
+ { 0x1b000, 0x1b122 },
+ { 0x1b150, 0x1b152 },
+ { 0x1b164, 0x1b167 },
+ { 0x1b170, 0x1b2fb },
+ { 0x1bc00, 0x1bc6a },
+ { 0x1bc70, 0x1bc7c },
+ { 0x1bc80, 0x1bc88 },
+ { 0x1bc90, 0x1bc99 },
+ { 0x1bc9d, 0x1bc9e },
+ { 0x1cf00, 0x1cf2d },
+ { 0x1cf30, 0x1cf46 },
+ { 0x1d165, 0x1d169 },
+ { 0x1d16d, 0x1d172 },
+ { 0x1d17b, 0x1d182 },
+ { 0x1d185, 0x1d18b },
+ { 0x1d1aa, 0x1d1ad },
+ { 0x1d242, 0x1d244 },
+ { 0x1d400, 0x1d454 },
+ { 0x1d456, 0x1d49c },
+ { 0x1d49e, 0x1d49f },
+ { 0x1d4a2, 0x1d4a2 },
+ { 0x1d4a5, 0x1d4a6 },
+ { 0x1d4a9, 0x1d4ac },
+ { 0x1d4ae, 0x1d4b9 },
+ { 0x1d4bb, 0x1d4bb },
+ { 0x1d4bd, 0x1d4c3 },
+ { 0x1d4c5, 0x1d505 },
+ { 0x1d507, 0x1d50a },
+ { 0x1d50d, 0x1d514 },
+ { 0x1d516, 0x1d51c },
+ { 0x1d51e, 0x1d539 },
+ { 0x1d53b, 0x1d53e },
+ { 0x1d540, 0x1d544 },
+ { 0x1d546, 0x1d546 },
+ { 0x1d54a, 0x1d550 },
+ { 0x1d552, 0x1d6a5 },
+ { 0x1d6a8, 0x1d6c0 },
+ { 0x1d6c2, 0x1d6da },
+ { 0x1d6dc, 0x1d6fa },
+ { 0x1d6fc, 0x1d714 },
+ { 0x1d716, 0x1d734 },
+ { 0x1d736, 0x1d74e },
+ { 0x1d750, 0x1d76e },
+ { 0x1d770, 0x1d788 },
+ { 0x1d78a, 0x1d7a8 },
+ { 0x1d7aa, 0x1d7c2 },
+ { 0x1d7c4, 0x1d7cb },
+ { 0x1d7ce, 0x1d7ff },
+ { 0x1da00, 0x1da36 },
+ { 0x1da3b, 0x1da6c },
+ { 0x1da75, 0x1da75 },
+ { 0x1da84, 0x1da84 },
+ { 0x1da9b, 0x1da9f },
+ { 0x1daa1, 0x1daaf },
+ { 0x1df00, 0x1df1e },
+ { 0x1e000, 0x1e006 },
+ { 0x1e008, 0x1e018 },
+ { 0x1e01b, 0x1e021 },
+ { 0x1e023, 0x1e024 },
+ { 0x1e026, 0x1e02a },
+ { 0x1e100, 0x1e12c },
+ { 0x1e130, 0x1e13d },
+ { 0x1e140, 0x1e149 },
+ { 0x1e14e, 0x1e14e },
+ { 0x1e290, 0x1e2ae },
+ { 0x1e2c0, 0x1e2f9 },
+ { 0x1e7e0, 0x1e7e6 },
+ { 0x1e7e8, 0x1e7eb },
+ { 0x1e7ed, 0x1e7ee },
+ { 0x1e7f0, 0x1e7fe },
+ { 0x1e800, 0x1e8c4 },
+ { 0x1e8d0, 0x1e8d6 },
+ { 0x1e900, 0x1e94b },
+ { 0x1e950, 0x1e959 },
+ { 0x1ee00, 0x1ee03 },
+ { 0x1ee05, 0x1ee1f },
+ { 0x1ee21, 0x1ee22 },
+ { 0x1ee24, 0x1ee24 },
+ { 0x1ee27, 0x1ee27 },
+ { 0x1ee29, 0x1ee32 },
+ { 0x1ee34, 0x1ee37 },
+ { 0x1ee39, 0x1ee39 },
+ { 0x1ee3b, 0x1ee3b },
+ { 0x1ee42, 0x1ee42 },
+ { 0x1ee47, 0x1ee47 },
+ { 0x1ee49, 0x1ee49 },
+ { 0x1ee4b, 0x1ee4b },
+ { 0x1ee4d, 0x1ee4f },
+ { 0x1ee51, 0x1ee52 },
+ { 0x1ee54, 0x1ee54 },
+ { 0x1ee57, 0x1ee57 },
+ { 0x1ee59, 0x1ee59 },
+ { 0x1ee5b, 0x1ee5b },
+ { 0x1ee5d, 0x1ee5d },
+ { 0x1ee5f, 0x1ee5f },
+ { 0x1ee61, 0x1ee62 },
+ { 0x1ee64, 0x1ee64 },
+ { 0x1ee67, 0x1ee6a },
+ { 0x1ee6c, 0x1ee72 },
+ { 0x1ee74, 0x1ee77 },
+ { 0x1ee79, 0x1ee7c },
+ { 0x1ee7e, 0x1ee7e },
+ { 0x1ee80, 0x1ee89 },
+ { 0x1ee8b, 0x1ee9b },
+ { 0x1eea1, 0x1eea3 },
+ { 0x1eea5, 0x1eea9 },
+ { 0x1eeab, 0x1eebb },
+ { 0x1fbf0, 0x1fbf9 },
+ { 0x20000, 0x2a6df },
+ { 0x2a700, 0x2b738 },
+ { 0x2b740, 0x2b81d },
+ { 0x2b820, 0x2cea1 },
+ { 0x2ceb0, 0x2ebe0 },
+ { 0x2f800, 0x2fa1d },
+ { 0x30000, 0x3134a },
+ { 0xe0100, 0xe01ef },
+ { 0x0, 0x0 },
+};
+
+#endif // CHAR_RANGE_INC
diff --git a/core/string/char_utils.h b/core/string/char_utils.h
new file mode 100644
index 0000000000..67147a4327
--- /dev/null
+++ b/core/string/char_utils.h
@@ -0,0 +1,112 @@
+/*************************************************************************/
+/* char_utils.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#ifndef CHAR_UTILS_H
+#define CHAR_UTILS_H
+
+#include "core/typedefs.h"
+
+#include "char_range.inc"
+
+static _FORCE_INLINE_ bool is_unicode_identifier_start(char32_t c) {
+ for (int i = 0; xid_start[i].start != 0; i++) {
+ if (c >= xid_start[i].start && c <= xid_start[i].end) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static _FORCE_INLINE_ bool is_unicode_identifier_continue(char32_t c) {
+ for (int i = 0; xid_continue[i].start != 0; i++) {
+ if (c >= xid_continue[i].start && c <= xid_continue[i].end) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) {
+ return (c >= 'A' && c <= 'Z');
+}
+
+static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) {
+ return (c >= 'a' && c <= 'z');
+}
+
+static _FORCE_INLINE_ bool is_digit(char32_t c) {
+ return (c >= '0' && c <= '9');
+}
+
+static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
+ return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
+}
+
+static _FORCE_INLINE_ bool is_binary_digit(char32_t c) {
+ return (c == '0' || c == '1');
+}
+
+static _FORCE_INLINE_ bool is_ascii_char(char32_t c) {
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) {
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
+}
+
+static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) {
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
+}
+
+static _FORCE_INLINE_ bool is_symbol(char32_t c) {
+ return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
+}
+
+static _FORCE_INLINE_ bool is_control(char32_t p_char) {
+ return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f);
+}
+
+static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) {
+ return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085);
+}
+
+static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) {
+ return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029);
+}
+
+static _FORCE_INLINE_ bool is_punct(char32_t p_char) {
+ return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f);
+}
+
+static _FORCE_INLINE_ bool is_underscore(char32_t p_char) {
+ return (p_char == '_');
+}
+
+#endif // CHAR_UTILS_H
diff --git a/core/string/locales.h b/core/string/locales.h
new file mode 100644
index 0000000000..32d6608ec2
--- /dev/null
+++ b/core/string/locales.h
@@ -0,0 +1,1197 @@
+/*************************************************************************/
+/* locales.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#ifndef LOCALES_H
+#define LOCALES_H
+
+// Windows has some weird locale identifiers which do not honor the ISO 639-1
+// standardized nomenclature. Whenever those don't conflict with existing ISO
+// identifiers, we override them.
+//
+// Reference:
+// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx
+
+static const char *locale_renames[][2] = {
+ { "in", "id" }, // Indonesian
+ { "iw", "he" }, // Hebrew
+ { "no", "nb" }, // Norwegian Bokmål
+ { "C", "en" }, // Locale is not set, fallback to English.
+ { nullptr, nullptr }
+};
+
+// Additional script information to preferred scripts.
+// Language code, script code, default country, supported countries.
+// Reference:
+// - https://lh.2xlibre.net/locales/
+// - https://www.localeplanet.com/icu/index.html
+// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
+
+static const char *locale_scripts[][4] = {
+ { "az", "Latn", "", "AZ" },
+ { "az", "Arab", "", "IR" },
+ { "bs", "Latn", "", "BA" },
+ { "ff", "Latn", "", "BF,CM,GH,GM,GN,GW,LR,MR,NE,NG,SL,SN" },
+ { "pa", "Arab", "PK", "PK" },
+ { "pa", "Guru", "IN", "IN" },
+ { "sd", "Arab", "PK", "PK" },
+ { "sd", "Deva", "IN", "IN" },
+ { "shi", "Tfng", "", "MA" },
+ { "sr", "Cyrl", "", "BA,RS,XK" },
+ { "sr", "Latn", "", "ME" },
+ { "uz", "Latn", "", "UZ" },
+ { "uz", "Arab", "AF", "AF" },
+ { "vai", "Vaii", "", "LR" },
+ { "yue", "Hans", "CN", "CN" },
+ { "yue", "Hant", "HK", "HK" },
+ { "zh", "Hans", "CN", "CN,SG" },
+ { "zh", "Hant", "TW", "HK,MO,TW" },
+ { nullptr, nullptr, nullptr, nullptr }
+};
+
+// Additional mapping for outdated, temporary or exceptionally reserved country codes.
+// Reference:
+// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+// - https://www.iso.org/obp/ui/#search/code/
+
+static const char *country_renames[][2] = {
+ { "BU", "MM" }, // Burma, name changed to Myanmar.
+ { "KV", "XK" }, // Kosovo (temporary FIPS code to European Commission code), no official ISO code assigned.
+ { "TP", "TL" }, // East Timor, name changed to Timor-Leste.
+ { "UK", "GB" }, // United Kingdom, exceptionally reserved code.
+ { nullptr, nullptr }
+};
+
+// Country code, country name.
+// Reference:
+// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
+// - https://www.iso.org/obp/ui/#search/code/
+
+static const char *country_names[][2] = {
+ { "AC", "Ascension Island" }, // Exceptionally reserved.
+ { "AD", "Andorra" },
+ { "AE", "United Arab Emirates" },
+ { "AF", "Afghanistan" },
+ { "AG", "Antigua and Barbuda" },
+ { "AI", "Anguilla" },
+ { "AL", "Albania" },
+ { "AM", "Armenia" },
+ { "AN", "Netherlands Antilles" }, // Transitionally reserved, divided into BQ, CW and SX.
+ { "AO", "Angola" },
+ { "AQ", "Antarctica" },
+ { "AR", "Argentina" },
+ { "AS", "American Samoa" },
+ { "AT", "Austria" },
+ { "AU", "Australia" },
+ { "AW", "Aruba" },
+ { "AX", "Åland Islands" },
+ { "AZ", "Azerbaijan" },
+ { "BA", "Bosnia and Herzegovina" },
+ { "BB", "Barbados" },
+ { "BD", "Bangladesh" },
+ { "BE", "Belgium" },
+ { "BF", "Burkina Faso" },
+ { "BG", "Bulgaria" },
+ { "BH", "Bahrain" },
+ { "BI", "Burundi" },
+ { "BJ", "Benin" },
+ { "BL", "St. Barthélemy" },
+ { "BM", "Bermuda" },
+ { "BN", "Brunei" },
+ { "BO", "Bolivia" },
+ { "BQ", "Caribbean Netherlands" },
+ { "BR", "Brazil" },
+ { "BS", "Bahamas" },
+ { "BT", "Bhutan" },
+ { "BV", "Bouvet Island" },
+ { "BW", "Botswana" },
+ { "BY", "Belarus" },
+ { "BZ", "Belize" },
+ { "CA", "Canada" },
+ { "CC", "Cocos (Keeling) Islands" },
+ { "CD", "Congo - Kinshasa" },
+ { "CF", "Central African Republic" },
+ { "CG", "Congo - Brazzaville" },
+ { "CH", "Switzerland" },
+ { "CI", "Côte d'Ivoire" },
+ { "CK", "Cook Islands" },
+ { "CL", "Chile" },
+ { "CM", "Cameroon" },
+ { "CN", "China" },
+ { "CO", "Colombia" },
+ { "CP", "Clipperton Island" }, // Exceptionally reserved.
+ { "CR", "Costa Rica" },
+ { "CQ", "Island of Sark" }, // Exceptionally reserved.
+ { "CU", "Cuba" },
+ { "CV", "Cabo Verde" },
+ { "CW", "Curaçao" },
+ { "CX", "Christmas Island" },
+ { "CY", "Cyprus" },
+ { "CZ", "Czechia" },
+ { "DE", "Germany" },
+ { "DG", "Diego Garcia" }, // Exceptionally reserved.
+ { "DJ", "Djibouti" },
+ { "DK", "Denmark" },
+ { "DM", "Dominica" },
+ { "DO", "Dominican Republic" },
+ { "DZ", "Algeria" },
+ { "EA", "Ceuta and Melilla" }, // Exceptionally reserved.
+ { "EC", "Ecuador" },
+ { "EE", "Estonia" },
+ { "EG", "Egypt" },
+ { "EH", "Western Sahara" },
+ { "ER", "Eritrea" },
+ { "ES", "Spain" },
+ { "ET", "Ethiopia" },
+ { "EU", "European Union" }, // Exceptionally reserved.
+ { "EZ", "Eurozone" }, // Exceptionally reserved.
+ { "FI", "Finland" },
+ { "FJ", "Fiji" },
+ { "FK", "Falkland Islands" },
+ { "FM", "Micronesia" },
+ { "FO", "Faroe Islands" },
+ { "FR", "France" },
+ { "FX", "France, Metropolitan" }, // Exceptionally reserved.
+ { "GA", "Gabon" },
+ { "GB", "United Kingdom" },
+ { "GD", "Grenada" },
+ { "GE", "Georgia" },
+ { "GF", "French Guiana" },
+ { "GG", "Guernsey" },
+ { "GH", "Ghana" },
+ { "GI", "Gibraltar" },
+ { "GL", "Greenland" },
+ { "GM", "Gambia" },
+ { "GN", "Guinea" },
+ { "GP", "Guadeloupe" },
+ { "GQ", "Equatorial Guinea" },
+ { "GR", "Greece" },
+ { "GS", "South Georgia and South Sandwich Islands" },
+ { "GT", "Guatemala" },
+ { "GU", "Guam" },
+ { "GW", "Guinea-Bissau" },
+ { "GY", "Guyana" },
+ { "HK", "Hong Kong" },
+ { "HM", "Heard Island and McDonald Islands" },
+ { "HN", "Honduras" },
+ { "HR", "Croatia" },
+ { "HT", "Haiti" },
+ { "HU", "Hungary" },
+ { "IC", "Canary Islands" }, // Exceptionally reserved.
+ { "ID", "Indonesia" },
+ { "IE", "Ireland" },
+ { "IL", "Israel" },
+ { "IM", "Isle of Man" },
+ { "IN", "India" },
+ { "IO", "British Indian Ocean Territory" },
+ { "IQ", "Iraq" },
+ { "IR", "Iran" },
+ { "IS", "Iceland" },
+ { "IT", "Italy" },
+ { "JE", "Jersey" },
+ { "JM", "Jamaica" },
+ { "JO", "Jordan" },
+ { "JP", "Japan" },
+ { "KE", "Kenya" },
+ { "KG", "Kyrgyzstan" },
+ { "KH", "Cambodia" },
+ { "KI", "Kiribati" },
+ { "KM", "Comoros" },
+ { "KN", "St. Kitts and Nevis" },
+ { "KP", "North Korea" },
+ { "KR", "South Korea" },
+ { "KW", "Kuwait" },
+ { "KY", "Cayman Islands" },
+ { "KZ", "Kazakhstan" },
+ { "LA", "Laos" },
+ { "LB", "Lebanon" },
+ { "LC", "St. Lucia" },
+ { "LI", "Liechtenstein" },
+ { "LK", "Sri Lanka" },
+ { "LR", "Liberia" },
+ { "LS", "Lesotho" },
+ { "LT", "Lithuania" },
+ { "LU", "Luxembourg" },
+ { "LV", "Latvia" },
+ { "LY", "Libya" },
+ { "MA", "Morocco" },
+ { "MC", "Monaco" },
+ { "MD", "Moldova" },
+ { "ME", "Montenegro" },
+ { "MF", "St. Martin" },
+ { "MG", "Madagascar" },
+ { "MH", "Marshall Islands" },
+ { "MK", "North Macedonia" },
+ { "ML", "Mali" },
+ { "MM", "Myanmar" },
+ { "MN", "Mongolia" },
+ { "MO", "Macao" },
+ { "MP", "Northern Mariana Islands" },
+ { "MQ", "Martinique" },
+ { "MR", "Mauritania" },
+ { "MS", "Montserrat" },
+ { "MT", "Malta" },
+ { "MU", "Mauritius" },
+ { "MV", "Maldives" },
+ { "MW", "Malawi" },
+ { "MX", "Mexico" },
+ { "MY", "Malaysia" },
+ { "MZ", "Mozambique" },
+ { "NA", "Namibia" },
+ { "NC", "New Caledonia" },
+ { "NE", "Niger" },
+ { "NF", "Norfolk Island" },
+ { "NG", "Nigeria" },
+ { "NI", "Nicaragua" },
+ { "NL", "Netherlands" },
+ { "NO", "Norway" },
+ { "NP", "Nepal" },
+ { "NR", "Nauru" },
+ { "NU", "Niue" },
+ { "NZ", "New Zealand" },
+ { "OM", "Oman" },
+ { "PA", "Panama" },
+ { "PE", "Peru" },
+ { "PF", "French Polynesia" },
+ { "PG", "Papua New Guinea" },
+ { "PH", "Philippines" },
+ { "PK", "Pakistan" },
+ { "PL", "Poland" },
+ { "PM", "St. Pierre and Miquelon" },
+ { "PN", "Pitcairn Islands" },
+ { "PR", "Puerto Rico" },
+ { "PS", "Palestine" },
+ { "PT", "Portugal" },
+ { "PW", "Palau" },
+ { "PY", "Paraguay" },
+ { "QA", "Qatar" },
+ { "RE", "Réunion" },
+ { "RO", "Romania" },
+ { "RS", "Serbia" },
+ { "RU", "Russia" },
+ { "RW", "Rwanda" },
+ { "SA", "Saudi Arabia" },
+ { "SB", "Solomon Islands" },
+ { "SC", "Seychelles" },
+ { "SD", "Sudan" },
+ { "SE", "Sweden" },
+ { "SG", "Singapore" },
+ { "SH", "St. Helena, Ascension and Tristan da Cunha" },
+ { "SI", "Slovenia" },
+ { "SJ", "Svalbard and Jan Mayen" },
+ { "SK", "Slovakia" },
+ { "SL", "Sierra Leone" },
+ { "SM", "San Marino" },
+ { "SN", "Senegal" },
+ { "SO", "Somalia" },
+ { "SR", "Suriname" },
+ { "SS", "South Sudan" },
+ { "ST", "Sao Tome and Principe" },
+ { "SV", "El Salvador" },
+ { "SX", "Sint Maarten" },
+ { "SY", "Syria" },
+ { "SZ", "Eswatini" },
+ { "TA", "Tristan da Cunha" }, // Exceptionally reserved.
+ { "TC", "Turks and Caicos Islands" },
+ { "TD", "Chad" },
+ { "TF", "French Southern Territories" },
+ { "TG", "Togo" },
+ { "TH", "Thailand" },
+ { "TJ", "Tajikistan" },
+ { "TK", "Tokelau" },
+ { "TL", "Timor-Leste" },
+ { "TM", "Turkmenistan" },
+ { "TN", "Tunisia" },
+ { "TO", "Tonga" },
+ { "TR", "Turkey" },
+ { "TT", "Trinidad and Tobago" },
+ { "TV", "Tuvalu" },
+ { "TW", "Taiwan" },
+ { "TZ", "Tanzania" },
+ { "UA", "Ukraine" },
+ { "UG", "Uganda" },
+ { "UM", "U.S. Outlying Islands" },
+ { "US", "United States of America" },
+ { "UY", "Uruguay" },
+ { "UZ", "Uzbekistan" },
+ { "VA", "Holy See" },
+ { "VC", "St. Vincent and the Grenadines" },
+ { "VE", "Venezuela" },
+ { "VG", "British Virgin Islands" },
+ { "VI", "U.S. Virgin Islands" },
+ { "VN", "Viet Nam" },
+ { "VU", "Vanuatu" },
+ { "WF", "Wallis and Futuna" },
+ { "WS", "Samoa" },
+ { "XK", "Kosovo" }, // Temporary code, no official ISO code assigned.
+ { "YE", "Yemen" },
+ { "YT", "Mayotte" },
+ { "ZA", "South Africa" },
+ { "ZM", "Zambia" },
+ { "ZW", "Zimbabwe" },
+ { nullptr, nullptr }
+};
+
+// Languages code, language name.
+// Reference:
+// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
+// - https://www.localeplanet.com/icu/index.html
+// - https://lh.2xlibre.net/locales/
+
+static const char *language_list[][2] = {
+ { "aa", "Afar" },
+ { "ab", "Abkhazian" },
+ { "ace", "Achinese" },
+ { "ach", "Acoli" },
+ { "ada", "Adangme" },
+ { "ady", "Adyghe" },
+ { "ae", "Avestan" },
+ { "aeb", "Tunisian Arabic" },
+ { "af", "Afrikaans" },
+ { "afh", "Afrihili" },
+ { "agq", "Aghem" },
+ { "ain", "Ainu" },
+ { "agr", "Aguaruna" },
+ { "ak", "Akan" },
+ { "akk", "Akkadian" },
+ { "akz", "Alabama" },
+ { "ale", "Aleut" },
+ { "aln", "Gheg Albanian" },
+ { "alt", "Southern Altai" },
+ { "am", "Amharic" },
+ { "an", "Aragonese" },
+ { "ang", "Old English" },
+ { "anp", "Angika" },
+ { "ar", "Arabic" },
+ { "arc", "Aramaic" },
+ { "arn", "Mapudungun" },
+ { "aro", "Araona" },
+ { "arp", "Arapaho" },
+ { "arq", "Algerian Arabic" },
+ { "ars", "Najdi Arabic" },
+ { "arw", "Arawak" },
+ { "ary", "Moroccan Arabic" },
+ { "arz", "Egyptian Arabic" },
+ { "as", "Assamese" },
+ { "asa", "Asu" },
+ { "ase", "American Sign Language" },
+ { "ast", "Asturian" },
+ { "av", "Avaric" },
+ { "avk", "Kotava" },
+ { "awa", "Awadhi" },
+ { "ayc", "Southern Aymara" },
+ { "ay", "Aymara" },
+ { "az", "Azerbaijani" },
+ { "ba", "Bashkir" },
+ { "bal", "Baluchi" },
+ { "ban", "Balinese" },
+ { "bar", "Bavarian" },
+ { "bas", "Bassa" },
+ { "bax", "Bamun" },
+ { "bbc", "Batak Toba" },
+ { "bbj", "Ghomala" },
+ { "be", "Belarusian" },
+ { "bej", "Beja" },
+ { "bem", "Bemba" },
+ { "ber", "Berber" },
+ { "bew", "Betawi" },
+ { "bez", "Bena" },
+ { "bfd", "Bafut" },
+ { "bfq", "Badaga" },
+ { "bg", "Bulgarian" },
+ { "bhb", "Bhili" },
+ { "bgn", "Western Balochi" },
+ { "bho", "Bhojpuri" },
+ { "bi", "Bislama" },
+ { "bik", "Bikol" },
+ { "bin", "Bini" },
+ { "bjn", "Banjar" },
+ { "bkm", "Kom" },
+ { "bla", "Siksika" },
+ { "bm", "Bambara" },
+ { "bn", "Bengali" },
+ { "bo", "Tibetan" },
+ { "bpy", "Bishnupriya" },
+ { "bqi", "Bakhtiari" },
+ { "br", "Breton" },
+ { "brh", "Brahui" },
+ { "brx", "Bodo" },
+ { "bs", "Bosnian" },
+ { "bss", "Akoose" },
+ { "bua", "Buriat" },
+ { "bug", "Buginese" },
+ { "bum", "Bulu" },
+ { "byn", "Bilin" },
+ { "byv", "Medumba" },
+ { "ca", "Catalan" },
+ { "cad", "Caddo" },
+ { "car", "Carib" },
+ { "cay", "Cayuga" },
+ { "cch", "Atsam" },
+ { "ccp", "Chakma" },
+ { "ce", "Chechen" },
+ { "ceb", "Cebuano" },
+ { "cgg", "Chiga" },
+ { "ch", "Chamorro" },
+ { "chb", "Chibcha" },
+ { "chg", "Chagatai" },
+ { "chk", "Chuukese" },
+ { "chm", "Mari" },
+ { "chn", "Chinook Jargon" },
+ { "cho", "Choctaw" },
+ { "chp", "Chipewyan" },
+ { "chr", "Cherokee" },
+ { "chy", "Cheyenne" },
+ { "cic", "Chickasaw" },
+ { "ckb", "Central Kurdish" },
+ { "csb", "Kashubian" },
+ { "cmn", "Mandarin Chinese" },
+ { "co", "Corsican" },
+ { "cop", "Coptic" },
+ { "cps", "Capiznon" },
+ { "cr", "Cree" },
+ { "crh", "Crimean Tatar" },
+ { "crs", "Seselwa Creole French" },
+ { "cs", "Czech" },
+ { "csb", "Kashubian" },
+ { "cu", "Church Slavic" },
+ { "cv", "Chuvash" },
+ { "cy", "Welsh" },
+ { "da", "Danish" },
+ { "dak", "Dakota" },
+ { "dar", "Dargwa" },
+ { "dav", "Taita" },
+ { "de", "German" },
+ { "del", "Delaware" },
+ { "den", "Slave" },
+ { "dgr", "Dogrib" },
+ { "din", "Dinka" },
+ { "dje", "Zarma" },
+ { "doi", "Dogri" },
+ { "dsb", "Lower Sorbian" },
+ { "dtp", "Central Dusun" },
+ { "dua", "Duala" },
+ { "dum", "Middle Dutch" },
+ { "dv", "Dhivehi" },
+ { "dyo", "Jola-Fonyi" },
+ { "dyu", "Dyula" },
+ { "dz", "Dzongkha" },
+ { "dzg", "Dazaga" },
+ { "ebu", "Embu" },
+ { "ee", "Ewe" },
+ { "efi", "Efik" },
+ { "egl", "Emilian" },
+ { "egy", "Ancient Egyptian" },
+ { "eka", "Ekajuk" },
+ { "el", "Greek" },
+ { "elx", "Elamite" },
+ { "en", "English" },
+ { "enm", "Middle English" },
+ { "eo", "Esperanto" },
+ { "es", "Spanish" },
+ { "esu", "Central Yupik" },
+ { "et", "Estonian" },
+ { "eu", "Basque" },
+ { "ewo", "Ewondo" },
+ { "ext", "Extremaduran" },
+ { "fa", "Persian" },
+ { "fan", "Fang" },
+ { "fat", "Fanti" },
+ { "ff", "Fulah" },
+ { "fi", "Finnish" },
+ { "fil", "Filipino" },
+ { "fit", "Tornedalen Finnish" },
+ { "fj", "Fijian" },
+ { "fo", "Faroese" },
+ { "fon", "Fon" },
+ { "fr", "French" },
+ { "frc", "Cajun French" },
+ { "frm", "Middle French" },
+ { "fro", "Old French" },
+ { "frp", "Arpitan" },
+ { "frr", "Northern Frisian" },
+ { "frs", "Eastern Frisian" },
+ { "fur", "Friulian" },
+ { "fy", "Western Frisian" },
+ { "ga", "Irish" },
+ { "gaa", "Ga" },
+ { "gag", "Gagauz" },
+ { "gan", "Gan Chinese" },
+ { "gay", "Gayo" },
+ { "gba", "Gbaya" },
+ { "gbz", "Zoroastrian Dari" },
+ { "gd", "Scottish Gaelic" },
+ { "gez", "Geez" },
+ { "gil", "Gilbertese" },
+ { "gl", "Galician" },
+ { "glk", "Gilaki" },
+ { "gmh", "Middle High German" },
+ { "gn", "Guarani" },
+ { "goh", "Old High German" },
+ { "gom", "Goan Konkani" },
+ { "gon", "Gondi" },
+ { "gor", "Gorontalo" },
+ { "got", "Gothic" },
+ { "grb", "Grebo" },
+ { "grc", "Ancient Greek" },
+ { "gsw", "Swiss German" },
+ { "gu", "Gujarati" },
+ { "guc", "Wayuu" },
+ { "gur", "Frafra" },
+ { "guz", "Gusii" },
+ { "gv", "Manx" },
+ { "gwi", "Gwichʼin" },
+ { "ha", "Hausa" },
+ { "hai", "Haida" },
+ { "hak", "Hakka Chinese" },
+ { "haw", "Hawaiian" },
+ { "he", "Hebrew" },
+ { "hi", "Hindi" },
+ { "hif", "Fiji Hindi" },
+ { "hil", "Hiligaynon" },
+ { "hit", "Hittite" },
+ { "hmn", "Hmong" },
+ { "ho", "Hiri Motu" },
+ { "hne", "Chhattisgarhi" },
+ { "hr", "Croatian" },
+ { "hsb", "Upper Sorbian" },
+ { "hsn", "Xiang Chinese" },
+ { "ht", "Haitian" },
+ { "hu", "Hungarian" },
+ { "hup", "Hupa" },
+ { "hus", "Huastec" },
+ { "hy", "Armenian" },
+ { "hz", "Herero" },
+ { "ia", "Interlingua" },
+ { "iba", "Iban" },
+ { "ibb", "Ibibio" },
+ { "id", "Indonesian" },
+ { "ie", "Interlingue" },
+ { "ig", "Igbo" },
+ { "ii", "Sichuan Yi" },
+ { "ik", "Inupiaq" },
+ { "ilo", "Iloko" },
+ { "inh", "Ingush" },
+ { "io", "Ido" },
+ { "is", "Icelandic" },
+ { "it", "Italian" },
+ { "iu", "Inuktitut" },
+ { "izh", "Ingrian" },
+ { "ja", "Japanese" },
+ { "jam", "Jamaican Creole English" },
+ { "jbo", "Lojban" },
+ { "jgo", "Ngomba" },
+ { "jmc", "Machame" },
+ { "jpr", "Judeo-Persian" },
+ { "jrb", "Judeo-Arabic" },
+ { "jut", "Jutish" },
+ { "jv", "Javanese" },
+ { "ka", "Georgian" },
+ { "kaa", "Kara-Kalpak" },
+ { "kab", "Kabyle" },
+ { "kac", "Kachin" },
+ { "kaj", "Jju" },
+ { "kam", "Kamba" },
+ { "kaw", "Kawi" },
+ { "kbd", "Kabardian" },
+ { "kbl", "Kanembu" },
+ { "kcg", "Tyap" },
+ { "kde", "Makonde" },
+ { "kea", "Kabuverdianu" },
+ { "ken", "Kenyang" },
+ { "kfo", "Koro" },
+ { "kg", "Kongo" },
+ { "kgp", "Kaingang" },
+ { "kha", "Khasi" },
+ { "kho", "Khotanese" },
+ { "khq", "Koyra Chiini" },
+ { "khw", "Khowar" },
+ { "ki", "Kikuyu" },
+ { "kiu", "Kirmanjki" },
+ { "kj", "Kuanyama" },
+ { "kk", "Kazakh" },
+ { "kkj", "Kako" },
+ { "kl", "Kalaallisut" },
+ { "kln", "Kalenjin" },
+ { "km", "Central Khmer" },
+ { "kmb", "Kimbundu" },
+ { "kn", "Kannada" },
+ { "ko", "Korean" },
+ { "koi", "Komi-Permyak" },
+ { "kok", "Konkani" },
+ { "kos", "Kosraean" },
+ { "kpe", "Kpelle" },
+ { "kr", "Kanuri" },
+ { "krc", "Karachay-Balkar" },
+ { "kri", "Krio" },
+ { "krj", "Kinaray-a" },
+ { "krl", "Karelian" },
+ { "kru", "Kurukh" },
+ { "ks", "Kashmiri" },
+ { "ksb", "Shambala" },
+ { "ksf", "Bafia" },
+ { "ksh", "Colognian" },
+ { "ku", "Kurdish" },
+ { "kum", "Kumyk" },
+ { "kut", "Kutenai" },
+ { "kv", "Komi" },
+ { "kw", "Cornish" },
+ { "ky", "Kirghiz" },
+ { "lag", "Langi" },
+ { "la", "Latin" },
+ { "lad", "Ladino" },
+ { "lag", "Langi" },
+ { "lah", "Lahnda" },
+ { "lam", "Lamba" },
+ { "lb", "Luxembourgish" },
+ { "lez", "Lezghian" },
+ { "lfn", "Lingua Franca Nova" },
+ { "lg", "Ganda" },
+ { "li", "Limburgan" },
+ { "lij", "Ligurian" },
+ { "liv", "Livonian" },
+ { "lkt", "Lakota" },
+ { "lmo", "Lombard" },
+ { "ln", "Lingala" },
+ { "lo", "Lao" },
+ { "lol", "Mongo" },
+ { "lou", "Louisiana Creole" },
+ { "loz", "Lozi" },
+ { "lrc", "Northern Luri" },
+ { "lt", "Lithuanian" },
+ { "ltg", "Latgalian" },
+ { "lu", "Luba-Katanga" },
+ { "lua", "Luba-Lulua" },
+ { "lui", "Luiseno" },
+ { "lun", "Lunda" },
+ { "luo", "Luo" },
+ { "lus", "Mizo" },
+ { "luy", "Luyia" },
+ { "lv", "Latvian" },
+ { "lzh", "Literary Chinese" },
+ { "lzz", "Laz" },
+ { "mad", "Madurese" },
+ { "maf", "Mafa" },
+ { "mag", "Magahi" },
+ { "mai", "Maithili" },
+ { "mak", "Makasar" },
+ { "man", "Mandingo" },
+ { "mas", "Masai" },
+ { "mde", "Maba" },
+ { "mdf", "Moksha" },
+ { "mdr", "Mandar" },
+ { "men", "Mende" },
+ { "mer", "Meru" },
+ { "mfe", "Morisyen" },
+ { "mg", "Malagasy" },
+ { "mga", "Middle Irish" },
+ { "mgh", "Makhuwa-Meetto" },
+ { "mgo", "Metaʼ" },
+ { "mh", "Marshallese" },
+ { "mhr", "Eastern Mari" },
+ { "mi", "Māori" },
+ { "mic", "Mi'kmaq" },
+ { "min", "Minangkabau" },
+ { "miq", "Mískito" },
+ { "mjw", "Karbi" },
+ { "mk", "Macedonian" },
+ { "ml", "Malayalam" },
+ { "mn", "Mongolian" },
+ { "mnc", "Manchu" },
+ { "mni", "Manipuri" },
+ { "mnw", "Mon" },
+ { "mos", "Mossi" },
+ { "moh", "Mohawk" },
+ { "mr", "Marathi" },
+ { "mrj", "Western Mari" },
+ { "ms", "Malay" },
+ { "mt", "Maltese" },
+ { "mua", "Mundang" },
+ { "mus", "Muscogee" },
+ { "mwl", "Mirandese" },
+ { "mwr", "Marwari" },
+ { "mwv", "Mentawai" },
+ { "my", "Burmese" },
+ { "mye", "Myene" },
+ { "myv", "Erzya" },
+ { "mzn", "Mazanderani" },
+ { "na", "Nauru" },
+ { "nah", "Nahuatl" },
+ { "nan", "Min Nan Chinese" },
+ { "nap", "Neapolitan" },
+ { "naq", "Nama" },
+ { "nan", "Min Nan Chinese" },
+ { "nb", "Norwegian Bokmål" },
+ { "nd", "North Ndebele" },
+ { "nds", "Low German" },
+ { "ne", "Nepali" },
+ { "new", "Newari" },
+ { "nhn", "Central Nahuatl" },
+ { "ng", "Ndonga" },
+ { "nia", "Nias" },
+ { "niu", "Niuean" },
+ { "njo", "Ao Naga" },
+ { "nl", "Dutch" },
+ { "nmg", "Kwasio" },
+ { "nn", "Norwegian Nynorsk" },
+ { "nnh", "Ngiemboon" },
+ { "nog", "Nogai" },
+ { "non", "Old Norse" },
+ { "nov", "Novial" },
+ { "nqo", "N'ko" },
+ { "nr", "South Ndebele" },
+ { "nso", "Pedi" },
+ { "nus", "Nuer" },
+ { "nv", "Navajo" },
+ { "nwc", "Classical Newari" },
+ { "ny", "Nyanja" },
+ { "nym", "Nyamwezi" },
+ { "nyn", "Nyankole" },
+ { "nyo", "Nyoro" },
+ { "nzi", "Nzima" },
+ { "oc", "Occitan" },
+ { "oj", "Ojibwa" },
+ { "om", "Oromo" },
+ { "or", "Odia" },
+ { "os", "Ossetic" },
+ { "osa", "Osage" },
+ { "ota", "Ottoman Turkish" },
+ { "pa", "Panjabi" },
+ { "pag", "Pangasinan" },
+ { "pal", "Pahlavi" },
+ { "pam", "Pampanga" },
+ { "pap", "Papiamento" },
+ { "pau", "Palauan" },
+ { "pcd", "Picard" },
+ { "pcm", "Nigerian Pidgin" },
+ { "pdc", "Pennsylvania German" },
+ { "pdt", "Plautdietsch" },
+ { "peo", "Old Persian" },
+ { "pfl", "Palatine German" },
+ { "phn", "Phoenician" },
+ { "pi", "Pali" },
+ { "pl", "Polish" },
+ { "pms", "Piedmontese" },
+ { "pnt", "Pontic" },
+ { "pon", "Pohnpeian" },
+ { "pr", "Pirate" },
+ { "prg", "Prussian" },
+ { "pro", "Old Provençal" },
+ { "prs", "Dari" },
+ { "ps", "Pushto" },
+ { "pt", "Portuguese" },
+ { "qu", "Quechua" },
+ { "quc", "K'iche" },
+ { "qug", "Chimborazo Highland Quichua" },
+ { "quy", "Ayacucho Quechua" },
+ { "quz", "Cusco Quechua" },
+ { "raj", "Rajasthani" },
+ { "rap", "Rapanui" },
+ { "rar", "Rarotongan" },
+ { "rgn", "Romagnol" },
+ { "rif", "Riffian" },
+ { "rm", "Romansh" },
+ { "rn", "Rundi" },
+ { "ro", "Romanian" },
+ { "rof", "Rombo" },
+ { "rom", "Romany" },
+ { "rtm", "Rotuman" },
+ { "ru", "Russian" },
+ { "rue", "Rusyn" },
+ { "rug", "Roviana" },
+ { "rup", "Aromanian" },
+ { "rw", "Kinyarwanda" },
+ { "rwk", "Rwa" },
+ { "sa", "Sanskrit" },
+ { "sad", "Sandawe" },
+ { "sah", "Sakha" },
+ { "sam", "Samaritan Aramaic" },
+ { "saq", "Samburu" },
+ { "sas", "Sasak" },
+ { "sat", "Santali" },
+ { "saz", "Saurashtra" },
+ { "sba", "Ngambay" },
+ { "sbp", "Sangu" },
+ { "sc", "Sardinian" },
+ { "scn", "Sicilian" },
+ { "sco", "Scots" },
+ { "sd", "Sindhi" },
+ { "sdc", "Sassarese Sardinian" },
+ { "sdh", "Southern Kurdish" },
+ { "se", "Northern Sami" },
+ { "see", "Seneca" },
+ { "seh", "Sena" },
+ { "sei", "Seri" },
+ { "sel", "Selkup" },
+ { "ses", "Koyraboro Senni" },
+ { "sg", "Sango" },
+ { "sga", "Old Irish" },
+ { "sgs", "Samogitian" },
+ { "sh", "Serbo-Croatian" },
+ { "shi", "Tachelhit" },
+ { "shn", "Shan" },
+ { "shs", "Shuswap" },
+ { "shu", "Chadian Arabic" },
+ { "si", "Sinhala" },
+ { "sid", "Sidamo" },
+ { "sk", "Slovak" },
+ { "sl", "Slovenian" },
+ { "sli", "Lower Silesian" },
+ { "sly", "Selayar" },
+ { "sm", "Samoan" },
+ { "sma", "Southern Sami" },
+ { "smj", "Lule Sami" },
+ { "smn", "Inari Sami" },
+ { "sms", "Skolt Sami" },
+ { "sn", "Shona" },
+ { "snk", "Soninke" },
+ { "so", "Somali" },
+ { "sog", "Sogdien" },
+ { "son", "Songhai" },
+ { "sq", "Albanian" },
+ { "sr", "Serbian" },
+ { "srn", "Sranan Tongo" },
+ { "srr", "Serer" },
+ { "ss", "Swati" },
+ { "ssy", "Saho" },
+ { "st", "Southern Sotho" },
+ { "stq", "Saterland Frisian" },
+ { "su", "Sundanese" },
+ { "suk", "Sukuma" },
+ { "sus", "Susu" },
+ { "sux", "Sumerian" },
+ { "sv", "Swedish" },
+ { "sw", "Swahili" },
+ { "swb", "Comorian" },
+ { "swc", "Congo Swahili" },
+ { "syc", "Classical Syriac" },
+ { "syr", "Syriac" },
+ { "szl", "Silesian" },
+ { "ta", "Tamil" },
+ { "tcy", "Tulu" },
+ { "te", "Telugu" },
+ { "tem", "Timne" },
+ { "teo", "Teso" },
+ { "ter", "Tereno" },
+ { "tet", "Tetum" },
+ { "tg", "Tajik" },
+ { "th", "Thai" },
+ { "the", "Chitwania Tharu" },
+ { "ti", "Tigrinya" },
+ { "tig", "Tigre" },
+ { "tiv", "Tiv" },
+ { "tk", "Turkmen" },
+ { "tkl", "Tokelau" },
+ { "tkr", "Tsakhur" },
+ { "tl", "Tagalog" },
+ { "tlh", "Klingon" },
+ { "tli", "Tlingit" },
+ { "tly", "Talysh" },
+ { "tmh", "Tamashek" },
+ { "tn", "Tswana" },
+ { "to", "Tongan" },
+ { "tog", "Nyasa Tonga" },
+ { "tpi", "Tok Pisin" },
+ { "tr", "Turkish" },
+ { "tru", "Turoyo" },
+ { "trv", "Taroko" },
+ { "ts", "Tsonga" },
+ { "tsd", "Tsakonian" },
+ { "tsi", "Tsimshian" },
+ { "tt", "Tatar" },
+ { "ttt", "Muslim Tat" },
+ { "tum", "Tumbuka" },
+ { "tvl", "Tuvalu" },
+ { "tw", "Twi" },
+ { "twq", "Tasawaq" },
+ { "ty", "Tahitian" },
+ { "tyv", "Tuvinian" },
+ { "tzm", "Central Atlas Tamazight" },
+ { "udm", "Udmurt" },
+ { "ug", "Uyghur" },
+ { "uga", "Ugaritic" },
+ { "uk", "Ukrainian" },
+ { "umb", "Umbundu" },
+ { "unm", "Unami" },
+ { "ur", "Urdu" },
+ { "uz", "Uzbek" },
+ { "vai", "Vai" },
+ { "ve", "Venda" },
+ { "vec", "Venetian" },
+ { "vep", "Veps" },
+ { "vi", "Vietnamese" },
+ { "vls", "West Flemish" },
+ { "vmf", "Main-Franconian" },
+ { "vo", "Volapük" },
+ { "vot", "Votic" },
+ { "vro", "Võro" },
+ { "vun", "Vunjo" },
+ { "wa", "Walloon" },
+ { "wae", "Walser" },
+ { "wal", "Wolaytta" },
+ { "war", "Waray" },
+ { "was", "Washo" },
+ { "wbp", "Warlpiri" },
+ { "wo", "Wolof" },
+ { "wuu", "Wu Chinese" },
+ { "xal", "Kalmyk" },
+ { "xh", "Xhosa" },
+ { "xmf", "Mingrelian" },
+ { "xog", "Soga" },
+ { "yao", "Yao" },
+ { "yap", "Yapese" },
+ { "yav", "Yangben" },
+ { "ybb", "Yemba" },
+ { "yi", "Yiddish" },
+ { "yo", "Yoruba" },
+ { "yrl", "Nheengatu" },
+ { "yue", "Yue Chinese" },
+ { "yuw", "Papua New Guinea" },
+ { "za", "Zhuang" },
+ { "zap", "Zapotec" },
+ { "zbl", "Blissymbols" },
+ { "zea", "Zeelandic" },
+ { "zen", "Zenaga" },
+ { "zgh", "Standard Moroccan Tamazight" },
+ { "zh", "Chinese" },
+ { "zu", "Zulu" },
+ { "zun", "Zuni" },
+ { "zza", "Zaza" },
+ { nullptr, nullptr }
+};
+
+// Additional regional variants.
+// Variant name, supported languages.
+
+static const char *locale_variants[][2] = {
+ { "valencia", "ca" },
+ { "iqtelif", "tt" },
+ { "saaho", "aa" },
+ { "tradnl", "es" },
+ { nullptr, nullptr },
+};
+
+// Script names and codes (excludes typographic variants, special codes, reserved codes and aliases for combined scripts).
+// Reference:
+// - https://en.wikipedia.org/wiki/ISO_15924
+
+static const char *script_list[][2] = {
+ { "Adlam", "Adlm" },
+ { "Afaka", "Afak" },
+ { "Caucasian Albanian", "Aghb" },
+ { "Ahom", "Ahom" },
+ { "Arabic", "Arab" },
+ { "Imperial Aramaic", "Armi" },
+ { "Armenian", "Armn" },
+ { "Avestan", "Avst" },
+ { "Balinese", "Bali" },
+ { "Bamum", "Bamu" },
+ { "Bassa Vah", "Bass" },
+ { "Batak", "Batk" },
+ { "Bengali", "Beng" },
+ { "Bhaiksuki", "Bhks" },
+ { "Blissymbols", "Blis" },
+ { "Bopomofo", "Bopo" },
+ { "Brahmi", "Brah" },
+ { "Braille", "Brai" },
+ { "Buginese", "Bugi" },
+ { "Buhid", "Buhd" },
+ { "Chakma", "Cakm" },
+ { "Unified Canadian Aboriginal", "Cans" },
+ { "Carian", "Cari" },
+ { "Cham", "Cham" },
+ { "Cherokee", "Cher" },
+ { "Chorasmian", "Chrs" },
+ { "Cirth", "Cirt" },
+ { "Coptic", "Copt" },
+ { "Cypro-Minoan", "Cpmn" },
+ { "Cypriot", "Cprt" },
+ { "Cyrillic", "Cyrl" },
+ { "Devanagari", "Deva" },
+ { "Dives Akuru", "Diak" },
+ { "Dogra", "Dogr" },
+ { "Deseret", "Dsrt" },
+ { "Duployan", "Dupl" },
+ { "Egyptian demotic", "Egyd" },
+ { "Egyptian hieratic", "Egyh" },
+ { "Egyptian hieroglyphs", "Egyp" },
+ { "Elbasan", "Elba" },
+ { "Elymaic", "Elym" },
+ { "Ethiopic", "Ethi" },
+ { "Khutsuri", "Geok" },
+ { "Georgian", "Geor" },
+ { "Glagolitic", "Glag" },
+ { "Gunjala Gondi", "Gong" },
+ { "Masaram Gondi", "Gonm" },
+ { "Gothic", "Goth" },
+ { "Grantha", "Gran" },
+ { "Greek", "Grek" },
+ { "Gujarati", "Gujr" },
+ { "Gurmukhi", "Guru" },
+ { "Hangul", "Hang" },
+ { "Han", "Hani" },
+ { "Hanunoo", "Hano" },
+ { "Simplified", "Hans" },
+ { "Traditional", "Hant" },
+ { "Hatran", "Hatr" },
+ { "Hebrew", "Hebr" },
+ { "Hiragana", "Hira" },
+ { "Anatolian Hieroglyphs", "Hluw" },
+ { "Pahawh Hmong", "Hmng" },
+ { "Nyiakeng Puachue Hmong", "Hmnp" },
+ { "Old Hungarian", "Hung" },
+ { "Indus", "Inds" },
+ { "Old Italic", "Ital" },
+ { "Javanese", "Java" },
+ { "Jurchen", "Jurc" },
+ { "Kayah Li", "Kali" },
+ { "Katakana", "Kana" },
+ { "Kharoshthi", "Khar" },
+ { "Khmer", "Khmr" },
+ { "Khojki", "Khoj" },
+ { "Khitan large script", "Kitl" },
+ { "Khitan small script", "Kits" },
+ { "Kannada", "Knda" },
+ { "Kpelle", "Kpel" },
+ { "Kaithi", "Kthi" },
+ { "Tai Tham", "Lana" },
+ { "Lao", "Laoo" },
+ { "Latin", "Latn" },
+ { "Leke", "Leke" },
+ { "Lepcha", "Lepc" },
+ { "Limbu", "Limb" },
+ { "Linear A", "Lina" },
+ { "Linear B", "Linb" },
+ { "Lisu", "Lisu" },
+ { "Loma", "Loma" },
+ { "Lycian", "Lyci" },
+ { "Lydian", "Lydi" },
+ { "Mahajani", "Mahj" },
+ { "Makasar", "Maka" },
+ { "Mandaic", "Mand" },
+ { "Manichaean", "Mani" },
+ { "Marchen", "Marc" },
+ { "Mayan Hieroglyphs", "Maya" },
+ { "Medefaidrin", "Medf" },
+ { "Mende Kikakui", "Mend" },
+ { "Meroitic Cursive", "Merc" },
+ { "Meroitic Hieroglyphs", "Mero" },
+ { "Malayalam", "Mlym" },
+ { "Modi", "Modi" },
+ { "Mongolian", "Mong" },
+ { "Moon", "Moon" },
+ { "Mro", "Mroo" },
+ { "Meitei Mayek", "Mtei" },
+ { "Multani", "Mult" },
+ { "Myanmar (Burmese)", "Mymr" },
+ { "Nandinagari", "Nand" },
+ { "Old North Arabian", "Narb" },
+ { "Nabataean", "Nbat" },
+ { "Newa", "Newa" },
+ { "Naxi Dongba", "Nkdb" },
+ { "Nakhi Geba", "Nkgb" },
+ { "N'ko", "Nkoo" },
+ { "Nüshu", "Nshu" },
+ { "Ogham", "Ogam" },
+ { "Ol Chiki", "Olck" },
+ { "Old Turkic", "Orkh" },
+ { "Oriya", "Orya" },
+ { "Osage", "Osge" },
+ { "Osmanya", "Osma" },
+ { "Old Uyghur", "Ougr" },
+ { "Palmyrene", "Palm" },
+ { "Pau Cin Hau", "Pauc" },
+ { "Proto-Cuneiform", "Pcun" },
+ { "Proto-Elamite", "Pelm" },
+ { "Old Permic", "Perm" },
+ { "Phags-pa", "Phag" },
+ { "Inscriptional Pahlavi", "Phli" },
+ { "Psalter Pahlavi", "Phlp" },
+ { "Book Pahlavi", "Phlv" },
+ { "Phoenician", "Phnx" },
+ { "Klingon", "Piqd" },
+ { "Miao", "Plrd" },
+ { "Inscriptional Parthian", "Prti" },
+ { "Proto-Sinaitic", "Psin" },
+ { "Ranjana", "Ranj" },
+ { "Rejang", "Rjng" },
+ { "Hanifi Rohingya", "Rohg" },
+ { "Rongorongo", "Roro" },
+ { "Runic", "Runr" },
+ { "Samaritan", "Samr" },
+ { "Sarati", "Sara" },
+ { "Old South Arabian", "Sarb" },
+ { "Saurashtra", "Saur" },
+ { "SignWriting", "Sgnw" },
+ { "Shavian", "Shaw" },
+ { "Sharada", "Shrd" },
+ { "Shuishu", "Shui" },
+ { "Siddham", "Sidd" },
+ { "Khudawadi", "Sind" },
+ { "Sinhala", "Sinh" },
+ { "Sogdian", "Sogd" },
+ { "Old Sogdian", "Sogo" },
+ { "Sora Sompeng", "Sora" },
+ { "Soyombo", "Soyo" },
+ { "Sundanese", "Sund" },
+ { "Syloti Nagri", "Sylo" },
+ { "Syriac", "Syrc" },
+ { "Tagbanwa", "Tagb" },
+ { "Takri", "Takr" },
+ { "Tai Le", "Tale" },
+ { "New Tai Lue", "Talu" },
+ { "Tamil", "Taml" },
+ { "Tangut", "Tang" },
+ { "Tai Viet", "Tavt" },
+ { "Telugu", "Telu" },
+ { "Tengwar", "Teng" },
+ { "Tifinagh", "Tfng" },
+ { "Tagalog", "Tglg" },
+ { "Thaana", "Thaa" },
+ { "Thai", "Thai" },
+ { "Tibetan", "Tibt" },
+ { "Tirhuta", "Tirh" },
+ { "Tangsa", "Tnsa" },
+ { "Toto", "Toto" },
+ { "Ugaritic", "Ugar" },
+ { "Vai", "Vaii" },
+ { "Visible Speech", "Visp" },
+ { "Vithkuqi", "Vith" },
+ { "Warang Citi", "Wara" },
+ { "Wancho", "Wcho" },
+ { "Woleai", "Wole" },
+ { "Old Persian", "Xpeo" },
+ { "Cuneiform", "Xsux" },
+ { "Yezidi", "Yezi" },
+ { "Yi", "Yiii" },
+ { "Zanabazar Square", "Zanb" },
+ { nullptr, nullptr }
+};
+
+#endif // LOCALES_H
diff --git a/core/string/node_path.cpp b/core/string/node_path.cpp
index 5fae13779e..30fa434fad 100644
--- a/core/string/node_path.cpp
+++ b/core/string/node_path.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -199,6 +199,21 @@ Vector<StringName> NodePath::get_subnames() const {
return Vector<StringName>();
}
+StringName NodePath::get_concatenated_names() const {
+ ERR_FAIL_COND_V(!data, StringName());
+
+ if (!data->concatenated_path) {
+ int pc = data->path.size();
+ String concatenated;
+ const StringName *sn = data->path.ptr();
+ for (int i = 0; i < pc; i++) {
+ concatenated += i == 0 ? sn[i].operator String() : "/" + sn[i];
+ }
+ data->concatenated_path = concatenated;
+ }
+ return data->concatenated_path;
+}
+
StringName NodePath::get_concatenated_subnames() const {
ERR_FAIL_COND_V(!data, StringName());
@@ -293,12 +308,12 @@ void NodePath::simplify() {
break;
}
if (data->path[i].operator String() == ".") {
- data->path.remove(i);
+ data->path.remove_at(i);
i--;
} else if (i > 0 && data->path[i].operator String() == ".." && data->path[i - 1].operator String() != "." && data->path[i - 1].operator String() != "..") {
//remove both
- data->path.remove(i - 1);
- data->path.remove(i - 1);
+ data->path.remove_at(i - 1);
+ data->path.remove_at(i - 1);
i -= 2;
if (data->path.size() == 0) {
data->path.push_back(".");
@@ -368,7 +383,7 @@ NodePath::NodePath(const String &p_path) {
for (int i = from; i <= path.length(); i++) {
if (path[i] == ':' || path[i] == 0) {
String str = path.substr(from, i - from);
- if (str == "") {
+ if (str.is_empty()) {
if (path[i] == 0) {
continue; // Allow end-of-path :
}
diff --git a/core/string/node_path.h b/core/string/node_path.h
index a277ab26fa..2bce33e21e 100644
--- a/core/string/node_path.h
+++ b/core/string/node_path.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -39,6 +39,7 @@ class NodePath {
SafeRefCount refcount;
Vector<StringName> path;
Vector<StringName> subpath;
+ StringName concatenated_path;
StringName concatenated_subpath;
bool absolute;
bool has_slashes;
@@ -59,6 +60,7 @@ public:
StringName get_subname(int p_idx) const;
Vector<StringName> get_names() const;
Vector<StringName> get_subnames() const;
+ StringName get_concatenated_names() const;
StringName get_concatenated_subnames() const;
NodePath rel_path_to(const NodePath &p_np) const;
diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp
index f8be564740..07302cc8c3 100644
--- a/core/string/optimized_translation.cpp
+++ b/core/string/optimized_translation.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -37,9 +37,9 @@ extern "C" {
}
struct CompressedString {
- int orig_len;
+ int orig_len = 0;
CharString compressed;
- int offset;
+ int offset = 0;
};
void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
@@ -53,7 +53,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
int size = Math::larger_prime(keys.size());
Vector<Vector<Pair<int, CharString>>> buckets;
- Vector<Map<uint32_t, int>> table;
+ Vector<HashMap<uint32_t, int>> table;
Vector<uint32_t> hfunc_table;
Vector<CompressedString> compressed;
@@ -108,7 +108,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
for (int i = 0; i < size; i++) {
const Vector<Pair<int, CharString>> &b = buckets[i];
- Map<uint32_t, int> &t = table.write[i];
+ HashMap<uint32_t, int> &t = table.write[i];
if (b.size() == 0) {
continue;
@@ -147,7 +147,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) {
int btindex = 0;
for (int i = 0; i < size; i++) {
- const Map<uint32_t, int> &t = table[i];
+ const HashMap<uint32_t, int> &t = table[i];
if (t.size() == 0) {
htw[i] = 0xFFFFFFFF; //nothing
continue;
diff --git a/core/string/optimized_translation.h b/core/string/optimized_translation.h
index bccf932383..f3dbfe8f5c 100644
--- a/core/string/optimized_translation.h
+++ b/core/string/optimized_translation.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -38,7 +38,7 @@ class OptimizedTranslation : public Translation {
//this translation uses a sort of modified perfect hash algorithm
//it requires hashing strings twice and then does a binary search,
- //so it's slower, but at the same time it has an extreemly high chance
+ //so it's slower, but at the same time it has an extremely high chance
//of catching untranslated strings
//load/store friendly types
diff --git a/core/string/print_string.cpp b/core/string/print_string.cpp
index 345371d733..592da58fe7 100644
--- a/core/string/print_string.cpp
+++ b/core/string/print_string.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -30,13 +30,12 @@
#include "print_string.h"
+#include "core/core_globals.h"
#include "core/os/os.h"
#include <stdio.h>
static PrintHandlerList *print_handler_list = nullptr;
-bool _print_line_enabled = true;
-bool _print_error_enabled = true;
void add_print_handler(PrintHandlerList *p_handler) {
_global_lock();
@@ -45,7 +44,7 @@ void add_print_handler(PrintHandlerList *p_handler) {
_global_unlock();
}
-void remove_print_handler(PrintHandlerList *p_handler) {
+void remove_print_handler(const PrintHandlerList *p_handler) {
_global_lock();
PrintHandlerList *prev = nullptr;
@@ -69,8 +68,8 @@ void remove_print_handler(PrintHandlerList *p_handler) {
ERR_FAIL_COND(l == nullptr);
}
-void print_line(String p_string) {
- if (!_print_line_enabled) {
+void __print_line(String p_string) {
+ if (!CoreGlobals::print_line_enabled) {
return;
}
@@ -79,7 +78,98 @@ void print_line(String p_string) {
_global_lock();
PrintHandlerList *l = print_handler_list;
while (l) {
- l->printfunc(l->userdata, p_string, false);
+ l->printfunc(l->userdata, p_string, false, false);
+ l = l->next;
+ }
+
+ _global_unlock();
+}
+
+void __print_line_rich(String p_string) {
+ if (!CoreGlobals::print_line_enabled) {
+ return;
+ }
+
+ // Convert a subset of BBCode tags to ANSI escape codes for correct display in the terminal.
+ // Support of those ANSI escape codes varies across terminal emulators,
+ // especially for italic and strikethrough.
+ String p_string_ansi = p_string;
+
+ p_string_ansi = p_string_ansi.replace("[b]", "\u001b[1m");
+ p_string_ansi = p_string_ansi.replace("[/b]", "\u001b[22m");
+ p_string_ansi = p_string_ansi.replace("[i]", "\u001b[3m");
+ p_string_ansi = p_string_ansi.replace("[/i]", "\u001b[23m");
+ p_string_ansi = p_string_ansi.replace("[u]", "\u001b[4m");
+ p_string_ansi = p_string_ansi.replace("[/u]", "\u001b[24m");
+ p_string_ansi = p_string_ansi.replace("[s]", "\u001b[9m");
+ p_string_ansi = p_string_ansi.replace("[/s]", "\u001b[29m");
+
+ p_string_ansi = p_string_ansi.replace("[indent]", " ");
+ p_string_ansi = p_string_ansi.replace("[/indent]", "");
+ p_string_ansi = p_string_ansi.replace("[code]", "\u001b[2m");
+ p_string_ansi = p_string_ansi.replace("[/code]", "\u001b[22m");
+ p_string_ansi = p_string_ansi.replace("[url]", "");
+ p_string_ansi = p_string_ansi.replace("[/url]", "");
+ p_string_ansi = p_string_ansi.replace("[center]", "\n\t\t\t");
+ p_string_ansi = p_string_ansi.replace("[/center]", "");
+ p_string_ansi = p_string_ansi.replace("[right]", "\n\t\t\t\t\t\t");
+ p_string_ansi = p_string_ansi.replace("[/right]", "");
+
+ if (p_string_ansi.contains("[color")) {
+ p_string_ansi = p_string_ansi.replace("[color=black]", "\u001b[30m");
+ p_string_ansi = p_string_ansi.replace("[color=red]", "\u001b[91m");
+ p_string_ansi = p_string_ansi.replace("[color=green]", "\u001b[92m");
+ p_string_ansi = p_string_ansi.replace("[color=lime]", "\u001b[92m");
+ p_string_ansi = p_string_ansi.replace("[color=yellow]", "\u001b[93m");
+ p_string_ansi = p_string_ansi.replace("[color=blue]", "\u001b[94m");
+ p_string_ansi = p_string_ansi.replace("[color=magenta]", "\u001b[95m");
+ p_string_ansi = p_string_ansi.replace("[color=pink]", "\u001b[38;5;218m");
+ p_string_ansi = p_string_ansi.replace("[color=purple]", "\u001b[38;5;98m");
+ p_string_ansi = p_string_ansi.replace("[color=cyan]", "\u001b[96m");
+ p_string_ansi = p_string_ansi.replace("[color=white]", "\u001b[97m");
+ p_string_ansi = p_string_ansi.replace("[color=orange]", "\u001b[38;5;208m");
+ p_string_ansi = p_string_ansi.replace("[color=gray]", "\u001b[90m");
+ p_string_ansi = p_string_ansi.replace("[/color]", "\u001b[39m");
+ }
+ if (p_string_ansi.contains("[bgcolor")) {
+ p_string_ansi = p_string_ansi.replace("[bgcolor=black]", "\u001b[40m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=red]", "\u001b[101m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=green]", "\u001b[102m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=lime]", "\u001b[102m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=yellow]", "\u001b[103m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=blue]", "\u001b[104m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=magenta]", "\u001b[105m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=pink]", "\u001b[48;5;218m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=purple]", "\u001b[48;5;98m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=cyan]", "\u001b[106m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=white]", "\u001b[107m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=orange]", "\u001b[48;5;208m");
+ p_string_ansi = p_string_ansi.replace("[bgcolor=gray]", "\u001b[100m");
+ p_string_ansi = p_string_ansi.replace("[/bgcolor]", "\u001b[49m");
+ }
+ if (p_string_ansi.contains("[fgcolor")) {
+ p_string_ansi = p_string_ansi.replace("[fgcolor=black]", "\u001b[30;40m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=red]", "\u001b[91;101m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=green]", "\u001b[92;102m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=lime]", "\u001b[92;102m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=yellow]", "\u001b[93;103m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=blue]", "\u001b[94;104m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=magenta]", "\u001b[95;105m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=pink]", "\u001b[38;5;218;48;5;218m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=purple]", "\u001b[38;5;98;48;5;98m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=cyan]", "\u001b[96;106m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=white]", "\u001b[97;107m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=orange]", "\u001b[38;5;208;48;5;208m");
+ p_string_ansi = p_string_ansi.replace("[fgcolor=gray]", "\u001b[90;100m");
+ p_string_ansi = p_string_ansi.replace("[/fgcolor]", "\u001b[39;49m");
+ }
+
+ OS::get_singleton()->print_rich("%s\n", p_string_ansi.utf8().get_data());
+
+ _global_lock();
+ PrintHandlerList *l = print_handler_list;
+ while (l) {
+ l->printfunc(l->userdata, p_string, false, true);
l = l->next;
}
@@ -87,7 +177,7 @@ void print_line(String p_string) {
}
void print_error(String p_string) {
- if (!_print_error_enabled) {
+ if (!CoreGlobals::print_error_enabled) {
return;
}
@@ -96,7 +186,7 @@ void print_error(String p_string) {
_global_lock();
PrintHandlerList *l = print_handler_list;
while (l) {
- l->printfunc(l->userdata, p_string, true);
+ l->printfunc(l->userdata, p_string, true, false);
l = l->next;
}
@@ -108,3 +198,7 @@ void print_verbose(String p_string) {
print_line(p_string);
}
}
+
+String stringify_variants(Variant p_var) {
+ return p_var.operator String();
+}
diff --git a/core/string/print_string.h b/core/string/print_string.h
index 1a9ff1efd6..ca930a3a0f 100644
--- a/core/string/print_string.h
+++ b/core/string/print_string.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -31,11 +31,11 @@
#ifndef PRINT_STRING_H
#define PRINT_STRING_H
-#include "core/string/ustring.h"
+#include "core/variant/variant.h"
extern void (*_print_func)(String);
-typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error);
+typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error, bool p_rich);
struct PrintHandlerList {
PrintHandlerFunc printfunc = nullptr;
@@ -46,13 +46,37 @@ struct PrintHandlerList {
PrintHandlerList() {}
};
+String stringify_variants(Variant p_var);
+
+template <typename... Args>
+String stringify_variants(Variant p_var, Args... p_args) {
+ return p_var.operator String() + " " + stringify_variants(p_args...);
+}
+
void add_print_handler(PrintHandlerList *p_handler);
-void remove_print_handler(PrintHandlerList *p_handler);
+void remove_print_handler(const PrintHandlerList *p_handler);
-extern bool _print_line_enabled;
-extern bool _print_error_enabled;
-extern void print_line(String p_string);
+extern void __print_line(String p_string);
+extern void __print_line_rich(String p_string);
extern void print_error(String p_string);
extern void print_verbose(String p_string);
+inline void print_line(Variant v) {
+ __print_line(stringify_variants(v));
+}
+
+inline void print_line_rich(Variant v) {
+ __print_line_rich(stringify_variants(v));
+}
+
+template <typename... Args>
+void print_line(Variant p_var, Args... p_args) {
+ __print_line(stringify_variants(p_var, p_args...));
+}
+
+template <typename... Args>
+void print_line_rich(Variant p_var, Args... p_args) {
+ __print_line_rich(stringify_variants(p_var, p_args...));
+}
+
#endif // PRINT_STRING_H
diff --git a/core/string/string_buffer.h b/core/string/string_buffer.h
index 33897c3674..424952f786 100644
--- a/core/string/string_buffer.h
+++ b/core/string/string_buffer.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
diff --git a/core/string/string_builder.cpp b/core/string/string_builder.cpp
index 834c87c845..7359ff59e1 100644
--- a/core/string/string_builder.cpp
+++ b/core/string/string_builder.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -33,7 +33,7 @@
#include <string.h>
StringBuilder &StringBuilder::append(const String &p_string) {
- if (p_string == String()) {
+ if (p_string.is_empty()) {
return *this;
}
diff --git a/core/string/string_builder.h b/core/string/string_builder.h
index 30ce2a06f7..897efa95ef 100644
--- a/core/string/string_builder.h
+++ b/core/string/string_builder.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
diff --git a/core/string/string_name.cpp b/core/string/string_name.cpp
index 9024f60dae..9c4fc4e1b7 100644
--- a/core/string/string_name.cpp
+++ b/core/string/string_name.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -73,23 +73,38 @@ void StringName::cleanup() {
d = d->next;
}
}
- print_line("\nStringName Reference Ranking:\n");
+
+ print_line("\nStringName reference ranking (from most to least referenced):\n");
+
data.sort_custom<DebugSortReferences>();
- for (int i = 0; i < MIN(100, data.size()); i++) {
+ int unreferenced_stringnames = 0;
+ int rarely_referenced_stringnames = 0;
+ for (int i = 0; i < data.size(); i++) {
print_line(itos(i + 1) + ": " + data[i]->get_name() + " - " + itos(data[i]->debug_references));
+ if (data[i]->debug_references == 0) {
+ unreferenced_stringnames += 1;
+ } else if (data[i]->debug_references < 5) {
+ rarely_referenced_stringnames += 1;
+ }
}
+
+ print_line(vformat("\nOut of %d StringNames, %d StringNames were never referenced during this run (0 times) (%.2f%%).", data.size(), unreferenced_stringnames, unreferenced_stringnames / float(data.size()) * 100));
+ print_line(vformat("Out of %d StringNames, %d StringNames were rarely referenced during this run (1-4 times) (%.2f%%).", data.size(), rarely_referenced_stringnames, rarely_referenced_stringnames / float(data.size()) * 100));
}
#endif
int lost_strings = 0;
for (int i = 0; i < STRING_TABLE_LEN; i++) {
while (_table[i]) {
_Data *d = _table[i];
- lost_strings++;
- if (d->static_count.get() != d->refcount.get() && OS::get_singleton()->is_stdout_verbose()) {
- if (d->cname) {
- print_line("Orphan StringName: " + String(d->cname));
- } else {
- print_line("Orphan StringName: " + String(d->name));
+ if (d->static_count.get() != d->refcount.get()) {
+ lost_strings++;
+
+ if (OS::get_singleton()->is_stdout_verbose()) {
+ if (d->cname) {
+ print_line("Orphan StringName: " + String(d->cname));
+ } else {
+ print_line("Orphan StringName: " + String(d->name));
+ }
}
}
@@ -232,6 +247,7 @@ StringName::StringName(const char *p_name, bool p_static) {
_data->cname = nullptr;
_data->next = _table[idx];
_data->prev = nullptr;
+
#ifdef DEBUG_ENABLED
if (unlikely(debug_stringname)) {
// Keep in memory, force static.
@@ -310,7 +326,7 @@ StringName::StringName(const String &p_name, bool p_static) {
ERR_FAIL_COND(!configured);
- if (p_name == String()) {
+ if (p_name.is_empty()) {
return;
}
@@ -434,7 +450,7 @@ StringName StringName::search(const char32_t *p_name) {
}
StringName StringName::search(const String &p_name) {
- ERR_FAIL_COND_V(p_name == "", StringName());
+ ERR_FAIL_COND_V(p_name.is_empty(), StringName());
MutexLock lock(mutex);
diff --git a/core/string/string_name.h b/core/string/string_name.h
index ce7988744b..ff4c41af94 100644
--- a/core/string/string_name.h
+++ b/core/string/string_name.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -35,6 +35,8 @@
#include "core/string/ustring.h"
#include "core/templates/safe_refcount.h"
+#define UNIQUE_NODE_PREFIX "%"
+
class Main;
struct StaticCString {
@@ -70,7 +72,7 @@ class StringName {
_Data *_data = nullptr;
union _HashUnion {
- _Data *ptr;
+ _Data *ptr = nullptr;
uint32_t hash;
};
@@ -100,6 +102,17 @@ public:
bool operator==(const String &p_name) const;
bool operator==(const char *p_name) const;
bool operator!=(const String &p_name) const;
+
+ _FORCE_INLINE_ bool is_node_unique_name() const {
+ if (!_data) {
+ return false;
+ }
+ if (_data->cname != nullptr) {
+ return (char32_t)_data->cname[0] == (char32_t)UNIQUE_NODE_PREFIX[0];
+ } else {
+ return (char32_t)_data->name[0] == (char32_t)UNIQUE_NODE_PREFIX[0];
+ }
+ }
_FORCE_INLINE_ bool operator<(const StringName &p_name) const {
return _data < p_name._data;
}
@@ -181,6 +194,18 @@ bool operator!=(const char *p_name, const StringName &p_string_name);
StringName _scs_create(const char *p_chr, bool p_static = false);
+/*
+ * The SNAME macro is used to speed up StringName creation, as it allows caching it after the first usage in a very efficient way.
+ * It should NOT be used everywhere, but instead in places where high performance is required and the creation of a StringName
+ * can be costly. Places where it should be used are:
+ * - Control::get_theme_*(<name> and Window::get_theme_*(<name> functions.
+ * - emit_signal(<name>,..) function
+ * - call_deferred(<name>,..) function
+ * - Comparisons to a StringName in overridden _set and _get methods.
+ *
+ * Use in places that can be called hundreds of times per frame (or more) is recommended, but this situation is very rare. If in doubt, do not use.
+ */
+
#define SNAME(m_arg) ([]() -> const StringName & { static StringName sname = _scs_create(m_arg, true); return sname; })()
#endif // STRING_NAME_H
diff --git a/core/string/translation.cpp b/core/string/translation.cpp
index cf61467d08..b83b7c786f 100644
--- a/core/string/translation.cpp
+++ b/core/string/translation.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -33,793 +33,12 @@
#include "core/config/project_settings.h"
#include "core/io/resource_loader.h"
#include "core/os/os.h"
+#include "core/string/locales.h"
#ifdef TOOLS_ENABLED
#include "main/main.h"
#endif
-// ISO 639-1 language codes (and a couple of three-letter ISO 639-2 codes),
-// with the addition of glibc locales with their regional identifiers.
-// This list must match the language names (in English) of locale_names.
-//
-// References:
-// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
-// - https://lh.2xlibre.net/locales/
-// - https://iso639-3.sil.org/
-
-static const char *locale_list[] = {
- "aa", // Afar
- "aa_DJ", // Afar (Djibouti)
- "aa_ER", // Afar (Eritrea)
- "aa_ET", // Afar (Ethiopia)
- "af", // Afrikaans
- "af_ZA", // Afrikaans (South Africa)
- "agr_PE", // Aguaruna (Peru)
- "ak_GH", // Akan (Ghana)
- "am_ET", // Amharic (Ethiopia)
- "an_ES", // Aragonese (Spain)
- "anp_IN", // Angika (India)
- "ar", // Arabic
- "ar_AE", // Arabic (United Arab Emirates)
- "ar_BH", // Arabic (Bahrain)
- "ar_DZ", // Arabic (Algeria)
- "ar_EG", // Arabic (Egypt)
- "ar_IN", // Arabic (India)
- "ar_IQ", // Arabic (Iraq)
- "ar_JO", // Arabic (Jordan)
- "ar_KW", // Arabic (Kuwait)
- "ar_LB", // Arabic (Lebanon)
- "ar_LY", // Arabic (Libya)
- "ar_MA", // Arabic (Morocco)
- "ar_OM", // Arabic (Oman)
- "ar_QA", // Arabic (Qatar)
- "ar_SA", // Arabic (Saudi Arabia)
- "ar_SD", // Arabic (Sudan)
- "ar_SS", // Arabic (South Soudan)
- "ar_SY", // Arabic (Syria)
- "ar_TN", // Arabic (Tunisia)
- "ar_YE", // Arabic (Yemen)
- "as_IN", // Assamese (India)
- "ast_ES", // Asturian (Spain)
- "ayc_PE", // Southern Aymara (Peru)
- "ay_PE", // Aymara (Peru)
- "az", // Azerbaijani
- "az_AZ", // Azerbaijani (Azerbaijan)
- "be", // Belarusian
- "be_BY", // Belarusian (Belarus)
- "bem_ZM", // Bemba (Zambia)
- "ber_DZ", // Berber languages (Algeria)
- "ber_MA", // Berber languages (Morocco)
- "bg", // Bulgarian
- "bg_BG", // Bulgarian (Bulgaria)
- "bhb_IN", // Bhili (India)
- "bho_IN", // Bhojpuri (India)
- "bi_TV", // Bislama (Tuvalu)
- "bn", // Bengali
- "bn_BD", // Bengali (Bangladesh)
- "bn_IN", // Bengali (India)
- "bo", // Tibetan
- "bo_CN", // Tibetan (China)
- "bo_IN", // Tibetan (India)
- "br", // Breton
- "br_FR", // Breton (France)
- "brx_IN", // Bodo (India)
- "bs_BA", // Bosnian (Bosnia and Herzegovina)
- "byn_ER", // Bilin (Eritrea)
- "ca", // Catalan
- "ca_AD", // Catalan (Andorra)
- "ca_ES", // Catalan (Spain)
- "ca_FR", // Catalan (France)
- "ca_IT", // Catalan (Italy)
- "ce_RU", // Chechen (Russia)
- "chr_US", // Cherokee (United States)
- "cmn_TW", // Mandarin Chinese (Taiwan)
- "crh_UA", // Crimean Tatar (Ukraine)
- "csb_PL", // Kashubian (Poland)
- "cs", // Czech
- "cs_CZ", // Czech (Czech Republic)
- "cv_RU", // Chuvash (Russia)
- "cy_GB", // Welsh (United Kingdom)
- "da", // Danish
- "da_DK", // Danish (Denmark)
- "de", // German
- "de_AT", // German (Austria)
- "de_BE", // German (Belgium)
- "de_CH", // German (Switzerland)
- "de_DE", // German (Germany)
- "de_IT", // German (Italy)
- "de_LU", // German (Luxembourg)
- "doi_IN", // Dogri (India)
- "dv_MV", // Dhivehi (Maldives)
- "dz_BT", // Dzongkha (Bhutan)
- "el", // Greek
- "el_CY", // Greek (Cyprus)
- "el_GR", // Greek (Greece)
- "en", // English
- "en_AG", // English (Antigua and Barbuda)
- "en_AU", // English (Australia)
- "en_BW", // English (Botswana)
- "en_CA", // English (Canada)
- "en_DK", // English (Denmark)
- "en_GB", // English (United Kingdom)
- "en_HK", // English (Hong Kong)
- "en_IE", // English (Ireland)
- "en_IL", // English (Israel)
- "en_IN", // English (India)
- "en_NG", // English (Nigeria)
- "en_NZ", // English (New Zealand)
- "en_PH", // English (Philippines)
- "en_SG", // English (Singapore)
- "en_US", // English (United States)
- "en_ZA", // English (South Africa)
- "en_ZM", // English (Zambia)
- "en_ZW", // English (Zimbabwe)
- "eo", // Esperanto
- "es", // Spanish
- "es_AR", // Spanish (Argentina)
- "es_BO", // Spanish (Bolivia)
- "es_CL", // Spanish (Chile)
- "es_CO", // Spanish (Colombia)
- "es_CR", // Spanish (Costa Rica)
- "es_CU", // Spanish (Cuba)
- "es_DO", // Spanish (Dominican Republic)
- "es_EC", // Spanish (Ecuador)
- "es_ES", // Spanish (Spain)
- "es_GT", // Spanish (Guatemala)
- "es_HN", // Spanish (Honduras)
- "es_MX", // Spanish (Mexico)
- "es_NI", // Spanish (Nicaragua)
- "es_PA", // Spanish (Panama)
- "es_PE", // Spanish (Peru)
- "es_PR", // Spanish (Puerto Rico)
- "es_PY", // Spanish (Paraguay)
- "es_SV", // Spanish (El Salvador)
- "es_US", // Spanish (United States)
- "es_UY", // Spanish (Uruguay)
- "es_VE", // Spanish (Venezuela)
- "et", // Estonian
- "et_EE", // Estonian (Estonia)
- "eu", // Basque
- "eu_ES", // Basque (Spain)
- "fa", // Persian
- "fa_IR", // Persian (Iran)
- "ff_SN", // Fulah (Senegal)
- "fi", // Finnish
- "fi_FI", // Finnish (Finland)
- "fil", // Filipino
- "fil_PH", // Filipino (Philippines)
- "fo_FO", // Faroese (Faroe Islands)
- "fr", // French
- "fr_BE", // French (Belgium)
- "fr_CA", // French (Canada)
- "fr_CH", // French (Switzerland)
- "fr_FR", // French (France)
- "fr_LU", // French (Luxembourg)
- "fur_IT", // Friulian (Italy)
- "fy_DE", // Western Frisian (Germany)
- "fy_NL", // Western Frisian (Netherlands)
- "ga", // Irish
- "ga_IE", // Irish (Ireland)
- "gd_GB", // Scottish Gaelic (United Kingdom)
- "gez_ER", // Geez (Eritrea)
- "gez_ET", // Geez (Ethiopia)
- "gl", // Galician
- "gl_ES", // Galician (Spain)
- "gu_IN", // Gujarati (India)
- "gv_GB", // Manx (United Kingdom)
- "hak_TW", // Hakka Chinese (Taiwan)
- "ha_NG", // Hausa (Nigeria)
- "he", // Hebrew
- "he_IL", // Hebrew (Israel)
- "hi", // Hindi
- "hi_IN", // Hindi (India)
- "hne_IN", // Chhattisgarhi (India)
- "hr", // Croatian
- "hr_HR", // Croatian (Croatia)
- "hsb_DE", // Upper Sorbian (Germany)
- "ht_HT", // Haitian (Haiti)
- "hu", // Hungarian
- "hu_HU", // Hungarian (Hungary)
- "hus_MX", // Huastec (Mexico)
- "hy_AM", // Armenian (Armenia)
- "ia_FR", // Interlingua (France)
- "id", // Indonesian
- "id_ID", // Indonesian (Indonesia)
- "ig_NG", // Igbo (Nigeria)
- "ik_CA", // Inupiaq (Canada)
- "is", // Icelandic
- "is_IS", // Icelandic (Iceland)
- "it", // Italian
- "it_CH", // Italian (Switzerland)
- "it_IT", // Italian (Italy)
- "iu_CA", // Inuktitut (Canada)
- "ja", // Japanese
- "ja_JP", // Japanese (Japan)
- "kab_DZ", // Kabyle (Algeria)
- "ka", // Georgian
- "ka_GE", // Georgian (Georgia)
- "kk_KZ", // Kazakh (Kazakhstan)
- "kl_GL", // Kalaallisut (Greenland)
- "km", // Central Khmer
- "km_KH", // Central Khmer (Cambodia)
- "kn_IN", // Kannada (India)
- "kok_IN", // Konkani (India)
- "ko", // Korean
- "ko_KR", // Korean (South Korea)
- "ks_IN", // Kashmiri (India)
- "ku", // Kurdish
- "ku_TR", // Kurdish (Turkey)
- "kw_GB", // Cornish (United Kingdom)
- "ky_KG", // Kirghiz (Kyrgyzstan)
- "lb_LU", // Luxembourgish (Luxembourg)
- "lg_UG", // Ganda (Uganda)
- "li_BE", // Limburgan (Belgium)
- "li_NL", // Limburgan (Netherlands)
- "lij_IT", // Ligurian (Italy)
- "ln_CD", // Lingala (Congo)
- "lo_LA", // Lao (Laos)
- "lt", // Lithuanian
- "lt_LT", // Lithuanian (Lithuania)
- "lv", // Latvian
- "lv_LV", // Latvian (Latvia)
- "lzh_TW", // Literary Chinese (Taiwan)
- "mag_IN", // Magahi (India)
- "mai_IN", // Maithili (India)
- "mg_MG", // Malagasy (Madagascar)
- "mh_MH", // Marshallese (Marshall Islands)
- "mhr_RU", // Eastern Mari (Russia)
- "mi", // Māori
- "mi_NZ", // Māori (New Zealand)
- "miq_NI", // Mískito (Nicaragua)
- "mk", // Macedonian
- "mk_MK", // Macedonian (Macedonia)
- "ml", // Malayalam
- "ml_IN", // Malayalam (India)
- "mni_IN", // Manipuri (India)
- "mn_MN", // Mongolian (Mongolia)
- "mr", // Marathi
- "mr_IN", // Marathi (India)
- "ms", // Malay
- "ms_MY", // Malay (Malaysia)
- "mt", // Maltese
- "mt_MT", // Maltese (Malta)
- "my_MM", // Burmese (Myanmar)
- "myv_RU", // Erzya (Russia)
- "nah_MX", // Nahuatl languages (Mexico)
- "nan_TW", // Min Nan Chinese (Taiwan)
- "nb", // Norwegian Bokmål
- "nb_NO", // Norwegian Bokmål (Norway)
- "nds_DE", // Low German (Germany)
- "nds_NL", // Low German (Netherlands)
- "ne_NP", // Nepali (Nepal)
- "nhn_MX", // Central Nahuatl (Mexico)
- "niu_NU", // Niuean (Niue)
- "niu_NZ", // Niuean (New Zealand)
- "nl", // Dutch
- "nl_AW", // Dutch (Aruba)
- "nl_BE", // Dutch (Belgium)
- "nl_NL", // Dutch (Netherlands)
- "nn", // Norwegian Nynorsk
- "nn_NO", // Norwegian Nynorsk (Norway)
- "nr_ZA", // South Ndebele (South Africa)
- "nso_ZA", // Pedi (South Africa)
- "oc_FR", // Occitan (France)
- "om", // Oromo
- "om_ET", // Oromo (Ethiopia)
- "om_KE", // Oromo (Kenya)
- "or", // Oriya
- "or_IN", // Oriya (India)
- "os_RU", // Ossetian (Russia)
- "pa_IN", // Panjabi (India)
- "pap", // Papiamento
- "pap_AN", // Papiamento (Netherlands Antilles)
- "pap_AW", // Papiamento (Aruba)
- "pap_CW", // Papiamento (Curaçao)
- "pa_PK", // Panjabi (Pakistan)
- "pl", // Polish
- "pl_PL", // Polish (Poland)
- "pr", // Pirate
- "ps_AF", // Pushto (Afghanistan)
- "pt", // Portuguese
- "pt_BR", // Portuguese (Brazil)
- "pt_PT", // Portuguese (Portugal)
- "quy_PE", // Ayacucho Quechua (Peru)
- "quz_PE", // Cusco Quechua (Peru)
- "raj_IN", // Rajasthani (India)
- "ro", // Romanian
- "ro_RO", // Romanian (Romania)
- "ru", // Russian
- "ru_RU", // Russian (Russia)
- "ru_UA", // Russian (Ukraine)
- "rw_RW", // Kinyarwanda (Rwanda)
- "sa_IN", // Sanskrit (India)
- "sat_IN", // Santali (India)
- "sc_IT", // Sardinian (Italy)
- "sco", // Scots
- "sd_IN", // Sindhi (India)
- "se_NO", // Northern Sami (Norway)
- "sgs_LT", // Samogitian (Lithuania)
- "shs_CA", // Shuswap (Canada)
- "sid_ET", // Sidamo (Ethiopia)
- "si", // Sinhala
- "si_LK", // Sinhala (Sri Lanka)
- "sk", // Slovak
- "sk_SK", // Slovak (Slovakia)
- "sl", // Slovenian
- "sl_SI", // Slovenian (Slovenia)
- "so", // Somali
- "so_DJ", // Somali (Djibouti)
- "so_ET", // Somali (Ethiopia)
- "so_KE", // Somali (Kenya)
- "so_SO", // Somali (Somalia)
- "son_ML", // Songhai languages (Mali)
- "sq", // Albanian
- "sq_AL", // Albanian (Albania)
- "sq_KV", // Albanian (Kosovo)
- "sq_MK", // Albanian (Macedonia)
- "sr", // Serbian
- "sr_Cyrl", // Serbian (Cyrillic)
- "sr_Latn", // Serbian (Latin)
- "sr_ME", // Serbian (Montenegro)
- "sr_RS", // Serbian (Serbia)
- "ss_ZA", // Swati (South Africa)
- "st_ZA", // Southern Sotho (South Africa)
- "sv", // Swedish
- "sv_FI", // Swedish (Finland)
- "sv_SE", // Swedish (Sweden)
- "sw_KE", // Swahili (Kenya)
- "sw_TZ", // Swahili (Tanzania)
- "szl_PL", // Silesian (Poland)
- "ta", // Tamil
- "ta_IN", // Tamil (India)
- "ta_LK", // Tamil (Sri Lanka)
- "tcy_IN", // Tulu (India)
- "te", // Telugu
- "te_IN", // Telugu (India)
- "tg_TJ", // Tajik (Tajikistan)
- "the_NP", // Chitwania Tharu (Nepal)
- "th", // Thai
- "th_TH", // Thai (Thailand)
- "ti", // Tigrinya
- "ti_ER", // Tigrinya (Eritrea)
- "ti_ET", // Tigrinya (Ethiopia)
- "tig_ER", // Tigre (Eritrea)
- "tk_TM", // Turkmen (Turkmenistan)
- "tl_PH", // Tagalog (Philippines)
- "tn_ZA", // Tswana (South Africa)
- "tr", // Turkish
- "tr_CY", // Turkish (Cyprus)
- "tr_TR", // Turkish (Turkey)
- "ts_ZA", // Tsonga (South Africa)
- "tt", // Tatar
- "tt_RU", // Tatar (Russia)
- "tzm", // Central Atlas Tamazight
- "tzm_MA", // Central Atlas Tamazight (Marrocos)
- "ug_CN", // Uighur (China)
- "uk", // Ukrainian
- "uk_UA", // Ukrainian (Ukraine)
- "unm_US", // Unami (United States)
- "ur", // Urdu
- "ur_IN", // Urdu (India)
- "ur_PK", // Urdu (Pakistan)
- "uz", // Uzbek
- "uz_UZ", // Uzbek (Uzbekistan)
- "ve_ZA", // Venda (South Africa)
- "vi", // Vietnamese
- "vi_VN", // Vietnamese (Vietnam)
- "wa_BE", // Walloon (Belgium)
- "wae_CH", // Walser (Switzerland)
- "wal_ET", // Wolaytta (Ethiopia)
- "wo_SN", // Wolof (Senegal)
- "xh_ZA", // Xhosa (South Africa)
- "yi_US", // Yiddish (United States)
- "yo_NG", // Yoruba (Nigeria)
- "yue_HK", // Yue Chinese (Hong Kong)
- "zh", // Chinese
- "zh_CN", // Chinese (China)
- "zh_HK", // Chinese (Hong Kong)
- "zh_SG", // Chinese (Singapore)
- "zh_TW", // Chinese (Taiwan)
- "zu_ZA", // Zulu (South Africa)
- nullptr
-};
-
-static const char *locale_names[] = {
- "Afar",
- "Afar (Djibouti)",
- "Afar (Eritrea)",
- "Afar (Ethiopia)",
- "Afrikaans",
- "Afrikaans (South Africa)",
- "Aguaruna (Peru)",
- "Akan (Ghana)",
- "Amharic (Ethiopia)",
- "Aragonese (Spain)",
- "Angika (India)",
- "Arabic",
- "Arabic (United Arab Emirates)",
- "Arabic (Bahrain)",
- "Arabic (Algeria)",
- "Arabic (Egypt)",
- "Arabic (India)",
- "Arabic (Iraq)",
- "Arabic (Jordan)",
- "Arabic (Kuwait)",
- "Arabic (Lebanon)",
- "Arabic (Libya)",
- "Arabic (Morocco)",
- "Arabic (Oman)",
- "Arabic (Qatar)",
- "Arabic (Saudi Arabia)",
- "Arabic (Sudan)",
- "Arabic (South Soudan)",
- "Arabic (Syria)",
- "Arabic (Tunisia)",
- "Arabic (Yemen)",
- "Assamese (India)",
- "Asturian (Spain)",
- "Southern Aymara (Peru)",
- "Aymara (Peru)",
- "Azerbaijani",
- "Azerbaijani (Azerbaijan)",
- "Belarusian",
- "Belarusian (Belarus)",
- "Bemba (Zambia)",
- "Berber languages (Algeria)",
- "Berber languages (Morocco)",
- "Bulgarian",
- "Bulgarian (Bulgaria)",
- "Bhili (India)",
- "Bhojpuri (India)",
- "Bislama (Tuvalu)",
- "Bengali",
- "Bengali (Bangladesh)",
- "Bengali (India)",
- "Tibetan",
- "Tibetan (China)",
- "Tibetan (India)",
- "Breton",
- "Breton (France)",
- "Bodo (India)",
- "Bosnian (Bosnia and Herzegovina)",
- "Bilin (Eritrea)",
- "Catalan",
- "Catalan (Andorra)",
- "Catalan (Spain)",
- "Catalan (France)",
- "Catalan (Italy)",
- "Chechen (Russia)",
- "Cherokee (United States)",
- "Mandarin Chinese (Taiwan)",
- "Crimean Tatar (Ukraine)",
- "Kashubian (Poland)",
- "Czech",
- "Czech (Czech Republic)",
- "Chuvash (Russia)",
- "Welsh (United Kingdom)",
- "Danish",
- "Danish (Denmark)",
- "German",
- "German (Austria)",
- "German (Belgium)",
- "German (Switzerland)",
- "German (Germany)",
- "German (Italy)",
- "German (Luxembourg)",
- "Dogri (India)",
- "Dhivehi (Maldives)",
- "Dzongkha (Bhutan)",
- "Greek",
- "Greek (Cyprus)",
- "Greek (Greece)",
- "English",
- "English (Antigua and Barbuda)",
- "English (Australia)",
- "English (Botswana)",
- "English (Canada)",
- "English (Denmark)",
- "English (United Kingdom)",
- "English (Hong Kong)",
- "English (Ireland)",
- "English (Israel)",
- "English (India)",
- "English (Nigeria)",
- "English (New Zealand)",
- "English (Philippines)",
- "English (Singapore)",
- "English (United States)",
- "English (South Africa)",
- "English (Zambia)",
- "English (Zimbabwe)",
- "Esperanto",
- "Spanish",
- "Spanish (Argentina)",
- "Spanish (Bolivia)",
- "Spanish (Chile)",
- "Spanish (Colombia)",
- "Spanish (Costa Rica)",
- "Spanish (Cuba)",
- "Spanish (Dominican Republic)",
- "Spanish (Ecuador)",
- "Spanish (Spain)",
- "Spanish (Guatemala)",
- "Spanish (Honduras)",
- "Spanish (Mexico)",
- "Spanish (Nicaragua)",
- "Spanish (Panama)",
- "Spanish (Peru)",
- "Spanish (Puerto Rico)",
- "Spanish (Paraguay)",
- "Spanish (El Salvador)",
- "Spanish (United States)",
- "Spanish (Uruguay)",
- "Spanish (Venezuela)",
- "Estonian",
- "Estonian (Estonia)",
- "Basque",
- "Basque (Spain)",
- "Persian",
- "Persian (Iran)",
- "Fulah (Senegal)",
- "Finnish",
- "Finnish (Finland)",
- "Filipino",
- "Filipino (Philippines)",
- "Faroese (Faroe Islands)",
- "French",
- "French (Belgium)",
- "French (Canada)",
- "French (Switzerland)",
- "French (France)",
- "French (Luxembourg)",
- "Friulian (Italy)",
- "Western Frisian (Germany)",
- "Western Frisian (Netherlands)",
- "Irish",
- "Irish (Ireland)",
- "Scottish Gaelic (United Kingdom)",
- "Geez (Eritrea)",
- "Geez (Ethiopia)",
- "Galician",
- "Galician (Spain)",
- "Gujarati (India)",
- "Manx (United Kingdom)",
- "Hakka Chinese (Taiwan)",
- "Hausa (Nigeria)",
- "Hebrew",
- "Hebrew (Israel)",
- "Hindi",
- "Hindi (India)",
- "Chhattisgarhi (India)",
- "Croatian",
- "Croatian (Croatia)",
- "Upper Sorbian (Germany)",
- "Haitian (Haiti)",
- "Hungarian",
- "Hungarian (Hungary)",
- "Huastec (Mexico)",
- "Armenian (Armenia)",
- "Interlingua (France)",
- "Indonesian",
- "Indonesian (Indonesia)",
- "Igbo (Nigeria)",
- "Inupiaq (Canada)",
- "Icelandic",
- "Icelandic (Iceland)",
- "Italian",
- "Italian (Switzerland)",
- "Italian (Italy)",
- "Inuktitut (Canada)",
- "Japanese",
- "Japanese (Japan)",
- "Kabyle (Algeria)",
- "Georgian",
- "Georgian (Georgia)",
- "Kazakh (Kazakhstan)",
- "Kalaallisut (Greenland)",
- "Central Khmer",
- "Central Khmer (Cambodia)",
- "Kannada (India)",
- "Konkani (India)",
- "Korean",
- "Korean (South Korea)",
- "Kashmiri (India)",
- "Kurdish",
- "Kurdish (Turkey)",
- "Cornish (United Kingdom)",
- "Kirghiz (Kyrgyzstan)",
- "Luxembourgish (Luxembourg)",
- "Ganda (Uganda)",
- "Limburgan (Belgium)",
- "Limburgan (Netherlands)",
- "Ligurian (Italy)",
- "Lingala (Congo)",
- "Lao (Laos)",
- "Lithuanian",
- "Lithuanian (Lithuania)",
- "Latvian",
- "Latvian (Latvia)",
- "Literary Chinese (Taiwan)",
- "Magahi (India)",
- "Maithili (India)",
- "Malagasy (Madagascar)",
- "Marshallese (Marshall Islands)",
- "Eastern Mari (Russia)",
- "Māori",
- "Māori (New Zealand)",
- "Mískito (Nicaragua)",
- "Macedonian",
- "Macedonian (Macedonia)",
- "Malayalam",
- "Malayalam (India)",
- "Manipuri (India)",
- "Mongolian (Mongolia)",
- "Marathi",
- "Marathi (India)",
- "Malay",
- "Malay (Malaysia)",
- "Maltese",
- "Maltese (Malta)",
- "Burmese (Myanmar)",
- "Erzya (Russia)",
- "Nahuatl languages (Mexico)",
- "Min Nan Chinese (Taiwan)",
- "Norwegian Bokmål",
- "Norwegian Bokmål (Norway)",
- "Low German (Germany)",
- "Low German (Netherlands)",
- "Nepali (Nepal)",
- "Central Nahuatl (Mexico)",
- "Niuean (Niue)",
- "Niuean (New Zealand)",
- "Dutch",
- "Dutch (Aruba)",
- "Dutch (Belgium)",
- "Dutch (Netherlands)",
- "Norwegian Nynorsk",
- "Norwegian Nynorsk (Norway)",
- "South Ndebele (South Africa)",
- "Pedi (South Africa)",
- "Occitan (France)",
- "Oromo",
- "Oromo (Ethiopia)",
- "Oromo (Kenya)",
- "Oriya",
- "Oriya (India)",
- "Ossetian (Russia)",
- "Panjabi (India)",
- "Papiamento",
- "Papiamento (Netherlands Antilles)",
- "Papiamento (Aruba)",
- "Papiamento (Curaçao)",
- "Panjabi (Pakistan)",
- "Polish",
- "Polish (Poland)",
- "Pirate",
- "Pushto (Afghanistan)",
- "Portuguese",
- "Portuguese (Brazil)",
- "Portuguese (Portugal)",
- "Ayacucho Quechua (Peru)",
- "Cusco Quechua (Peru)",
- "Rajasthani (India)",
- "Romanian",
- "Romanian (Romania)",
- "Russian",
- "Russian (Russia)",
- "Russian (Ukraine)",
- "Kinyarwanda (Rwanda)",
- "Sanskrit (India)",
- "Santali (India)",
- "Sardinian (Italy)",
- "Scots (Scotland)",
- "Sindhi (India)",
- "Northern Sami (Norway)",
- "Samogitian (Lithuania)",
- "Shuswap (Canada)",
- "Sidamo (Ethiopia)",
- "Sinhala",
- "Sinhala (Sri Lanka)",
- "Slovak",
- "Slovak (Slovakia)",
- "Slovenian",
- "Slovenian (Slovenia)",
- "Somali",
- "Somali (Djibouti)",
- "Somali (Ethiopia)",
- "Somali (Kenya)",
- "Somali (Somalia)",
- "Songhai languages (Mali)",
- "Albanian",
- "Albanian (Albania)",
- "Albanian (Kosovo)",
- "Albanian (Macedonia)",
- "Serbian",
- "Serbian (Cyrillic)",
- "Serbian (Latin)",
- "Serbian (Montenegro)",
- "Serbian (Serbia)",
- "Swati (South Africa)",
- "Southern Sotho (South Africa)",
- "Swedish",
- "Swedish (Finland)",
- "Swedish (Sweden)",
- "Swahili (Kenya)",
- "Swahili (Tanzania)",
- "Silesian (Poland)",
- "Tamil",
- "Tamil (India)",
- "Tamil (Sri Lanka)",
- "Tulu (India)",
- "Telugu",
- "Telugu (India)",
- "Tajik (Tajikistan)",
- "Chitwania Tharu (Nepal)",
- "Thai",
- "Thai (Thailand)",
- "Tigrinya",
- "Tigrinya (Eritrea)",
- "Tigrinya (Ethiopia)",
- "Tigre (Eritrea)",
- "Turkmen (Turkmenistan)",
- "Tagalog (Philippines)",
- "Tswana (South Africa)",
- "Turkish",
- "Turkish (Cyprus)",
- "Turkish (Turkey)",
- "Tsonga (South Africa)",
- "Tatar",
- "Tatar (Russia)",
- "Central Atlas Tamazight",
- "Central Atlas Tamazight (Marrocos)",
- "Uighur (China)",
- "Ukrainian",
- "Ukrainian (Ukraine)",
- "Unami (United States)",
- "Urdu",
- "Urdu (India)",
- "Urdu (Pakistan)",
- "Uzbek",
- "Uzbek (Uzbekistan)",
- "Venda (South Africa)",
- "Vietnamese",
- "Vietnamese (Vietnam)",
- "Walloon (Belgium)",
- "Walser (Switzerland)",
- "Wolaytta (Ethiopia)",
- "Wolof (Senegal)",
- "Xhosa (South Africa)",
- "Yiddish (United States)",
- "Yoruba (Nigeria)",
- "Yue Chinese (Hong Kong)",
- "Chinese",
- "Chinese (China)",
- "Chinese (Hong Kong)",
- "Chinese (Singapore)",
- "Chinese (Taiwan)",
- "Zulu (South Africa)",
- nullptr
-};
-
-// Windows has some weird locale identifiers which do not honor the ISO 639-1
-// standardized nomenclature. Whenever those don't conflict with existing ISO
-// identifiers, we override them.
-//
-// Reference:
-// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx
-
-static const char *locale_renames[][2] = {
- { "in", "id" }, // Indonesian
- { "iw", "he" }, // Hebrew
- { "no", "nb" }, // Norwegian Bokmål
- { "C", "en" }, // "C" is the simple/default/untranslated Computer locale.
- // ASCII-only, English, no currency symbols. Godot treats this as "en".
- // See https://unix.stackexchange.com/a/87763/164141 "The C locale is"...
- { nullptr, nullptr }
-};
-
-///////////////////////////////////////////////
-
Dictionary Translation::_get_messages() const {
Dictionary d;
for (const KeyValue<StringName, StringName> &E : translation_map) {
@@ -849,17 +68,7 @@ void Translation::_set_messages(const Dictionary &p_messages) {
}
void Translation::set_locale(const String &p_locale) {
- String univ_locale = TranslationServer::standardize_locale(p_locale);
-
- if (!TranslationServer::is_locale_valid(univ_locale)) {
- String trimmed_locale = TranslationServer::get_language_code(univ_locale);
-
- ERR_FAIL_COND_MSG(!TranslationServer::is_locale_valid(trimmed_locale), "Invalid locale: " + trimmed_locale + ".");
-
- locale = trimmed_locale;
- } else {
- locale = univ_locale;
- }
+ locale = TranslationServer::get_singleton()->standardize_locale(p_locale);
if (OS::get_singleton()->get_main_loop() && TranslationServer::get_singleton()->get_loaded_locales().has(this)) {
OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED);
@@ -886,12 +95,12 @@ StringName Translation::get_message(const StringName &p_src_text, const StringNa
WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class");
}
- const Map<StringName, StringName>::Element *E = translation_map.find(p_src_text);
+ HashMap<StringName, StringName>::ConstIterator E = translation_map.find(p_src_text);
if (!E) {
return StringName();
}
- return E->get();
+ return E->value;
}
StringName Translation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const {
@@ -932,13 +141,13 @@ void Translation::_bind_methods() {
ClassDB::bind_method(D_METHOD("erase_message", "src_message", "context"), &Translation::erase_message, DEFVAL(""));
ClassDB::bind_method(D_METHOD("get_message_list"), &Translation::_get_message_list);
ClassDB::bind_method(D_METHOD("get_message_count"), &Translation::get_message_count);
- ClassDB::bind_method(D_METHOD("_set_messages"), &Translation::_set_messages);
+ ClassDB::bind_method(D_METHOD("_set_messages", "messages"), &Translation::_set_messages);
ClassDB::bind_method(D_METHOD("_get_messages"), &Translation::_get_messages);
GDVIRTUAL_BIND(_get_plural_message, "src_message", "src_plural_message", "n", "context");
GDVIRTUAL_BIND(_get_message, "src_message", "context");
- ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages");
+ ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages");
ADD_PROPERTY(PropertyInfo(Variant::STRING, "locale"), "set_locale", "get_locale");
}
@@ -1004,121 +213,306 @@ static _character_accent_pair _character_to_accented[] = {
{ 'z', U"ź" },
};
-bool TranslationServer::is_locale_valid(const String &p_locale) {
- const char **ptr = locale_list;
+Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info;
- while (*ptr) {
- if (*ptr == p_locale) {
- return true;
+HashMap<String, String> TranslationServer::language_map;
+HashMap<String, String> TranslationServer::script_map;
+HashMap<String, String> TranslationServer::locale_rename_map;
+HashMap<String, String> TranslationServer::country_name_map;
+HashMap<String, String> TranslationServer::variant_map;
+HashMap<String, String> TranslationServer::country_rename_map;
+
+void TranslationServer::init_locale_info() {
+ // Init locale info.
+ language_map.clear();
+ int idx = 0;
+ while (language_list[idx][0] != nullptr) {
+ language_map[language_list[idx][0]] = String::utf8(language_list[idx][1]);
+ idx++;
+ }
+
+ // Init locale-script map.
+ locale_script_info.clear();
+ idx = 0;
+ while (locale_scripts[idx][0] != nullptr) {
+ LocaleScriptInfo info;
+ info.name = locale_scripts[idx][0];
+ info.script = locale_scripts[idx][1];
+ info.default_country = locale_scripts[idx][2];
+ Vector<String> supported_countries = String(locale_scripts[idx][3]).split(",", false);
+ for (int i = 0; i < supported_countries.size(); i++) {
+ info.supported_countries.insert(supported_countries[i]);
}
- ptr++;
+ locale_script_info.push_back(info);
+ idx++;
}
- return false;
-}
+ // Init supported script list.
+ script_map.clear();
+ idx = 0;
+ while (script_list[idx][0] != nullptr) {
+ script_map[script_list[idx][1]] = String::utf8(script_list[idx][0]);
+ idx++;
+ }
-String TranslationServer::standardize_locale(const String &p_locale) {
- // Replaces '-' with '_' for macOS Sierra-style locales
- String univ_locale = p_locale.replace("-", "_");
+ // Init regional variant map.
+ variant_map.clear();
+ idx = 0;
+ while (locale_variants[idx][0] != nullptr) {
+ variant_map[locale_variants[idx][0]] = locale_variants[idx][1];
+ idx++;
+ }
- // Handles known non-ISO locale names used e.g. on Windows
- int idx = 0;
+ // Init locale renames.
+ locale_rename_map.clear();
+ idx = 0;
while (locale_renames[idx][0] != nullptr) {
- if (locale_renames[idx][0] == univ_locale) {
- univ_locale = locale_renames[idx][1];
- break;
+ if (!String(locale_renames[idx][1]).is_empty()) {
+ locale_rename_map[locale_renames[idx][0]] = locale_renames[idx][1];
}
idx++;
}
- return univ_locale;
+ // Init country names.
+ country_name_map.clear();
+ idx = 0;
+ while (country_names[idx][0] != nullptr) {
+ country_name_map[String(country_names[idx][0])] = String::utf8(country_names[idx][1]);
+ idx++;
+ }
+
+ // Init country renames.
+ country_rename_map.clear();
+ idx = 0;
+ while (country_renames[idx][0] != nullptr) {
+ if (!String(country_renames[idx][1]).is_empty()) {
+ country_rename_map[country_renames[idx][0]] = country_renames[idx][1];
+ }
+ idx++;
+ }
}
-String TranslationServer::get_language_code(const String &p_locale) {
- ERR_FAIL_COND_V_MSG(p_locale.length() < 2, p_locale, "Invalid locale '" + p_locale + "'.");
- // Most language codes are two letters, but some are three,
- // so we have to look for a regional code separator ('_' or '-')
- // to extract the left part.
- // For example we get 'nah_MX' as input and should return 'nah'.
- int split = p_locale.find("_");
- if (split == -1) {
- split = p_locale.find("-");
+String TranslationServer::standardize_locale(const String &p_locale) const {
+ // Replaces '-' with '_' for macOS style locales.
+ String univ_locale = p_locale.replace("-", "_");
+
+ // Extract locale elements.
+ String lang, script, country, variant;
+ Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
+ lang = locale_elements[0];
+ if (locale_elements.size() >= 2) {
+ if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
+ script = locale_elements[1];
+ }
+ if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
+ country = locale_elements[1];
+ }
}
- if (split == -1) { // No separator, so the locale is already only a language code.
- return p_locale;
+ if (locale_elements.size() >= 3) {
+ if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
+ country = locale_elements[2];
+ } else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) {
+ variant = locale_elements[2].to_lower();
+ }
+ }
+ if (locale_elements.size() >= 4) {
+ if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang) {
+ variant = locale_elements[3].to_lower();
+ }
}
- return p_locale.left(split);
-}
-void TranslationServer::set_locale(const String &p_locale) {
- String univ_locale = standardize_locale(p_locale);
+ // Try extract script and variant from the extra part.
+ Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";");
+ for (int i = 0; i < script_extra.size(); i++) {
+ if (script_extra[i].to_lower() == "cyrillic") {
+ script = "Cyrl";
+ break;
+ } else if (script_extra[i].to_lower() == "latin") {
+ script = "Latn";
+ break;
+ } else if (script_extra[i].to_lower() == "devanagari") {
+ script = "Deva";
+ break;
+ } else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang) {
+ variant = script_extra[i].to_lower();
+ }
+ }
- if (!is_locale_valid(univ_locale)) {
- String trimmed_locale = get_language_code(univ_locale);
- print_verbose(vformat("Unsupported locale '%s', falling back to '%s'.", p_locale, trimmed_locale));
+ // Handles known non-ISO language names used e.g. on Windows.
+ if (locale_rename_map.has(lang)) {
+ lang = locale_rename_map[lang];
+ }
- if (!is_locale_valid(trimmed_locale)) {
- ERR_PRINT(vformat("Unsupported locale '%s', falling back to 'en'.", trimmed_locale));
- locale = "en";
- } else {
- locale = trimmed_locale;
+ // Handle country renames.
+ if (country_rename_map.has(country)) {
+ country = country_rename_map[country];
+ }
+
+ // Remove unsupported script codes.
+ if (!script_map.has(script)) {
+ script = "";
+ }
+
+ // Add script code base on language and country codes for some ambiguous cases.
+ if (script.is_empty()) {
+ for (int i = 0; i < locale_script_info.size(); i++) {
+ const LocaleScriptInfo &info = locale_script_info[i];
+ if (info.name == lang) {
+ if (country.is_empty() || info.supported_countries.has(country)) {
+ script = info.script;
+ break;
+ }
+ }
+ }
+ }
+ if (!script.is_empty() && country.is_empty()) {
+ // Add conntry code based on script for some ambiguous cases.
+ for (int i = 0; i < locale_script_info.size(); i++) {
+ const LocaleScriptInfo &info = locale_script_info[i];
+ if (info.name == lang && info.script == script) {
+ country = info.default_country;
+ break;
+ }
+ }
+ }
+
+ // Combine results.
+ String locale = lang;
+ if (!script.is_empty()) {
+ locale = locale + "_" + script;
+ }
+ if (!country.is_empty()) {
+ locale = locale + "_" + country;
+ }
+ if (!variant.is_empty()) {
+ locale = locale + "_" + variant;
+ }
+ return locale;
+}
+
+int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const {
+ String locale_a = standardize_locale(p_locale_a);
+ String locale_b = standardize_locale(p_locale_b);
+
+ if (locale_a == locale_b) {
+ // Exact match.
+ return 10;
+ }
+
+ Vector<String> locale_a_elements = locale_a.split("_");
+ Vector<String> locale_b_elements = locale_b.split("_");
+ if (locale_a_elements[0] == locale_b_elements[0]) {
+ // Matching language, both locales have extra parts.
+ // Return number of matching elements.
+ int matching_elements = 1;
+ for (int i = 1; i < locale_a_elements.size(); i++) {
+ for (int j = 1; j < locale_b_elements.size(); j++) {
+ if (locale_a_elements[i] == locale_b_elements[j]) {
+ matching_elements++;
+ }
+ }
}
+ return matching_elements;
} else {
- locale = univ_locale;
+ // No match.
+ return 0;
+ }
+}
+
+String TranslationServer::get_locale_name(const String &p_locale) const {
+ String locale = standardize_locale(p_locale);
+
+ String lang, script, country;
+ Vector<String> locale_elements = locale.split("_");
+ lang = locale_elements[0];
+ if (locale_elements.size() >= 2) {
+ if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
+ script = locale_elements[1];
+ }
+ if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
+ country = locale_elements[1];
+ }
+ }
+ if (locale_elements.size() >= 3) {
+ if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
+ country = locale_elements[2];
+ }
}
- if (OS::get_singleton()->get_main_loop()) {
- OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED);
+ String name = language_map[lang];
+ if (!script.is_empty()) {
+ name = name + " (" + script_map[script] + ")";
+ }
+ if (!country.is_empty()) {
+ name = name + ", " + country_name_map[country];
}
+ return name;
+}
- ResourceLoader::reload_translation_remaps();
+Vector<String> TranslationServer::get_all_languages() const {
+ Vector<String> languages;
+
+ for (const KeyValue<String, String> &E : language_map) {
+ languages.push_back(E.key);
+ }
+
+ return languages;
}
-String TranslationServer::get_locale() const {
- return locale;
+String TranslationServer::get_language_name(const String &p_language) const {
+ return language_map[p_language];
}
-String TranslationServer::get_locale_name(const String &p_locale) const {
- if (!locale_name_map.has(p_locale)) {
- return String();
+Vector<String> TranslationServer::get_all_scripts() const {
+ Vector<String> scripts;
+
+ for (const KeyValue<String, String> &E : script_map) {
+ scripts.push_back(E.key);
}
- return locale_name_map[p_locale];
+
+ return scripts;
}
-Array TranslationServer::get_loaded_locales() const {
- Array locales;
- for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) {
- const Ref<Translation> &t = E->get();
- ERR_FAIL_COND_V(t.is_null(), Array());
- String l = t->get_locale();
+String TranslationServer::get_script_name(const String &p_script) const {
+ return script_map[p_script];
+}
- locales.push_back(l);
+Vector<String> TranslationServer::get_all_countries() const {
+ Vector<String> countries;
+
+ for (const KeyValue<String, String> &E : country_name_map) {
+ countries.push_back(E.key);
}
- return locales;
+ return countries;
}
-Vector<String> TranslationServer::get_all_locales() {
- Vector<String> locales;
+String TranslationServer::get_country_name(const String &p_country) const {
+ return country_name_map[p_country];
+}
- const char **ptr = locale_list;
+void TranslationServer::set_locale(const String &p_locale) {
+ locale = standardize_locale(p_locale);
- while (*ptr) {
- locales.push_back(*ptr);
- ptr++;
+ if (OS::get_singleton()->get_main_loop()) {
+ OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED);
}
- return locales;
+ ResourceLoader::reload_translation_remaps();
}
-Vector<String> TranslationServer::get_all_locale_names() {
- Vector<String> locales;
+String TranslationServer::get_locale() const {
+ return locale;
+}
- const char **ptr = locale_names;
+Array TranslationServer::get_loaded_locales() const {
+ Array locales;
+ for (const Ref<Translation> &E : translations) {
+ const Ref<Translation> &t = E;
+ ERR_FAIL_COND_V(t.is_null(), Array());
+ String l = t->get_locale();
- while (*ptr) {
- locales.push_back(String::utf8(*ptr));
- ptr++;
+ locales.push_back(l);
}
return locales;
@@ -1134,23 +528,20 @@ void TranslationServer::remove_translation(const Ref<Translation> &p_translation
Ref<Translation> TranslationServer::get_translation_object(const String &p_locale) {
Ref<Translation> res;
- String lang = get_language_code(p_locale);
- bool near_match_found = false;
+ int best_score = 0;
- for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) {
- const Ref<Translation> &t = E->get();
+ for (const Ref<Translation> &E : translations) {
+ const Ref<Translation> &t = E;
ERR_FAIL_COND_V(t.is_null(), nullptr);
String l = t->get_locale();
- // Exact match.
- if (l == p_locale) {
- return t;
- }
-
- // If near match found, keep that match, but keep looking to try to look for perfect match.
- if (get_language_code(l) == lang && !near_match_found) {
+ int score = compare_locales(p_locale, l);
+ if (score > 0 && score >= best_score) {
res = t;
- near_match_found = true;
+ best_score = score;
+ if (score == 10) {
+ break; // Exact match, skip the rest.
+ }
}
}
return res;
@@ -1167,8 +558,6 @@ StringName TranslationServer::translate(const StringName &p_message, const Strin
return p_message;
}
- ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid.");
-
StringName res = _get_message_from_translations(p_message, p_context, locale, false);
if (!res && fallback.length() >= 2) {
@@ -1190,8 +579,6 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons
return p_message_plural;
}
- ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid.");
-
StringName res = _get_message_from_translations(p_message, p_context, locale, true, p_message_plural, p_n);
if (!res && fallback.length() >= 2) {
@@ -1209,51 +596,30 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons
}
StringName TranslationServer::_get_message_from_translations(const StringName &p_message, const StringName &p_context, const String &p_locale, bool plural, const String &p_message_plural, int p_n) const {
- // Locale can be of the form 'll_CC', i.e. language code and regional code,
- // e.g. 'en_US', 'en_GB', etc. It might also be simply 'll', e.g. 'en'.
- // To find the relevant translation, we look for those with locale starting
- // with the language code, and then if any is an exact match for the long
- // form. If not found, we fall back to a near match (another locale with
- // same language code).
-
- // Note: ResourceLoader::_path_remap reproduces this locale near matching
- // logic, so be sure to propagate changes there when changing things here.
-
StringName res;
- String lang = get_language_code(p_locale);
- bool near_match = false;
+ int best_score = 0;
- for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) {
- const Ref<Translation> &t = E->get();
+ for (const Ref<Translation> &E : translations) {
+ const Ref<Translation> &t = E;
ERR_FAIL_COND_V(t.is_null(), p_message);
String l = t->get_locale();
- bool exact_match = (l == p_locale);
- if (!exact_match) {
- if (near_match) {
- continue; // Only near-match once, but keep looking for exact matches.
+ int score = compare_locales(p_locale, l);
+ if (score > 0 && score >= best_score) {
+ StringName r;
+ if (!plural) {
+ r = t->get_message(p_message, p_context);
+ } else {
+ r = t->get_plural_message(p_message, p_message_plural, p_n, p_context);
}
- if (get_language_code(l) != lang) {
- continue; // Language code does not match.
+ if (!r) {
+ continue;
+ }
+ res = r;
+ best_score = score;
+ if (score == 10) {
+ break; // Exact match, skip the rest.
}
- }
-
- StringName r;
- if (!plural) {
- r = t->get_message(p_message, p_context);
- } else {
- r = t->get_plural_message(p_message, p_message_plural, p_n, p_context);
- }
-
- if (!r) {
- continue;
- }
- res = r;
-
- if (exact_match) {
- break;
- } else {
- near_match = true;
}
}
@@ -1287,7 +653,7 @@ bool TranslationServer::_load_translations(const String &p_from) {
void TranslationServer::setup() {
String test = GLOBAL_DEF("internationalization/locale/test", "");
test = test.strip_edges();
- if (test != "") {
+ if (!test.is_empty()) {
set_locale(test);
} else {
set_locale(OS::get_singleton()->get_locale());
@@ -1305,18 +671,7 @@ void TranslationServer::setup() {
pseudolocalization_skip_placeholders_enabled = GLOBAL_DEF("internationalization/pseudolocalization/skip_placeholders", true);
#ifdef TOOLS_ENABLED
- {
- String options = "";
- int idx = 0;
- while (locale_list[idx]) {
- if (idx > 0) {
- options += ",";
- }
- options += locale_list[idx];
- idx++;
- }
- ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_ENUM, options));
- }
+ ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_LOCALE_ID, ""));
#endif
}
@@ -1330,8 +685,12 @@ Ref<Translation> TranslationServer::get_tool_translation() const {
String TranslationServer::get_tool_locale() {
#ifdef TOOLS_ENABLED
- if (TranslationServer::get_singleton()->get_tool_translation().is_valid() && (Engine::get_singleton()->is_editor_hint() || Main::is_project_manager())) {
- return tool_translation->get_locale();
+ if (Engine::get_singleton()->is_editor_hint() || Engine::get_singleton()->is_project_manager_hint()) {
+ if (TranslationServer::get_singleton()->get_tool_translation().is_valid()) {
+ return tool_translation->get_locale();
+ } else {
+ return "en";
+ }
} else {
#else
{
@@ -1532,7 +891,7 @@ String TranslationServer::wrap_with_fakebidi_characters(String &p_message) const
return res;
}
-String TranslationServer::add_padding(String &p_message, int p_length) const {
+String TranslationServer::add_padding(const String &p_message, int p_length) const {
String res;
String prefix = pseudolocalization_prefix;
String suffix;
@@ -1548,7 +907,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const {
}
const char32_t *TranslationServer::get_accented_version(char32_t p_character) const {
- if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) {
+ if (!is_ascii_char(p_character)) {
return nullptr;
}
@@ -1562,14 +921,27 @@ const char32_t *TranslationServer::get_accented_version(char32_t p_character) co
}
bool TranslationServer::is_placeholder(String &p_message, int p_index) const {
- return p_message[p_index] == '%' && p_index < p_message.size() - 1 &&
- (p_message[p_index + 1] == 's' || p_message[p_index + 1] == 'c' || p_message[p_index + 1] == 'd' ||
- p_message[p_index + 1] == 'o' || p_message[p_index + 1] == 'x' || p_message[p_index + 1] == 'X' || p_message[p_index + 1] == 'f');
+ return p_index < p_message.size() - 1 && p_message[p_index] == '%' &&
+ (p_message[p_index + 1] == 's' || p_message[p_index + 1] == 'c' || p_message[p_index + 1] == 'd' ||
+ p_message[p_index + 1] == 'o' || p_message[p_index + 1] == 'x' || p_message[p_index + 1] == 'X' || p_message[p_index + 1] == 'f');
}
void TranslationServer::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_locale", "locale"), &TranslationServer::set_locale);
ClassDB::bind_method(D_METHOD("get_locale"), &TranslationServer::get_locale);
+ ClassDB::bind_method(D_METHOD("get_tool_locale"), &TranslationServer::get_tool_locale);
+
+ ClassDB::bind_method(D_METHOD("compare_locales", "locale_a", "locale_b"), &TranslationServer::compare_locales);
+ ClassDB::bind_method(D_METHOD("standardize_locale", "locale"), &TranslationServer::standardize_locale);
+
+ ClassDB::bind_method(D_METHOD("get_all_languages"), &TranslationServer::get_all_languages);
+ ClassDB::bind_method(D_METHOD("get_language_name", "language"), &TranslationServer::get_language_name);
+
+ ClassDB::bind_method(D_METHOD("get_all_scripts"), &TranslationServer::get_all_scripts);
+ ClassDB::bind_method(D_METHOD("get_script_name", "script"), &TranslationServer::get_script_name);
+
+ ClassDB::bind_method(D_METHOD("get_all_countries"), &TranslationServer::get_all_countries);
+ ClassDB::bind_method(D_METHOD("get_country_name", "country"), &TranslationServer::get_country_name);
ClassDB::bind_method(D_METHOD("get_locale_name", "locale"), &TranslationServer::get_locale_name);
@@ -1603,8 +975,5 @@ void TranslationServer::load_translations() {
TranslationServer::TranslationServer() {
singleton = this;
-
- for (int i = 0; locale_list[i]; ++i) {
- locale_name_map.insert(locale_list[i], String::utf8(locale_names[i]));
- }
+ init_locale_info();
}
diff --git a/core/string/translation.h b/core/string/translation.h
index 6aec0bb8ea..20c6ebd5a5 100644
--- a/core/string/translation.h
+++ b/core/string/translation.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -41,7 +41,7 @@ class Translation : public Resource {
RES_BASE_EXTENSION("translation");
String locale = "en";
- Map<StringName, StringName> translation_map;
+ HashMap<StringName, StringName> translation_map;
virtual Vector<String> _get_message_list() const;
virtual Dictionary _get_messages() const;
@@ -74,12 +74,10 @@ class TranslationServer : public Object {
String locale = "en";
String fallback;
- Set<Ref<Translation>> translations;
+ HashSet<Ref<Translation>> translations;
Ref<Translation> tool_translation;
Ref<Translation> doc_translation;
- Map<String, String> locale_name_map;
-
bool enabled = true;
bool pseudolocalization_enabled = false;
@@ -98,7 +96,7 @@ class TranslationServer : public Object {
String double_vowels(String &p_message) const;
String replace_with_accented_string(String &p_message) const;
String wrap_with_fakebidi_characters(String &p_message) const;
- String add_padding(String &p_message, int p_length) const;
+ String add_padding(const String &p_message, int p_length) const;
const char32_t *get_accented_version(char32_t p_character) const;
bool is_placeholder(String &p_message, int p_index) const;
@@ -109,6 +107,23 @@ class TranslationServer : public Object {
static void _bind_methods();
+ struct LocaleScriptInfo {
+ String name;
+ String script;
+ String default_country;
+ HashSet<String> supported_countries;
+ };
+ static Vector<LocaleScriptInfo> locale_script_info;
+
+ static HashMap<String, String> language_map;
+ static HashMap<String, String> script_map;
+ static HashMap<String, String> locale_rename_map;
+ static HashMap<String, String> country_name_map;
+ static HashMap<String, String> country_rename_map;
+ static HashMap<String, String> variant_map;
+
+ void init_locale_info();
+
public:
_FORCE_INLINE_ static TranslationServer *get_singleton() { return singleton; }
@@ -119,6 +134,15 @@ public:
String get_locale() const;
Ref<Translation> get_translation_object(const String &p_locale);
+ Vector<String> get_all_languages() const;
+ String get_language_name(const String &p_language) const;
+
+ Vector<String> get_all_scripts() const;
+ String get_script_name(const String &p_script) const;
+
+ Vector<String> get_all_countries() const;
+ String get_country_name(const String &p_country) const;
+
String get_locale_name(const String &p_locale) const;
Array get_loaded_locales() const;
@@ -136,11 +160,9 @@ public:
void set_editor_pseudolocalization(bool p_enabled);
void reload_pseudolocalization();
- static Vector<String> get_all_locales();
- static Vector<String> get_all_locale_names();
- static bool is_locale_valid(const String &p_locale);
- static String standardize_locale(const String &p_locale);
- static String get_language_code(const String &p_locale);
+ String standardize_locale(const String &p_locale) const;
+
+ int compare_locales(const String &p_locale_a, const String &p_locale_b) const;
String get_tool_locale();
void set_tool_translation(const Ref<Translation> &p_translation);
diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp
index 1da00aa54b..fa656b634d 100644
--- a/core/string/translation_po.cpp
+++ b/core/string/translation_po.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -35,7 +35,7 @@
#ifdef DEBUG_TRANSLATION_PO
void TranslationPO::print_translation_map() {
Error err;
- FileAccess *file = FileAccess::open("translation_map_print_test.txt", FileAccess::WRITE, &err);
+ Ref<FileAccess> file = FileAccess::open("translation_map_print_test.txt", FileAccess::WRITE, &err);
if (err != OK) {
ERR_PRINT("Failed to open translation_map_print_test.txt");
return;
@@ -62,7 +62,6 @@ void TranslationPO::print_translation_map() {
file->store_line("");
}
}
- file->close();
}
#endif
@@ -71,21 +70,14 @@ Dictionary TranslationPO::_get_messages() const {
Dictionary d;
- List<StringName> context_l;
- translation_map.get_key_list(&context_l);
- for (const StringName &ctx : context_l) {
- const HashMap<StringName, Vector<StringName>> &id_str_map = translation_map[ctx];
-
+ for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) {
Dictionary d2;
- List<StringName> id_l;
- id_str_map.get_key_list(&id_l);
- // Save list of id and strs associated with a context in a temporary dictionary.
- for (List<StringName>::Element *E2 = id_l.front(); E2; E2 = E2->next()) {
- StringName id = E2->get();
- d2[id] = id_str_map[id];
+
+ for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) {
+ d2[E2.key] = E2.value;
}
- d[ctx] = d2;
+ d[E.key] = d2;
}
return d;
@@ -275,31 +267,24 @@ void TranslationPO::get_message_list(List<StringName> *r_messages) const {
// OptimizedTranslation uses this function to get the list of msgid.
// Return all the keys of translation_map under "" context.
- List<StringName> context_l;
- translation_map.get_key_list(&context_l);
-
- for (const StringName &E : context_l) {
- if (String(E) != "") {
+ for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) {
+ if (E.key != StringName()) {
continue;
}
- List<StringName> msgid_l;
- translation_map[E].get_key_list(&msgid_l);
-
- for (List<StringName>::Element *E2 = msgid_l.front(); E2; E2 = E2->next()) {
- r_messages->push_back(E2->get());
+ for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) {
+ r_messages->push_back(E2.key);
}
}
}
int TranslationPO::get_message_count() const {
- List<StringName> context_l;
- translation_map.get_key_list(&context_l);
-
int count = 0;
- for (const StringName &E : context_l) {
- count += translation_map[E].size();
+
+ for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) {
+ count += E.value.size();
}
+
return count;
}
diff --git a/core/string/translation_po.h b/core/string/translation_po.h
index 0e1d03d6ca..7d63af2246 100644
--- a/core/string/translation_po.h
+++ b/core/string/translation_po.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
diff --git a/core/string/ucaps.h b/core/string/ucaps.h
index b785ac7879..357d36e703 100644
--- a/core/string/ucaps.h
+++ b/core/string/ucaps.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 397743fb6e..c02be9e5b7 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -35,9 +35,11 @@
#include "core/math/math_funcs.h"
#include "core/os/memory.h"
#include "core/string/print_string.h"
+#include "core/string/string_name.h"
#include "core/string/translation.h"
#include "core/string/ucaps.h"
#include "core/variant/variant.h"
+#include "core/version_generated.gen.h"
#include <stdio.h>
#include <stdlib.h>
@@ -53,34 +55,14 @@
static const int MAX_DECIMALS = 32;
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
- return (c >= '0' && c <= '9');
-}
-
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
- return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
- return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
- return (c >= 'a' && c <= 'z');
-}
-
static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
- return (is_upper_case(c) ? (c + ('a' - 'A')) : c);
+ return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
}
const char CharString::_null = 0;
const char16_t Char16String::_null = 0;
const char32_t String::_null = 0;
-bool is_symbol(char32_t c) {
- return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
-}
-
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
const String &s = p_s;
int beg = CLAMP(p_col, 0, s.length());
@@ -122,16 +104,18 @@ bool Char16String::operator<(const Char16String &p_right) const {
}
Char16String &Char16String::operator+=(char16_t p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- set(length() - 1, p_char);
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+
+ char16_t *dst = ptrw();
+ dst[lhs_len] = p_char;
+ dst[lhs_len + 1] = 0;
return *this;
}
-Char16String &Char16String::operator=(const char16_t *p_cstr) {
+void Char16String::operator=(const char16_t *p_cstr) {
copy_from(p_cstr);
- return *this;
}
const char16_t *Char16String::get_data() const {
@@ -178,16 +162,18 @@ bool CharString::operator<(const CharString &p_right) const {
}
CharString &CharString::operator+=(char p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- set(length() - 1, p_char);
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+
+ char *dst = ptrw();
+ dst[lhs_len] = p_char;
+ dst[lhs_len + 1] = 0;
return *this;
}
-CharString &CharString::operator=(const char *p_cstr) {
+void CharString::operator=(const char *p_cstr) {
copy_from(p_cstr);
- return *this;
}
const char *CharString::get_data() const {
@@ -325,11 +311,7 @@ void String::copy_from(const char *p_cstr) {
return;
}
- int len = 0;
- const char *ptr = p_cstr;
- while (*(ptr++) != 0) {
- len++;
- }
+ const size_t len = strlen(p_cstr);
if (len == 0) {
resize(0);
@@ -340,8 +322,14 @@ void String::copy_from(const char *p_cstr) {
char32_t *dst = this->ptrw();
- for (int i = 0; i < len + 1; i++) {
- dst[i] = p_cstr[i];
+ for (size_t i = 0; i <= len; i++) {
+ uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+ if (c == 0 && i < len) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
}
@@ -368,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
char32_t *dst = this->ptrw();
for (int i = 0; i < len; i++) {
- dst[i] = p_cstr[i];
+ uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+ if (c == 0) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
dst[len] = 0;
}
@@ -394,14 +388,22 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
}
void String::copy_from(const char32_t &p_char) {
- resize(2);
- if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
- set(0, 0xfffd);
- } else {
- set(0, p_char);
+ if (p_char == 0) {
+ print_unicode_error("NUL character", true);
+ return;
+ }
+ if ((p_char & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+ }
+ if (p_char > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
}
- set(1, 0);
+
+ resize(2);
+
+ char32_t *dst = ptrw();
+ dst[0] = p_char;
+ dst[1] = 0;
}
void String::copy_from(const char32_t *p_cstr) {
@@ -450,17 +452,22 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
// p_length <= p_char strlen
void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
resize(p_length + 1);
- set(p_length, 0);
-
char32_t *dst = ptrw();
+ dst[p_length] = 0;
for (int i = 0; i < p_length; i++) {
- if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
- dst[i] = 0xfffd;
- } else {
- dst[i] = p_char[i];
+ if (p_char[i] == 0) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ continue;
+ }
+ if ((p_char[i] & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
}
+ if (p_char[i] > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
+ }
+ dst[i] = p_char[i];
}
}
@@ -482,6 +489,12 @@ String String::operator+(const String &p_str) const {
return res;
}
+String String::operator+(char32_t p_char) const {
+ String res = *this;
+ res += p_char;
+ return res;
+}
+
String operator+(const char *p_chr, const String &p_str) {
String tmp = p_chr;
tmp += p_str;
@@ -493,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) {
// wchar_t is 16-bit
String tmp = String::utf16((const char16_t *)p_chr);
#else
- // wchar_t is 32-bi
+ // wchar_t is 32-bit
String tmp = (const char32_t *)p_chr;
#endif
tmp += p_str;
@@ -505,27 +518,25 @@ String operator+(char32_t p_chr, const String &p_str) {
}
String &String::operator+=(const String &p_str) {
- if (is_empty()) {
+ const int lhs_len = length();
+ if (lhs_len == 0) {
*this = p_str;
return *this;
}
- if (p_str.is_empty()) {
+ const int rhs_len = p_str.length();
+ if (rhs_len == 0) {
return *this;
}
- int from = length();
-
- resize(length() + p_str.size());
+ resize(lhs_len + rhs_len + 1);
- const char32_t *src = p_str.get_data();
- char32_t *dst = ptrw();
+ const char32_t *src = p_str.ptr();
+ char32_t *dst = ptrw() + lhs_len;
- set(length(), 0);
-
- for (int i = 0; i < p_str.length(); i++) {
- dst[from + i] = src[i];
- }
+ // Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination).
+ memcpy(dst, src, rhs_len * sizeof(char32_t));
+ *(dst + rhs_len) = _null;
return *this;
}
@@ -535,22 +546,21 @@ String &String::operator+=(const char *p_str) {
return *this;
}
- int src_len = 0;
- const char *ptr = p_str;
- while (*(ptr++) != 0) {
- src_len++;
- }
-
- int from = length();
+ const int lhs_len = length();
+ const size_t rhs_len = strlen(p_str);
- resize(from + src_len + 1);
-
- char32_t *dst = ptrw();
+ resize(lhs_len + rhs_len + 1);
- set(length(), 0);
+ char32_t *dst = ptrw() + lhs_len;
- for (int i = 0; i < src_len; i++) {
- dst[from + i] = p_str[i];
+ for (size_t i = 0; i <= rhs_len; i++) {
+ uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
+ if (c == 0 && i < rhs_len) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
return *this;
@@ -573,15 +583,23 @@ String &String::operator+=(const char32_t *p_str) {
}
String &String::operator+=(char32_t p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
- set(length() - 1, 0xfffd);
- } else {
- set(length() - 1, p_char);
+ if (p_char == 0) {
+ print_unicode_error("NUL character", true);
+ return *this;
+ }
+ if ((p_char & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+ }
+ if (p_char > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
}
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+ char32_t *dst = ptrw();
+ dst[lhs_len] = p_char;
+ dst[lhs_len + 1] = 0;
+
return *this;
}
@@ -952,10 +970,6 @@ const char32_t *String::get_data() const {
return size() ? &operator[](0) : &zero;
}
-void String::erase(int p_pos, int p_chars) {
- *this = left(MAX(p_pos, 0)) + substr(p_pos + p_chars, length() - ((p_pos + p_chars)));
-}
-
String String::capitalize() const {
String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges();
String cap;
@@ -979,21 +993,21 @@ String String::camelcase_to_underscore(bool lowercase) const {
int start_index = 0;
for (int i = 1; i < this->size(); i++) {
- bool is_upper = is_upper_case(cstr[i]);
+ bool is_upper = is_ascii_upper_case(cstr[i]);
bool is_number = is_digit(cstr[i]);
bool are_next_2_lower = false;
bool is_next_lower = false;
bool is_next_number = false;
- bool was_precedent_upper = is_upper_case(cstr[i - 1]);
+ bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
bool was_precedent_number = is_digit(cstr[i - 1]);
if (i + 2 < this->size()) {
- are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]);
+ are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
}
if (i + 1 < this->size()) {
- is_next_lower = is_lower_case(cstr[i + 1]);
+ is_next_lower = is_ascii_lower_case(cstr[i + 1]);
is_next_number = is_digit(cstr[i + 1]);
}
@@ -1532,115 +1546,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
}
String String::num_real(double p_num, bool p_trailing) {
- if (Math::is_nan(p_num)) {
- return "nan";
- }
-
- if (Math::is_inf(p_num)) {
- if (signbit(p_num)) {
- return "-inf";
+ if (p_num == (double)(int64_t)p_num) {
+ if (p_trailing) {
+ return num_int64((int64_t)p_num) + ".0";
} else {
- return "inf";
+ return num_int64((int64_t)p_num);
}
}
-
- String s;
- String sd;
-
- // Integer part.
-
- bool neg = p_num < 0;
- p_num = ABS(p_num);
- int64_t intn = (int64_t)p_num;
-
- // Decimal part.
-
- if (intn != p_num) {
- double dec = p_num - (double)intn;
-
- int digit = 0;
-
#ifdef REAL_T_IS_DOUBLE
- int decimals = 14;
- double tolerance = 1e-14;
+ int decimals = 14;
#else
- int decimals = 6;
- double tolerance = 1e-6;
+ int decimals = 6;
#endif
- // We want to align the digits to the above sane default, so we only
- // need to subtract log10 for numbers with a positive power of ten.
- if (p_num > 10) {
- decimals -= (int)floor(log10(p_num));
- }
-
- if (decimals > MAX_DECIMALS) {
- decimals = MAX_DECIMALS;
- }
-
- // In case the value ends up ending in "99999", we want to add a
- // tiny bit to the value we're checking when deciding when to stop,
- // so we multiply by slightly above 1 (1 + 1e-7 or 1e-15).
- double check_multiplier = 1 + tolerance / 10;
-
- int64_t dec_int = 0;
- int64_t dec_max = 0;
-
- while (true) {
- dec *= 10.0;
- dec_int = dec_int * 10 + (int64_t)dec % 10;
- dec_max = dec_max * 10 + 9;
- digit++;
-
- if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) {
- break;
- }
-
- if (digit == decimals) {
- break;
- }
- }
-
- dec *= 10;
- int last = (int64_t)dec % 10;
-
- if (last > 5) {
- if (dec_int == dec_max) {
- dec_int = 0;
- intn++;
- } else {
- dec_int++;
- }
- }
-
- String decimal;
- for (int i = 0; i < digit; i++) {
- char num[2] = { 0, 0 };
- num[0] = '0' + dec_int % 10;
- decimal = num + decimal;
- dec_int /= 10;
- }
- sd = '.' + decimal;
- } else if (p_trailing) {
- sd = ".0";
- } else {
- sd = "";
- }
-
- if (intn == 0) {
- s = "0";
- } else {
- while (intn) {
- char32_t num = '0' + (intn % 10);
- intn /= 10;
- s = num + s;
- }
- }
-
- s = s + sd;
- if (neg) {
- s = "-" + s;
+ // We want to align the digits to the above sane default, so we only
+ // need to subtract log10 for numbers with a positive power of ten.
+ if (p_num > 10) {
+ decimals -= (int)floor(log10(p_num));
}
- return s;
+ return num(p_num, decimals);
}
String String::num_scientific(double p_num) {
@@ -1699,6 +1622,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
return ret;
}
+void String::print_unicode_error(const String &p_message, bool p_critical) const {
+ if (p_critical) {
+ print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
+ } else {
+ print_error(vformat("Unicode parsing error: %s", p_message));
+ }
+}
+
CharString String::ascii(bool p_allow_extended) const {
if (!length()) {
return CharString();
@@ -1712,7 +1643,7 @@ CharString String::ascii(bool p_allow_extended) const {
if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
cs[i] = c;
} else {
- print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
cs[i] = 0x20;
}
}
@@ -1727,11 +1658,9 @@ String String::utf8(const char *p_utf8, int p_len) {
return ret;
}
-bool String::parse_utf8(const char *p_utf8, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
-
+Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
if (!p_utf8) {
- return true;
+ return ERR_INVALID_DATA;
}
String aux;
@@ -1751,14 +1680,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
}
+ bool decode_error = false;
+ bool decode_failed = false;
{
const char *ptrtmp = p_utf8;
const char *ptrtmp_limit = &p_utf8[p_len];
int skip = 0;
+ uint8_t c_start = 0;
while (ptrtmp != ptrtmp_limit && *ptrtmp) {
- if (skip == 0) {
- uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+ uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+ if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ ptrtmp++;
+ continue;
+ }
/* Determine the number of characters in sequence */
if ((c & 0x80) == 0) {
skip = 0;
@@ -1768,20 +1704,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
skip = 2;
} else if ((c & 0xf8) == 0xf0) {
skip = 3;
+ } else if ((c & 0xfc) == 0xf8) {
+ skip = 4;
+ } else if ((c & 0xfe) == 0xfc) {
+ skip = 5;
} else {
- _UNICERROR("invalid skip at " + num_int64(cstr_size));
- return true; //invalid utf8
+ skip = 0;
+ print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
+ decode_failed = true;
}
+ c_start = c;
if (skip == 1 && (c & 0x1e) == 0) {
- _UNICERROR("overlong rejected at " + num_int64(cstr_size));
- return true; //reject overlong
+ print_unicode_error(vformat("Overlong encoding (%x ...)", c));
+ decode_error = true;
}
-
str_size++;
-
} else {
- --skip;
+ if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
+ print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
+ decode_error = true;
+ }
+ if (c < 0x80 || c > 0xbf) {
+ print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
+ decode_failed = true;
+ skip = 0;
+ } else {
+ --skip;
+ }
}
cstr_size++;
@@ -1789,80 +1739,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
if (skip) {
- _UNICERROR("no space left");
- return true; //not enough space
+ print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
+ decode_failed = true;
}
}
if (str_size == 0) {
clear();
- return false;
+ return OK; // empty string
}
resize(str_size + 1);
char32_t *dst = ptrw();
dst[str_size] = 0;
+ int skip = 0;
+ uint32_t unichar = 0;
while (cstr_size) {
- int len = 0;
-
- /* Determine the number of characters in sequence */
- if ((*p_utf8 & 0x80) == 0) {
- len = 1;
- } else if ((*p_utf8 & 0xe0) == 0xc0) {
- len = 2;
- } else if ((*p_utf8 & 0xf0) == 0xe0) {
- len = 3;
- } else if ((*p_utf8 & 0xf8) == 0xf0) {
- len = 4;
- } else {
- _UNICERROR("invalid len");
- return true; //invalid UTF8
- }
-
- if (len > cstr_size) {
- _UNICERROR("no space left");
- return true; //not enough space
- }
+ uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
- if (len == 2 && (*p_utf8 & 0x1E) == 0) {
- _UNICERROR("no space left");
- return true; //reject overlong
- }
-
- /* Convert the first character */
-
- uint32_t unichar = 0;
-
- if (len == 1) {
- unichar = *p_utf8;
+ if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ p_utf8++;
+ continue;
+ }
+ /* Determine the number of characters in sequence */
+ if ((c & 0x80) == 0) {
+ *(dst++) = c;
+ unichar = 0;
+ skip = 0;
+ } else if ((c & 0xe0) == 0xc0) {
+ unichar = (0xff >> 3) & c;
+ skip = 1;
+ } else if ((c & 0xf0) == 0xe0) {
+ unichar = (0xff >> 4) & c;
+ skip = 2;
+ } else if ((c & 0xf8) == 0xf0) {
+ unichar = (0xff >> 5) & c;
+ skip = 3;
+ } else if ((c & 0xfc) == 0xf8) {
+ unichar = (0xff >> 6) & c;
+ skip = 4;
+ } else if ((c & 0xfe) == 0xfc) {
+ unichar = (0xff >> 7) & c;
+ skip = 5;
+ } else {
+ *(dst++) = 0x20;
+ unichar = 0;
+ skip = 0;
+ }
} else {
- unichar = (0xff >> (len + 1)) & *p_utf8;
-
- for (int i = 1; i < len; i++) {
- if ((p_utf8[i] & 0xc0) != 0x80) {
- _UNICERROR("invalid utf8");
- return true; //invalid utf8
- }
- if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
- _UNICERROR("invalid utf8 overlong");
- return true; //no overlong
+ if (c < 0x80 || c > 0xbf) {
+ *(dst++) = 0x20;
+ skip = 0;
+ } else {
+ unichar = (unichar << 6) | (c & 0x3f);
+ --skip;
+ if (skip == 0) {
+ if (unichar == 0) {
+ print_unicode_error("NUL character", true);
+ decode_failed = true;
+ unichar = 0x20;
+ }
+ if ((unichar & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
+ decode_error = true;
+ }
+ if (unichar > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
+ decode_error = true;
+ }
+ *(dst++) = unichar;
}
- unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
}
}
- if (unichar >= 0xd800 && unichar <= 0xdfff) {
- _UNICERROR("invalid code point");
- return CharString();
- }
- *(dst++) = unichar;
- cstr_size -= len;
- p_utf8 += len;
+ cstr_size--;
+ p_utf8++;
+ }
+ if (skip) {
+ *(dst++) = 0x20;
}
- return false;
-#undef _UNICERROR
+ if (decode_failed) {
+ return ERR_INVALID_DATA;
+ } else if (decode_error) {
+ return ERR_PARSE_ERROR;
+ } else {
+ return OK;
+ }
}
CharString String::utf8() const {
@@ -1881,15 +1846,17 @@ CharString String::utf8() const {
fl += 2;
} else if (c <= 0xffff) { // 16 bits
fl += 3;
- } else if (c <= 0x0010ffff) { // 21 bits
+ } else if (c <= 0x001fffff) { // 21 bits
fl += 4;
+ } else if (c <= 0x03ffffff) { // 26 bits
+ fl += 5;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
+ } else if (c <= 0x7fffffff) { // 31 bits
+ fl += 6;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return CharString();
- }
- if (c >= 0xd800 && c <= 0xdfff) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return CharString();
+ fl += 1;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
}
}
@@ -1915,11 +1882,26 @@ CharString String::utf8() const {
APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
- } else { // 21 bits
+ } else if (c <= 0x001fffff) { // 21 bits
APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else if (c <= 0x03ffffff) { // 26 bits
+ APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else if (c <= 0x7fffffff) { // 31 bits
+ APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else {
+ APPEND_CHAR(0x20);
}
}
#undef APPEND_CHAR
@@ -1935,11 +1917,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) {
return ret;
}
-bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
-
+Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
if (!p_utf16) {
- return true;
+ return ERR_INVALID_DATA;
}
String aux;
@@ -1966,80 +1946,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
}
}
+ bool decode_error = false;
{
const char16_t *ptrtmp = p_utf16;
const char16_t *ptrtmp_limit = &p_utf16[p_len];
- int skip = 0;
+ uint32_t c_prev = 0;
+ bool skip = false;
while (ptrtmp != ptrtmp_limit && *ptrtmp) {
uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
- if (skip == 0) {
- if ((c & 0xfffffc00) == 0xd800) {
- skip = 1; // lead surrogate
- } else if ((c & 0xfffffc00) == 0xdc00) {
- _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
- return true; // invalid UTF16
- } else {
- skip = 0;
+
+ if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+ if (skip) {
+ print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
+ decode_error = true;
}
- str_size++;
- } else {
- if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
- --skip;
+ skip = true;
+ } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ if (skip) {
+ str_size--;
} else {
- _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
- return true; // invalid UTF16
+ print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
+ decode_error = true;
}
+ skip = false;
+ } else {
+ skip = false;
}
+ c_prev = c;
+ str_size++;
cstr_size++;
ptrtmp++;
}
if (skip) {
- _UNICERROR("no space left");
- return true; // not enough space
+ print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
+ decode_error = true;
}
}
if (str_size == 0) {
clear();
- return false;
+ return OK; // empty string
}
resize(str_size + 1);
char32_t *dst = ptrw();
dst[str_size] = 0;
+ bool skip = false;
+ uint32_t c_prev = 0;
while (cstr_size) {
- int len = 0;
uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
- if ((c & 0xfffffc00) == 0xd800) {
- len = 2;
+ if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+ if (skip) {
+ *(dst++) = c_prev; // unpaired, store as is
+ }
+ skip = true;
+ } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ if (skip) {
+ *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
+ } else {
+ *(dst++) = c; // unpaired, store as is
+ }
+ skip = false;
} else {
- len = 1;
- }
-
- if (len > cstr_size) {
- _UNICERROR("no space left");
- return true; //not enough space
+ *(dst++) = c;
+ skip = false;
}
- uint32_t unichar = 0;
- if (len == 1) {
- unichar = c;
- } else {
- uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
- unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
- }
+ cstr_size--;
+ p_utf16++;
+ c_prev = c;
+ }
- *(dst++) = unichar;
- cstr_size -= len;
- p_utf16 += len;
+ if (skip) {
+ *(dst++) = c_prev;
}
- return false;
-#undef _UNICERROR
+ if (decode_error) {
+ return ERR_PARSE_ERROR;
+ } else {
+ return OK;
+ }
}
Char16String String::utf16() const {
@@ -2054,15 +2044,14 @@ Char16String String::utf16() const {
uint32_t c = d[i];
if (c <= 0xffff) { // 16 bits.
fl += 1;
+ if ((c & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", c));
+ }
} else if (c <= 0x10ffff) { // 32 bits.
fl += 2;
} else {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return Char16String();
- }
- if (c >= 0xd800 && c <= 0xdfff) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return Char16String();
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
+ fl += 1;
}
}
@@ -2081,9 +2070,11 @@ Char16String String::utf16() const {
if (c <= 0xffff) { // 16 bits.
APPEND_CHAR(c);
- } else { // 32 bits.
+ } else if (c <= 0x10ffff) { // 32 bits.
APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
+ } else {
+ APPEND_CHAR(0x20);
}
}
#undef APPEND_CHAR
@@ -2155,7 +2146,7 @@ int64_t String::hex_to_int() const {
}
// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0');
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
hex *= 16;
hex += n;
s++;
@@ -2194,7 +2185,7 @@ int64_t String::bin_to_int() const {
}
// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0');
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
binary *= 2;
binary += n;
s++;
@@ -2217,7 +2208,7 @@ int64_t String::to_int() const {
char32_t c = operator[](i);
if (is_digit(c)) {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
integer *= 10;
integer += c - '0';
@@ -2246,7 +2237,7 @@ int64_t String::to_int(const char *p_str, int p_len) {
char c = p_str[i];
if (is_digit(c)) {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
integer *= 10;
integer += c - '0';
@@ -2277,7 +2268,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) {
wchar_t c = p_str[i];
if (is_digit(c)) {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
integer *= 10;
integer += c - '0';
@@ -2308,7 +2299,7 @@ bool String::is_numeric() const {
return false;
}
dot = true;
- } else if (c < '0' || c > '9') {
+ } else if (!is_digit(c)) {
return false;
}
}
@@ -2317,28 +2308,33 @@ bool String::is_numeric() const {
}
template <class C>
-static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number,
- * optionally preceded by white space. Must
- * have form "-I.FE-X", where I is the integer
- * part of the mantissa, F is the fractional
- * part of the mantissa, and X is the
- * exponent. Either of the signs may be "+",
- * "-", or omitted. Either I or F may be
- * omitted, or both. The decimal point isn't
- * necessary unless F is present. The "E" may
- * actually be an "e". E and X may both be
- * omitted (but not just one). */
- C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's
- * address here. */
-{
- static const int maxExponent = 511; /* Largest possible base 10 exponent. Any
- * exponent larger than this will already
- * produce underflow or overflow, so there's
- * no need to worry about additional digits.
- */
- static const double powersOf10[] = { /* Table giving binary powers of 10. Entry */
- 10., /* is 10^2^i. Used to convert decimal */
- 100., /* exponents into floating-point numbers. */
+static double built_in_strtod(
+ /* A decimal ASCII floating-point number,
+ * optionally preceded by white space. Must
+ * have form "-I.FE-X", where I is the integer
+ * part of the mantissa, F is the fractional
+ * part of the mantissa, and X is the
+ * exponent. Either of the signs may be "+",
+ * "-", or omitted. Either I or F may be
+ * omitted, or both. The decimal point isn't
+ * necessary unless F is present. The "E" may
+ * actually be an "e". E and X may both be
+ * omitted (but not just one). */
+ const C *string,
+ /* If non-nullptr, store terminating Cacter's
+ * address here. */
+ C **endPtr = nullptr) {
+ /* Largest possible base 10 exponent. Any
+ * exponent larger than this will already
+ * produce underflow or overflow, so there's
+ * no need to worry about additional digits. */
+ static const int maxExponent = 511;
+ /* Table giving binary powers of 10. Entry
+ * is 10^2^i. Used to convert decimal
+ * exponents into floating-point numbers. */
+ static const double powersOf10[] = {
+ 10.,
+ 100.,
1.0e4,
1.0e8,
1.0e16,
@@ -2353,25 +2349,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
const double *d;
const C *p;
int c;
- int exp = 0; /* Exponent read from "EX" field. */
- int fracExp = 0; /* Exponent that derives from the fractional
- * part. Under normal circumstances, it is
- * the negative of the number of digits in F.
- * However, if I is very long, the last digits
- * of I get dropped (otherwise a long I with a
- * large negative exponent could cause an
- * unnecessary overflow on I alone). In this
- * case, fracExp is incremented one for each
- * dropped digit. */
- int mantSize; /* Number of digits in mantissa. */
- int decPt; /* Number of mantissa digits BEFORE decimal
- * point. */
- const C *pExp; /* Temporarily holds location of exponent in
- * string. */
+ /* Exponent read from "EX" field. */
+ int exp = 0;
+ /* Exponent that derives from the fractional
+ * part. Under normal circumstances, it is
+ * the negative of the number of digits in F.
+ * However, if I is very long, the last digits
+ * of I get dropped (otherwise a long I with a
+ * large negative exponent could cause an
+ * unnecessary overflow on I alone). In this
+ * case, fracExp is incremented one for each
+ * dropped digit. */
+ int fracExp = 0;
+ /* Number of digits in mantissa. */
+ int mantSize;
+ /* Number of mantissa digits BEFORE decimal point. */
+ int decPt;
+ /* Temporarily holds location of exponent in string. */
+ const C *pExp;
/*
- * Strip off leading blanks and check for a sign.
- */
+ * Strip off leading blanks and check for a sign.
+ */
p = string;
while (*p == ' ' || *p == '\t' || *p == '\n') {
@@ -2388,9 +2387,9 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Count the number of digits in the mantissa (including the decimal
- * point), and also locate the decimal point.
- */
+ * Count the number of digits in the mantissa (including the decimal
+ * point), and also locate the decimal point.
+ */
decPt = -1;
for (mantSize = 0;; mantSize += 1) {
@@ -2405,11 +2404,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Now suck up the digits in the mantissa. Use two integers to collect 9
- * digits each (this is faster than using floating-point). If the mantissa
- * has more than 18 digits, ignore the extras, since they can't affect the
- * value anyway.
- */
+ * Now suck up the digits in the mantissa. Use two integers to collect 9
+ * digits each (this is faster than using floating-point). If the mantissa
+ * has more than 18 digits, ignore the extras, since they can't affect the
+ * value anyway.
+ */
pExp = p;
p -= mantSize;
@@ -2455,8 +2454,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Skim off the exponent.
- */
+ * Skim off the exponent.
+ */
p = pExp;
if ((*p == 'E') || (*p == 'e')) {
@@ -2486,10 +2485,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Generate a floating-point number that represents the exponent. Do this
- * by processing the exponent one bit at a time to combine many powers of
- * 2 of 10. Then combine the exponent with the fraction.
- */
+ * Generate a floating-point number that represents the exponent. Do this
+ * by processing the exponent one bit at a time to combine many powers of
+ * 2 of 10. Then combine the exponent with the fraction.
+ */
if (exp < 0) {
expSign = true;
@@ -2591,7 +2590,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
return INT64_MIN;
}
} else {
- ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
}
}
integer *= 10;
@@ -3168,7 +3167,7 @@ bool String::is_subsequence_of(const String &p_string) const {
return _base_is_subsequence_of(p_string, false);
}
-bool String::is_subsequence_ofi(const String &p_string) const {
+bool String::is_subsequence_ofn(const String &p_string) const {
return _base_is_subsequence_of(p_string, true);
}
@@ -3452,51 +3451,52 @@ String String::replacen(const String &p_key, const String &p_with) const {
String String::repeat(int p_count) const {
ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
- String new_string;
- const char32_t *src = this->get_data();
-
- new_string.resize(length() * p_count + 1);
- new_string[length() * p_count] = 0;
-
- for (int i = 0; i < p_count; i++) {
- for (int j = 0; j < length(); j++) {
- new_string[i * length() + j] = src[j];
- }
- }
-
+ int len = length();
+ String new_string = *this;
+ new_string.resize(p_count * len + 1);
+
+ char32_t *dst = new_string.ptrw();
+ int offset = 1;
+ int stride = 1;
+ while (offset < p_count) {
+ memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t));
+ offset += stride;
+ stride = MIN(stride * 2, p_count - offset);
+ }
+ dst[p_count * len] = _null;
return new_string;
}
-String String::left(int p_pos) const {
- if (p_pos < 0) {
- p_pos = length() + p_pos;
+String String::left(int p_len) const {
+ if (p_len < 0) {
+ p_len = length() + p_len;
}
- if (p_pos <= 0) {
+ if (p_len <= 0) {
return "";
}
- if (p_pos >= length()) {
+ if (p_len >= length()) {
return *this;
}
- return substr(0, p_pos);
+ return substr(0, p_len);
}
-String String::right(int p_pos) const {
- if (p_pos < 0) {
- p_pos = length() + p_pos;
+String String::right(int p_len) const {
+ if (p_len < 0) {
+ p_len = length() + p_len;
}
- if (p_pos <= 0) {
+ if (p_len <= 0) {
return "";
}
- if (p_pos >= length()) {
+ if (p_len >= length()) {
return *this;
}
- return substr(length() - p_pos);
+ return substr(length() - p_len);
}
char32_t String::unicode_at(int p_idx) const {
@@ -3504,6 +3504,27 @@ char32_t String::unicode_at(int p_idx) const {
return operator[](p_idx);
}
+String String::indent(const String &p_prefix) const {
+ String new_string;
+ int line_start = 0;
+
+ for (int i = 0; i < length(); i++) {
+ const char32_t c = operator[](i);
+ if (c == '\n') {
+ if (i == line_start) {
+ new_string += c; // Leave empty lines empty.
+ } else {
+ new_string += p_prefix + substr(line_start, i - line_start + 1);
+ }
+ line_start = i + 1;
+ }
+ }
+ if (line_start != length()) {
+ new_string += p_prefix + substr(line_start);
+ }
+ return new_string;
+}
+
String String::dedent() const {
String new_string;
String indent;
@@ -3625,6 +3646,10 @@ String String::rstrip(const String &p_chars) const {
return substr(0, end + 1);
}
+bool String::is_network_share_path() const {
+ return begins_with("//") || begins_with("\\\\");
+}
+
String String::simplify_path() const {
String s = *this;
String drive;
@@ -3637,6 +3662,9 @@ String String::simplify_path() const {
} else if (s.begins_with("user://")) {
drive = "user://";
s = s.substr(7, s.length());
+ } else if (is_network_share_path()) {
+ drive = s.substr(0, 2);
+ s = s.substr(2, s.length() - 2);
} else if (s.begins_with("/") || s.begins_with("\\")) {
drive = s.substr(0, 1);
s = s.substr(1, s.length() - 1);
@@ -3665,15 +3693,15 @@ String String::simplify_path() const {
for (int i = 0; i < dirs.size(); i++) {
String d = dirs[i];
if (d == ".") {
- dirs.remove(i);
+ dirs.remove_at(i);
i--;
} else if (d == "..") {
if (i == 0) {
- dirs.remove(i);
+ dirs.remove_at(i);
i--;
} else {
- dirs.remove(i);
- dirs.remove(i - 1);
+ dirs.remove_at(i);
+ dirs.remove_at(i - 1);
i -= 2;
}
}
@@ -3735,6 +3763,31 @@ bool String::is_absolute_path() const {
}
}
+static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) {
+ if (p_index == 0 && is_digit(p_char)) {
+ return false; // No start with number plz.
+ }
+ return is_ascii_identifier_char(p_char);
+}
+
+String String::validate_identifier() const {
+ if (is_empty()) {
+ return "_"; // Empty string is not a valid identifier;
+ }
+
+ String result = *this;
+ int len = result.length();
+ char32_t *buffer = result.ptrw();
+
+ for (int i = 0; i < len; i++) {
+ if (!_is_valid_identifier_bit(i, buffer[i])) {
+ buffer[i] = '_';
+ }
+ }
+
+ return result;
+}
+
bool String::is_valid_identifier() const {
int len = length();
@@ -3745,15 +3798,7 @@ bool String::is_valid_identifier() const {
const char32_t *str = &operator[](0);
for (int i = 0; i < len; i++) {
- if (i == 0) {
- if (is_digit(str[0])) {
- return false; // no start with number plz
- }
- }
-
- bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_';
-
- if (!valid_char) {
+ if (!_is_valid_identifier_bit(i, str[i])) {
return false;
}
}
@@ -3775,18 +3820,15 @@ String String::uri_encode() const {
const CharString temp = utf8();
String res;
for (int i = 0; i < temp.length(); ++i) {
- char ord = temp[i];
- if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) {
+ uint8_t ord = temp[i];
+ if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
res += ord;
} else {
- char h_Val[3];
-#if defined(__GNUC__) || defined(_MSC_VER)
- snprintf(h_Val, 3, "%02hhX", ord);
-#else
- sprintf(h_Val, "%02hhX", ord);
-#endif
- res += "%";
- res += h_Val;
+ char p[4] = { '%', 0, 0, 0 };
+ static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+ p[1] = hex[ord >> 4];
+ p[2] = hex[ord & 0xF];
+ res += p;
}
}
return res;
@@ -3798,9 +3840,9 @@ String String::uri_decode() const {
for (int i = 0; i < src.length(); ++i) {
if (src[i] == '%' && i + 2 < src.length()) {
char ord1 = src[i + 1];
- if (is_digit(ord1) || is_upper_case(ord1)) {
+ if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
char ord2 = src[i + 2];
- if (is_digit(ord2) || is_upper_case(ord2)) {
+ if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
char bytes[3] = { (char)ord1, (char)ord2, 0 };
res += (char)strtol(bytes, nullptr, 16);
i += 2;
@@ -3927,7 +3969,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
for (int i = 2; i < p_src_len; i++) {
eat = i + 1;
char32_t ct = p_src[i];
- if (ct == ';' || ct < '0' || ct > '9') {
+ if (ct == ';' || !is_digit(ct)) {
break;
}
}
@@ -4057,7 +4099,7 @@ String String::pad_zeros(int p_digits) const {
int begin = 0;
- while (begin < end && (s[begin] < '0' || s[begin] > '9')) {
+ while (begin < end && !is_digit(s[begin])) {
begin++;
}
@@ -4102,7 +4144,7 @@ bool String::is_valid_int() const {
}
for (int i = from; i < len; i++) {
- if (operator[](i) < '0' || operator[](i) > '9') {
+ if (!is_digit(operator[](i))) {
return false; // no start with number plz
}
}
@@ -4204,15 +4246,11 @@ String String::path_to(const String &p_path) const {
dst += "/";
}
- String base;
-
if (src.begins_with("res://") && dst.begins_with("res://")) {
- base = "res:/";
src = src.replace("res://", "/");
dst = dst.replace("res://", "/");
} else if (src.begins_with("user://") && dst.begins_with("user://")) {
- base = "user:/";
src = src.replace("user://", "/");
dst = dst.replace("user://", "/");
@@ -4227,7 +4265,6 @@ String String::path_to(const String &p_path) const {
return p_path; //impossible to do this
}
- base = src_begin;
src = src.substr(src_begin.length(), src.length());
dst = dst.substr(dst_begin.length(), dst.length());
}
@@ -4280,7 +4317,7 @@ bool String::is_valid_filename() const {
return false;
}
- if (stripped == String()) {
+ if (stripped.is_empty()) {
return false;
}
@@ -4338,13 +4375,13 @@ bool String::is_relative_path() const {
String String::get_base_dir() const {
int end = 0;
- // url scheme style base
+ // URL scheme style base.
int basepos = find("://");
if (basepos != -1) {
end = basepos + 3;
}
- // windows top level directory base
+ // Windows top level directory base.
if (end == 0) {
basepos = find(":/");
if (basepos == -1) {
@@ -4355,7 +4392,24 @@ String String::get_base_dir() const {
}
}
- // unix root directory base
+ // Windows UNC network share path.
+ if (end == 0) {
+ if (is_network_share_path()) {
+ basepos = find("/", 2);
+ if (basepos == -1) {
+ basepos = find("\\", 2);
+ }
+ int servpos = find("/", basepos + 1);
+ if (servpos == -1) {
+ servpos = find("\\", basepos + 1);
+ }
+ if (servpos != -1) {
+ end = servpos + 1;
+ }
+ }
+ }
+
+ // Unix root directory base.
if (end == 0) {
if (begins_with("/")) {
end = 1;
@@ -4412,7 +4466,7 @@ String String::property_name_encode() const {
// as well as '"', '=' or ' ' (32)
const char32_t *cstr = get_data();
for (int i = 0; cstr[i]; i++) {
- if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) {
+ if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) {
return "\"" + c_escape_multiline() + "\"";
}
}
@@ -4421,7 +4475,7 @@ String String::property_name_encode() const {
}
// Changes made to the set of invalid characters must also be reflected in the String documentation.
-const String String::invalid_node_name_characters = ". : @ / \"";
+const String String::invalid_node_name_characters = ". : @ / \" " UNIQUE_NODE_PREFIX;
String String::validate_node_name() const {
Vector<String> chars = String::invalid_node_name_characters.split(" ");
@@ -4495,7 +4549,7 @@ String String::sprintf(const Array &values, bool *error) const {
int min_chars = 0;
int min_decimals = 0;
bool in_decimals = false;
- bool pad_with_zeroes = false;
+ bool pad_with_zeros = false;
bool left_justified = false;
bool show_sign = false;
@@ -4548,7 +4602,7 @@ String String::sprintf(const Array &values, bool *error) const {
// Padding.
int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars;
- String pad_char = pad_with_zeroes ? String("0") : String(" ");
+ String pad_char = pad_with_zeros ? String("0") : String(" ");
if (left_justified) {
str = str.rpad(pad_chars_count, pad_char);
} else {
@@ -4556,10 +4610,13 @@ String String::sprintf(const Array &values, bool *error) const {
}
// Sign.
- if (show_sign && value >= 0) {
- str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+");
- } else if (value < 0) {
- str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-");
+ if (show_sign || value < 0) {
+ String sign_char = value < 0 ? "-" : "+";
+ if (left_justified) {
+ str = str.insert(0, sign_char);
+ } else {
+ str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char);
+ }
}
formatted += str;
@@ -4588,13 +4645,9 @@ String String::sprintf(const Array &values, bool *error) const {
// Padding. Leave room for sign later if required.
int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
- String pad_char = pad_with_zeroes ? String("0") : String(" ");
+ String pad_char = pad_with_zeros ? String("0") : String(" ");
if (left_justified) {
- if (pad_with_zeroes) {
- return "left justification cannot be used with zeros as the padding";
- } else {
- str = str.rpad(pad_chars_count, pad_char);
- }
+ str = str.rpad(pad_chars_count, pad_char);
} else {
str = str.lpad(pad_chars_count, pad_char);
}
@@ -4605,7 +4658,7 @@ String String::sprintf(const Array &values, bool *error) const {
if (left_justified) {
str = str.insert(0, sign_char);
} else {
- str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char);
+ str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char);
}
}
@@ -4694,7 +4747,11 @@ String String::sprintf(const Array &values, bool *error) const {
min_decimals += n;
} else {
if (c == '0' && min_chars == 0) {
- pad_with_zeroes = true;
+ if (left_justified) {
+ WARN_PRINT("'0' flag ignored with '-' flag in string format");
+ } else {
+ pad_with_zeros = true;
+ }
} else {
min_chars *= 10;
min_chars += n;
@@ -4743,7 +4800,7 @@ String String::sprintf(const Array &values, bool *error) const {
// Back to defaults:
min_chars = 0;
min_decimals = 6;
- pad_with_zeroes = false;
+ pad_with_zeros = false;
left_justified = false;
show_sign = false;
in_decimals = false;
@@ -4844,6 +4901,17 @@ Vector<uint8_t> String::to_utf32_buffer() const {
}
#ifdef TOOLS_ENABLED
+/**
+ * "Tools TRanslate". Performs string replacement for internationalization
+ * within the editor. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `TTRN()` instead of `TTR()`.
+ *
+ * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTR()` instead.
+ */
String TTR(const String &p_text, const String &p_context) {
if (TranslationServer::get_singleton()) {
return TranslationServer::get_singleton()->tool_translate(p_text, p_context);
@@ -4852,6 +4920,18 @@ String TTR(const String &p_text, const String &p_context) {
return p_text;
}
+/**
+ * "Tools TRanslate for N items". Performs string replacement for
+ * internationalization within the editor. A translation context can optionally
+ * be specified to disambiguate between identical source strings in
+ * translations. Use `TTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTRN()` instead.
+ */
String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
if (TranslationServer::get_singleton()) {
return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
@@ -4864,37 +4944,62 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St
return p_text_plural;
}
+/**
+ * "Docs TRanslate". Used for the editor class reference documentation,
+ * handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
+ */
String DTR(const String &p_text, const String &p_context) {
// Comes straight from the XML, so remove indentation and any trailing whitespace.
const String text = p_text.dedent().strip_edges();
if (TranslationServer::get_singleton()) {
- return TranslationServer::get_singleton()->doc_translate(text, p_context);
+ return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
}
- return text;
+ return text.replace("$DOCS_URL", VERSION_DOCS_URL);
}
+/**
+ * "Docs TRanslate for N items". Used for the editor class reference documentation
+ * (with support for plurals), handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
+ */
String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
const String text = p_text.dedent().strip_edges();
const String text_plural = p_text_plural.dedent().strip_edges();
if (TranslationServer::get_singleton()) {
- return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context);
+ return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
}
// Return message based on English plural rule if translation is not possible.
if (p_n == 1) {
- return text;
+ return text.replace("$DOCS_URL", VERSION_DOCS_URL);
}
- return text_plural;
+ return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL);
}
#endif
+/**
+ * "Run-time TRanslate". Performs string replacement for internationalization
+ * within a running project. The translation string must be supplied by the
+ * project, as Godot does not provide built-in translations for `RTR()` strings
+ * to keep binary size low. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `RTRN()` instead of `RTR()`.
+ *
+ * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTR()` instead.
+ */
String RTR(const String &p_text, const String &p_context) {
if (TranslationServer::get_singleton()) {
String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context);
- if (rtr == String() || rtr == p_text) {
+ if (rtr.is_empty() || rtr == p_text) {
return TranslationServer::get_singleton()->translate(p_text, p_context);
} else {
return rtr;
@@ -4904,10 +5009,24 @@ String RTR(const String &p_text, const String &p_context) {
return p_text;
}
+/**
+ * "Run-time TRanslate for N items". Performs string replacement for
+ * internationalization within a running project. The translation string must be
+ * supplied by the project, as Godot does not provide built-in translations for
+ * `RTRN()` strings to keep binary size low. A translation context can
+ * optionally be specified to disambiguate between identical source strings in
+ * translations. Use `RTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTRN()` instead.
+ */
String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
if (TranslationServer::get_singleton()) {
String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
- if (rtr == String() || rtr == p_text || rtr == p_text_plural) {
+ if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) {
return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context);
} else {
return rtr;
diff --git a/core/string/ustring.h b/core/string/ustring.h
index 1d80ccf58d..6c3169f136 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -28,10 +28,12 @@
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
+// Note: _GODOT suffix added to avoid conflict with ICU header with the same guard.
+
#ifndef USTRING_GODOT_H
#define USTRING_GODOT_H
-// Note: Renamed to avoid conflict with ICU header with the same name.
+#include "core/string/char_utils.h"
#include "core/templates/cowdata.h"
#include "core/templates/vector.h"
#include "core/typedefs.h"
@@ -108,13 +110,10 @@ public:
_FORCE_INLINE_ Char16String() {}
_FORCE_INLINE_ Char16String(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); }
- _FORCE_INLINE_ Char16String &operator=(const Char16String &p_str) {
- _cowdata._ref(p_str._cowdata);
- return *this;
- }
+ _FORCE_INLINE_ void operator=(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); }
_FORCE_INLINE_ Char16String(const char16_t *p_cstr) { copy_from(p_cstr); }
- Char16String &operator=(const char16_t *p_cstr);
+ void operator=(const char16_t *p_cstr);
bool operator<(const Char16String &p_right) const;
Char16String &operator+=(char16_t p_char);
int length() const { return size() ? size() - 1 : 0; }
@@ -152,13 +151,10 @@ public:
_FORCE_INLINE_ CharString() {}
_FORCE_INLINE_ CharString(const CharString &p_str) { _cowdata._ref(p_str._cowdata); }
- _FORCE_INLINE_ CharString &operator=(const CharString &p_str) {
- _cowdata._ref(p_str._cowdata);
- return *this;
- }
+ _FORCE_INLINE_ void operator=(const CharString &p_str) { _cowdata._ref(p_str._cowdata); }
_FORCE_INLINE_ CharString(const char *p_cstr) { copy_from(p_cstr); }
- CharString &operator=(const char *p_cstr);
+ void operator=(const char *p_cstr);
bool operator<(const CharString &p_right) const;
CharString &operator+=(char p_char);
int length() const { return size() ? size() - 1 : 0; }
@@ -209,7 +205,7 @@ public:
_FORCE_INLINE_ char32_t *ptrw() { return _cowdata.ptrw(); }
_FORCE_INLINE_ const char32_t *ptr() const { return _cowdata.ptr(); }
- void remove(int p_index) { _cowdata.remove(p_index); }
+ void remove_at(int p_index) { _cowdata.remove_at(p_index); }
_FORCE_INLINE_ void clear() { resize(0); }
@@ -230,6 +226,7 @@ public:
bool operator==(const String &p_str) const;
bool operator!=(const String &p_str) const;
String operator+(const String &p_str) const;
+ String operator+(char32_t p_char) const;
String &operator+=(const String &);
String &operator+=(char32_t p_char);
@@ -275,6 +272,9 @@ public:
bool is_valid_string() const;
+ /* debug, error messages */
+ void print_unicode_error(const String &p_message, bool p_critical = false) const;
+
/* complex helpers */
String substr(int p_from, int p_chars = -1) const;
int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed
@@ -291,7 +291,7 @@ public:
bool ends_with(const String &p_string) const;
bool is_enclosed_in(const String &p_string) const;
bool is_subsequence_of(const String &p_string) const;
- bool is_subsequence_ofi(const String &p_string) const;
+ bool is_subsequence_ofn(const String &p_string) const;
bool is_quoted() const;
Vector<String> bigrams() const;
float similarity(const String &p_string) const;
@@ -360,8 +360,9 @@ public:
int count(const String &p_string, int p_from = 0, int p_to = 0) const;
int countn(const String &p_string, int p_from = 0, int p_to = 0) const;
- String left(int p_pos) const;
- String right(int p_pos) const;
+ String left(int p_len) const;
+ String right(int p_len) const;
+ String indent(const String &p_prefix) const;
String dedent() const;
String strip_edges(bool left = true, bool right = true) const;
String strip_escapes() const;
@@ -376,11 +377,11 @@ public:
CharString ascii(bool p_allow_extended = false) const;
CharString utf8() const;
- bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error
+ Error parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false);
static String utf8(const char *p_utf8, int p_len = -1);
Char16String utf16() const;
- bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error
+ Error parse_utf16(const char16_t *p_utf16, int p_len = -1);
static String utf16(const char16_t *p_utf16, int p_len = -1);
static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
@@ -399,6 +400,8 @@ public:
Vector<uint8_t> sha256_buffer() const;
_FORCE_INLINE_ bool is_empty() const { return length() == 0; }
+ _FORCE_INLINE_ bool contains(const char *p_str) const { return find(p_str) != -1; }
+ _FORCE_INLINE_ bool contains(const String &p_str) const { return find(p_str) != -1; }
// path functions
bool is_absolute_path() const;
@@ -410,6 +413,7 @@ public:
String get_file() const;
static String humanize_size(uint64_t p_size);
String simplify_path() const;
+ bool is_network_share_path() const;
String xml_escape(bool p_escape_quotes = false) const;
String xml_unescape() const;
@@ -427,6 +431,7 @@ public:
// node functions
static const String invalid_node_name_characters;
String validate_node_name() const;
+ String validate_identifier() const;
bool is_valid_identifier() const;
bool is_valid_int() const;
@@ -442,11 +447,7 @@ public:
_FORCE_INLINE_ String() {}
_FORCE_INLINE_ String(const String &p_str) { _cowdata._ref(p_str._cowdata); }
-
- String &operator=(const String &p_str) {
- _cowdata._ref(p_str._cowdata);
- return *this;
- }
+ _FORCE_INLINE_ void operator=(const String &p_str) { _cowdata._ref(p_str._cowdata); }
Vector<uint8_t> to_ascii_buffer() const;
Vector<uint8_t> to_utf8_buffer() const;
@@ -527,19 +528,24 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St
#define TTRGET(m_value) TTR(m_value)
#else
-#define TTR(m_value) String()
-#define TTRN(m_value) String()
-#define DTR(m_value) String()
-#define DTRN(m_value) String()
#define TTRC(m_value) (m_value)
#define TTRGET(m_value) (m_value)
#endif
+// Use this to mark property names for editor translation.
+// Often for dynamic properties defined in _get_property_list().
+// Property names defined directly inside EDITOR_DEF, GLOBAL_DEF, and ADD_PROPERTY macros don't need this.
+#define PNAME(m_value) (m_value)
+
+// Similar to PNAME, but to mark groups, i.e. properties with PROPERTY_USAGE_GROUP.
+// Groups defined directly inside ADD_GROUP macros don't need this.
+// The arguments are the same as ADD_GROUP. m_prefix is only used for extraction.
+#define GNAME(m_value, m_prefix) (m_value)
+
// Runtime translate for the public node API.
String RTR(const String &p_text, const String &p_context = "");
String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = "");
-bool is_symbol(char32_t c);
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end);
_FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) {