diff options
Diffstat (limited to 'core/string')
-rw-r--r-- | core/string/char_range.inc | 1456 | ||||
-rw-r--r-- | core/string/char_utils.h | 112 | ||||
-rw-r--r-- | core/string/locales.h | 1197 | ||||
-rw-r--r-- | core/string/node_path.cpp | 27 | ||||
-rw-r--r-- | core/string/node_path.h | 6 | ||||
-rw-r--r-- | core/string/optimized_translation.cpp | 14 | ||||
-rw-r--r-- | core/string/optimized_translation.h | 6 | ||||
-rw-r--r-- | core/string/print_string.cpp | 114 | ||||
-rw-r--r-- | core/string/print_string.h | 40 | ||||
-rw-r--r-- | core/string/string_buffer.h | 4 | ||||
-rw-r--r-- | core/string/string_builder.cpp | 6 | ||||
-rw-r--r-- | core/string/string_builder.h | 4 | ||||
-rw-r--r-- | core/string/string_name.cpp | 40 | ||||
-rw-r--r-- | core/string/string_name.h | 31 | ||||
-rw-r--r-- | core/string/translation.cpp | 1265 | ||||
-rw-r--r-- | core/string/translation.h | 46 | ||||
-rw-r--r-- | core/string/translation_po.cpp | 47 | ||||
-rw-r--r-- | core/string/translation_po.h | 4 | ||||
-rw-r--r-- | core/string/ucaps.h | 4 | ||||
-rw-r--r-- | core/string/ustring.cpp | 1059 | ||||
-rw-r--r-- | core/string/ustring.h | 64 |
21 files changed, 3994 insertions, 1552 deletions
diff --git a/core/string/char_range.inc b/core/string/char_range.inc new file mode 100644 index 0000000000..c0be9016ad --- /dev/null +++ b/core/string/char_range.inc @@ -0,0 +1,1456 @@ +/*************************************************************************/ +/* char_range.inc */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CHAR_RANGE_INC +#define CHAR_RANGE_INC + +#include "core/typedefs.h" + +struct CharRange { + char32_t start; + char32_t end; +}; + +static CharRange xid_start[] = { + { 0x41, 0x5a }, + { 0x5f, 0x5f }, + { 0x61, 0x7a }, + { 0xaa, 0xaa }, + { 0xb5, 0xb5 }, + { 0xba, 0xba }, + { 0xc0, 0xd6 }, + { 0xd8, 0xf6 }, + { 0xf8, 0x2c1 }, + { 0x2c6, 0x2d1 }, + { 0x2e0, 0x2e4 }, + { 0x2ec, 0x2ec }, + { 0x2ee, 0x2ee }, + { 0x370, 0x374 }, + { 0x376, 0x377 }, + { 0x37a, 0x37d }, + { 0x37f, 0x37f }, + { 0x386, 0x386 }, + { 0x388, 0x38a }, + { 0x38c, 0x38c }, + { 0x38e, 0x3a1 }, + { 0x3a3, 0x3f5 }, + { 0x3f7, 0x481 }, + { 0x48a, 0x52f }, + { 0x531, 0x556 }, + { 0x559, 0x559 }, + { 0x560, 0x588 }, + { 0x5d0, 0x5ea }, + { 0x5ef, 0x5f2 }, + { 0x620, 0x64a }, + { 0x66e, 0x66f }, + { 0x671, 0x6d3 }, + { 0x6d5, 0x6d5 }, + { 0x6e5, 0x6e6 }, + { 0x6ee, 0x6ef }, + { 0x6fa, 0x6fc }, + { 0x6ff, 0x6ff }, + { 0x710, 0x710 }, + { 0x712, 0x72f }, + { 0x74d, 0x7a5 }, + { 0x7b1, 0x7b1 }, + { 0x7ca, 0x7ea }, + { 0x7f4, 0x7f5 }, + { 0x7fa, 0x7fa }, + { 0x800, 0x815 }, + { 0x81a, 0x81a }, + { 0x824, 0x824 }, + { 0x828, 0x828 }, + { 0x840, 0x858 }, + { 0x860, 0x86a }, + { 0x870, 0x887 }, + { 0x889, 0x88e }, + { 0x8a0, 0x8c9 }, + { 0x904, 0x939 }, + { 0x93d, 0x93d }, + { 0x950, 0x950 }, + { 0x958, 0x961 }, + { 0x971, 0x980 }, + { 0x985, 0x98c }, + { 0x98f, 0x990 }, + { 0x993, 0x9a8 }, + { 0x9aa, 0x9b0 }, + { 0x9b2, 0x9b2 }, + { 0x9b6, 0x9b9 }, + { 0x9bd, 0x9bd }, + { 0x9ce, 0x9ce }, + { 0x9dc, 0x9dd }, + { 0x9df, 0x9e1 }, + { 0x9f0, 0x9f1 }, + { 0x9fc, 0x9fc }, + { 0xa05, 0xa0a }, + { 0xa0f, 0xa10 }, + { 0xa13, 0xa28 }, + { 0xa2a, 0xa30 }, + { 0xa32, 0xa33 }, + { 0xa35, 0xa36 }, + { 0xa38, 0xa39 }, + { 0xa59, 0xa5c }, + { 0xa5e, 0xa5e }, + { 0xa72, 0xa74 }, + { 0xa85, 0xa8d }, + { 0xa8f, 0xa91 }, + { 0xa93, 0xaa8 }, + { 0xaaa, 0xab0 }, + { 0xab2, 0xab3 }, + { 0xab5, 0xab9 }, + { 0xabd, 0xabd }, + { 0xad0, 0xad0 }, + { 0xae0, 0xae1 }, + { 0xaf9, 0xaf9 }, + { 0xb05, 0xb0c }, + { 0xb0f, 0xb10 }, + { 0xb13, 0xb28 }, + { 0xb2a, 0xb30 }, + { 0xb32, 0xb33 }, + { 0xb35, 0xb39 }, + { 0xb3d, 0xb3d }, + { 0xb5c, 0xb5d }, + { 0xb5f, 0xb61 }, + { 0xb71, 0xb71 }, + { 0xb83, 0xb83 }, + { 0xb85, 0xb8a }, + { 0xb8e, 0xb90 }, + { 0xb92, 0xb95 }, + { 0xb99, 0xb9a }, + { 0xb9c, 0xb9c }, + { 0xb9e, 0xb9f }, + { 0xba3, 0xba4 }, + { 0xba8, 0xbaa }, + { 0xbae, 0xbb9 }, + { 0xbd0, 0xbd0 }, + { 0xc05, 0xc0c }, + { 0xc0e, 0xc10 }, + { 0xc12, 0xc28 }, + { 0xc2a, 0xc39 }, + { 0xc3d, 0xc3d }, + { 0xc58, 0xc5a }, + { 0xc5d, 0xc5d }, + { 0xc60, 0xc61 }, + { 0xc80, 0xc80 }, + { 0xc85, 0xc8c }, + { 0xc8e, 0xc90 }, + { 0xc92, 0xca8 }, + { 0xcaa, 0xcb3 }, + { 0xcb5, 0xcb9 }, + { 0xcbd, 0xcbd }, + { 0xcdd, 0xcde }, + { 0xce0, 0xce1 }, + { 0xcf1, 0xcf2 }, + { 0xd04, 0xd0c }, + { 0xd0e, 0xd10 }, + { 0xd12, 0xd3a }, + { 0xd3d, 0xd3d }, + { 0xd4e, 0xd4e }, + { 0xd54, 0xd56 }, + { 0xd5f, 0xd61 }, + { 0xd7a, 0xd7f }, + { 0xd85, 0xd96 }, + { 0xd9a, 0xdb1 }, + { 0xdb3, 0xdbb }, + { 0xdbd, 0xdbd }, + { 0xdc0, 0xdc6 }, + { 0xe01, 0xe30 }, + { 0xe32, 0xe33 }, + { 0xe40, 0xe46 }, + { 0xe81, 0xe82 }, + { 0xe84, 0xe84 }, + { 0xe86, 0xe8a }, + { 0xe8c, 0xea3 }, + { 0xea5, 0xea5 }, + { 0xea7, 0xeb0 }, + { 0xeb2, 0xeb3 }, + { 0xebd, 0xebd }, + { 0xec0, 0xec4 }, + { 0xec6, 0xec6 }, + { 0xedc, 0xedf }, + { 0xf00, 0xf00 }, + { 0xf40, 0xf47 }, + { 0xf49, 0xf6c }, + { 0xf88, 0xf8c }, + { 0x1000, 0x102a }, + { 0x103f, 0x103f }, + { 0x1050, 0x1055 }, + { 0x105a, 0x105d }, + { 0x1061, 0x1061 }, + { 0x1065, 0x1066 }, + { 0x106e, 0x1070 }, + { 0x1075, 0x1081 }, + { 0x108e, 0x108e }, + { 0x10a0, 0x10c5 }, + { 0x10c7, 0x10c7 }, + { 0x10cd, 0x10cd }, + { 0x10d0, 0x10fa }, + { 0x10fc, 0x1248 }, + { 0x124a, 0x124d }, + { 0x1250, 0x1256 }, + { 0x1258, 0x1258 }, + { 0x125a, 0x125d }, + { 0x1260, 0x1288 }, + { 0x128a, 0x128d }, + { 0x1290, 0x12b0 }, + { 0x12b2, 0x12b5 }, + { 0x12b8, 0x12be }, + { 0x12c0, 0x12c0 }, + { 0x12c2, 0x12c5 }, + { 0x12c8, 0x12d6 }, + { 0x12d8, 0x1310 }, + { 0x1312, 0x1315 }, + { 0x1318, 0x135a }, + { 0x1380, 0x138f }, + { 0x13a0, 0x13f5 }, + { 0x13f8, 0x13fd }, + { 0x1401, 0x166c }, + { 0x166f, 0x167f }, + { 0x1681, 0x169a }, + { 0x16a0, 0x16ea }, + { 0x16ee, 0x16f8 }, + { 0x1700, 0x1711 }, + { 0x171f, 0x1731 }, + { 0x1740, 0x1751 }, + { 0x1760, 0x176c }, + { 0x176e, 0x1770 }, + { 0x1780, 0x17b3 }, + { 0x17d7, 0x17d7 }, + { 0x17dc, 0x17dc }, + { 0x1820, 0x1878 }, + { 0x1880, 0x1884 }, + { 0x1887, 0x18a8 }, + { 0x18aa, 0x18aa }, + { 0x18b0, 0x18f5 }, + { 0x1900, 0x191e }, + { 0x1950, 0x196d }, + { 0x1970, 0x1974 }, + { 0x1980, 0x19ab }, + { 0x19b0, 0x19c9 }, + { 0x1a00, 0x1a16 }, + { 0x1a20, 0x1a54 }, + { 0x1aa7, 0x1aa7 }, + { 0x1b05, 0x1b33 }, + { 0x1b45, 0x1b4c }, + { 0x1b83, 0x1ba0 }, + { 0x1bae, 0x1baf }, + { 0x1bba, 0x1be5 }, + { 0x1c00, 0x1c23 }, + { 0x1c4d, 0x1c4f }, + { 0x1c5a, 0x1c7d }, + { 0x1c80, 0x1c88 }, + { 0x1c90, 0x1cba }, + { 0x1cbd, 0x1cbf }, + { 0x1ce9, 0x1cec }, + { 0x1cee, 0x1cf3 }, + { 0x1cf5, 0x1cf6 }, + { 0x1cfa, 0x1cfa }, + { 0x1d00, 0x1dbf }, + { 0x1e00, 0x1f15 }, + { 0x1f18, 0x1f1d }, + { 0x1f20, 0x1f45 }, + { 0x1f48, 0x1f4d }, + { 0x1f50, 0x1f57 }, + { 0x1f59, 0x1f59 }, + { 0x1f5b, 0x1f5b }, + { 0x1f5d, 0x1f5d }, + { 0x1f5f, 0x1f7d }, + { 0x1f80, 0x1fb4 }, + { 0x1fb6, 0x1fbc }, + { 0x1fbe, 0x1fbe }, + { 0x1fc2, 0x1fc4 }, + { 0x1fc6, 0x1fcc }, + { 0x1fd0, 0x1fd3 }, + { 0x1fd6, 0x1fdb }, + { 0x1fe0, 0x1fec }, + { 0x1ff2, 0x1ff4 }, + { 0x1ff6, 0x1ffc }, + { 0x2071, 0x2071 }, + { 0x207f, 0x207f }, + { 0x2090, 0x209c }, + { 0x2102, 0x2102 }, + { 0x2107, 0x2107 }, + { 0x210a, 0x2113 }, + { 0x2115, 0x2115 }, + { 0x2118, 0x211d }, + { 0x2124, 0x2124 }, + { 0x2126, 0x2126 }, + { 0x2128, 0x2128 }, + { 0x212a, 0x2139 }, + { 0x213c, 0x213f }, + { 0x2145, 0x2149 }, + { 0x214e, 0x214e }, + { 0x2160, 0x2188 }, + { 0x2c00, 0x2ce4 }, + { 0x2ceb, 0x2cee }, + { 0x2cf2, 0x2cf3 }, + { 0x2d00, 0x2d25 }, + { 0x2d27, 0x2d27 }, + { 0x2d2d, 0x2d2d }, + { 0x2d30, 0x2d67 }, + { 0x2d6f, 0x2d6f }, + { 0x2d80, 0x2d96 }, + { 0x2da0, 0x2da6 }, + { 0x2da8, 0x2dae }, + { 0x2db0, 0x2db6 }, + { 0x2db8, 0x2dbe }, + { 0x2dc0, 0x2dc6 }, + { 0x2dc8, 0x2dce }, + { 0x2dd0, 0x2dd6 }, + { 0x2dd8, 0x2dde }, + { 0x3005, 0x3007 }, + { 0x3021, 0x3029 }, + { 0x3031, 0x3035 }, + { 0x3038, 0x303c }, + { 0x3041, 0x3096 }, + { 0x309b, 0x309f }, + { 0x30a1, 0x30fa }, + { 0x30fc, 0x30ff }, + { 0x3105, 0x312f }, + { 0x3131, 0x318e }, + { 0x31a0, 0x31bf }, + { 0x31f0, 0x31ff }, + { 0x3400, 0x4dbf }, + { 0x4e00, 0xa48c }, + { 0xa4d0, 0xa4fd }, + { 0xa500, 0xa60c }, + { 0xa610, 0xa61f }, + { 0xa62a, 0xa62b }, + { 0xa640, 0xa66e }, + { 0xa67f, 0xa69d }, + { 0xa6a0, 0xa6ef }, + { 0xa717, 0xa71f }, + { 0xa722, 0xa788 }, + { 0xa78b, 0xa7ca }, + { 0xa7d0, 0xa7d1 }, + { 0xa7d3, 0xa7d3 }, + { 0xa7d5, 0xa7d9 }, + { 0xa7f2, 0xa801 }, + { 0xa803, 0xa805 }, + { 0xa807, 0xa80a }, + { 0xa80c, 0xa822 }, + { 0xa840, 0xa873 }, + { 0xa882, 0xa8b3 }, + { 0xa8f2, 0xa8f7 }, + { 0xa8fb, 0xa8fb }, + { 0xa8fd, 0xa8fe }, + { 0xa90a, 0xa925 }, + { 0xa930, 0xa946 }, + { 0xa960, 0xa97c }, + { 0xa984, 0xa9b2 }, + { 0xa9cf, 0xa9cf }, + { 0xa9e0, 0xa9e4 }, + { 0xa9e6, 0xa9ef }, + { 0xa9fa, 0xa9fe }, + { 0xaa00, 0xaa28 }, + { 0xaa40, 0xaa42 }, + { 0xaa44, 0xaa4b }, + { 0xaa60, 0xaa76 }, + { 0xaa7a, 0xaa7a }, + { 0xaa7e, 0xaaaf }, + { 0xaab1, 0xaab1 }, + { 0xaab5, 0xaab6 }, + { 0xaab9, 0xaabd }, + { 0xaac0, 0xaac0 }, + { 0xaac2, 0xaac2 }, + { 0xaadb, 0xaadd }, + { 0xaae0, 0xaaea }, + { 0xaaf2, 0xaaf4 }, + { 0xab01, 0xab06 }, + { 0xab09, 0xab0e }, + { 0xab11, 0xab16 }, + { 0xab20, 0xab26 }, + { 0xab28, 0xab2e }, + { 0xab30, 0xab5a }, + { 0xab5c, 0xab69 }, + { 0xab70, 0xabe2 }, + { 0xac00, 0xd7a3 }, + { 0xd7b0, 0xd7c6 }, + { 0xd7cb, 0xd7fb }, + { 0xf900, 0xfa6d }, + { 0xfa70, 0xfad9 }, + { 0xfb00, 0xfb06 }, + { 0xfb13, 0xfb17 }, + { 0xfb1d, 0xfb1d }, + { 0xfb1f, 0xfb28 }, + { 0xfb2a, 0xfb36 }, + { 0xfb38, 0xfb3c }, + { 0xfb3e, 0xfb3e }, + { 0xfb40, 0xfb41 }, + { 0xfb43, 0xfb44 }, + { 0xfb46, 0xfbb1 }, + { 0xfbd3, 0xfd3d }, + { 0xfd50, 0xfd8f }, + { 0xfd92, 0xfdc7 }, + { 0xfdf0, 0xfdfb }, + { 0xfe70, 0xfe74 }, + { 0xfe76, 0xfefc }, + { 0xff21, 0xff3a }, + { 0xff41, 0xff5a }, + { 0xff66, 0xffbe }, + { 0xffc2, 0xffc7 }, + { 0xffca, 0xffcf }, + { 0xffd2, 0xffd7 }, + { 0xffda, 0xffdc }, + { 0x10000, 0x1000b }, + { 0x1000d, 0x10026 }, + { 0x10028, 0x1003a }, + { 0x1003c, 0x1003d }, + { 0x1003f, 0x1004d }, + { 0x10050, 0x1005d }, + { 0x10080, 0x100fa }, + { 0x10140, 0x10174 }, + { 0x10280, 0x1029c }, + { 0x102a0, 0x102d0 }, + { 0x10300, 0x1031f }, + { 0x1032d, 0x1034a }, + { 0x10350, 0x10375 }, + { 0x10380, 0x1039d }, + { 0x103a0, 0x103c3 }, + { 0x103c8, 0x103cf }, + { 0x103d1, 0x103d5 }, + { 0x10400, 0x1049d }, + { 0x104b0, 0x104d3 }, + { 0x104d8, 0x104fb }, + { 0x10500, 0x10527 }, + { 0x10530, 0x10563 }, + { 0x10570, 0x1057a }, + { 0x1057c, 0x1058a }, + { 0x1058c, 0x10592 }, + { 0x10594, 0x10595 }, + { 0x10597, 0x105a1 }, + { 0x105a3, 0x105b1 }, + { 0x105b3, 0x105b9 }, + { 0x105bb, 0x105bc }, + { 0x10600, 0x10736 }, + { 0x10740, 0x10755 }, + { 0x10760, 0x10767 }, + { 0x10780, 0x10785 }, + { 0x10787, 0x107b0 }, + { 0x107b2, 0x107ba }, + { 0x10800, 0x10805 }, + { 0x10808, 0x10808 }, + { 0x1080a, 0x10835 }, + { 0x10837, 0x10838 }, + { 0x1083c, 0x1083c }, + { 0x1083f, 0x10855 }, + { 0x10860, 0x10876 }, + { 0x10880, 0x1089e }, + { 0x108e0, 0x108f2 }, + { 0x108f4, 0x108f5 }, + { 0x10900, 0x10915 }, + { 0x10920, 0x10939 }, + { 0x10980, 0x109b7 }, + { 0x109be, 0x109bf }, + { 0x10a00, 0x10a00 }, + { 0x10a10, 0x10a13 }, + { 0x10a15, 0x10a17 }, + { 0x10a19, 0x10a35 }, + { 0x10a60, 0x10a7c }, + { 0x10a80, 0x10a9c }, + { 0x10ac0, 0x10ac7 }, + { 0x10ac9, 0x10ae4 }, + { 0x10b00, 0x10b35 }, + { 0x10b40, 0x10b55 }, + { 0x10b60, 0x10b72 }, + { 0x10b80, 0x10b91 }, + { 0x10c00, 0x10c48 }, + { 0x10c80, 0x10cb2 }, + { 0x10cc0, 0x10cf2 }, + { 0x10d00, 0x10d23 }, + { 0x10e80, 0x10ea9 }, + { 0x10eb0, 0x10eb1 }, + { 0x10f00, 0x10f1c }, + { 0x10f27, 0x10f27 }, + { 0x10f30, 0x10f45 }, + { 0x10f70, 0x10f81 }, + { 0x10fb0, 0x10fc4 }, + { 0x10fe0, 0x10ff6 }, + { 0x11003, 0x11037 }, + { 0x11071, 0x11072 }, + { 0x11075, 0x11075 }, + { 0x11083, 0x110af }, + { 0x110d0, 0x110e8 }, + { 0x11103, 0x11126 }, + { 0x11144, 0x11144 }, + { 0x11147, 0x11147 }, + { 0x11150, 0x11172 }, + { 0x11176, 0x11176 }, + { 0x11183, 0x111b2 }, + { 0x111c1, 0x111c4 }, + { 0x111da, 0x111da }, + { 0x111dc, 0x111dc }, + { 0x11200, 0x11211 }, + { 0x11213, 0x1122b }, + { 0x11280, 0x11286 }, + { 0x11288, 0x11288 }, + { 0x1128a, 0x1128d }, + { 0x1128f, 0x1129d }, + { 0x1129f, 0x112a8 }, + { 0x112b0, 0x112de }, + { 0x11305, 0x1130c }, + { 0x1130f, 0x11310 }, + { 0x11313, 0x11328 }, + { 0x1132a, 0x11330 }, + { 0x11332, 0x11333 }, + { 0x11335, 0x11339 }, + { 0x1133d, 0x1133d }, + { 0x11350, 0x11350 }, + { 0x1135d, 0x11361 }, + { 0x11400, 0x11434 }, + { 0x11447, 0x1144a }, + { 0x1145f, 0x11461 }, + { 0x11480, 0x114af }, + { 0x114c4, 0x114c5 }, + { 0x114c7, 0x114c7 }, + { 0x11580, 0x115ae }, + { 0x115d8, 0x115db }, + { 0x11600, 0x1162f }, + { 0x11644, 0x11644 }, + { 0x11680, 0x116aa }, + { 0x116b8, 0x116b8 }, + { 0x11700, 0x1171a }, + { 0x11740, 0x11746 }, + { 0x11800, 0x1182b }, + { 0x118a0, 0x118df }, + { 0x118ff, 0x11906 }, + { 0x11909, 0x11909 }, + { 0x1190c, 0x11913 }, + { 0x11915, 0x11916 }, + { 0x11918, 0x1192f }, + { 0x1193f, 0x1193f }, + { 0x11941, 0x11941 }, + { 0x119a0, 0x119a7 }, + { 0x119aa, 0x119d0 }, + { 0x119e1, 0x119e1 }, + { 0x119e3, 0x119e3 }, + { 0x11a00, 0x11a00 }, + { 0x11a0b, 0x11a32 }, + { 0x11a3a, 0x11a3a }, + { 0x11a50, 0x11a50 }, + { 0x11a5c, 0x11a89 }, + { 0x11a9d, 0x11a9d }, + { 0x11ab0, 0x11af8 }, + { 0x11c00, 0x11c08 }, + { 0x11c0a, 0x11c2e }, + { 0x11c40, 0x11c40 }, + { 0x11c72, 0x11c8f }, + { 0x11d00, 0x11d06 }, + { 0x11d08, 0x11d09 }, + { 0x11d0b, 0x11d30 }, + { 0x11d46, 0x11d46 }, + { 0x11d60, 0x11d65 }, + { 0x11d67, 0x11d68 }, + { 0x11d6a, 0x11d89 }, + { 0x11d98, 0x11d98 }, + { 0x11ee0, 0x11ef2 }, + { 0x11fb0, 0x11fb0 }, + { 0x12000, 0x12399 }, + { 0x12400, 0x1246e }, + { 0x12480, 0x12543 }, + { 0x12f90, 0x12ff0 }, + { 0x13000, 0x1342e }, + { 0x14400, 0x14646 }, + { 0x16800, 0x16a38 }, + { 0x16a40, 0x16a5e }, + { 0x16a70, 0x16abe }, + { 0x16ad0, 0x16aed }, + { 0x16b00, 0x16b2f }, + { 0x16b40, 0x16b43 }, + { 0x16b63, 0x16b77 }, + { 0x16b7d, 0x16b8f }, + { 0x16e40, 0x16e7f }, + { 0x16f00, 0x16f4a }, + { 0x16f50, 0x16f50 }, + { 0x16f93, 0x16f9f }, + { 0x16fe0, 0x16fe1 }, + { 0x16fe3, 0x16fe3 }, + { 0x17000, 0x187f7 }, + { 0x18800, 0x18cd5 }, + { 0x18d00, 0x18d08 }, + { 0x1aff0, 0x1aff3 }, + { 0x1aff5, 0x1affb }, + { 0x1affd, 0x1affe }, + { 0x1b000, 0x1b122 }, + { 0x1b150, 0x1b152 }, + { 0x1b164, 0x1b167 }, + { 0x1b170, 0x1b2fb }, + { 0x1bc00, 0x1bc6a }, + { 0x1bc70, 0x1bc7c }, + { 0x1bc80, 0x1bc88 }, + { 0x1bc90, 0x1bc99 }, + { 0x1d400, 0x1d454 }, + { 0x1d456, 0x1d49c }, + { 0x1d49e, 0x1d49f }, + { 0x1d4a2, 0x1d4a2 }, + { 0x1d4a5, 0x1d4a6 }, + { 0x1d4a9, 0x1d4ac }, + { 0x1d4ae, 0x1d4b9 }, + { 0x1d4bb, 0x1d4bb }, + { 0x1d4bd, 0x1d4c3 }, + { 0x1d4c5, 0x1d505 }, + { 0x1d507, 0x1d50a }, + { 0x1d50d, 0x1d514 }, + { 0x1d516, 0x1d51c }, + { 0x1d51e, 0x1d539 }, + { 0x1d53b, 0x1d53e }, + { 0x1d540, 0x1d544 }, + { 0x1d546, 0x1d546 }, + { 0x1d54a, 0x1d550 }, + { 0x1d552, 0x1d6a5 }, + { 0x1d6a8, 0x1d6c0 }, + { 0x1d6c2, 0x1d6da }, + { 0x1d6dc, 0x1d6fa }, + { 0x1d6fc, 0x1d714 }, + { 0x1d716, 0x1d734 }, + { 0x1d736, 0x1d74e }, + { 0x1d750, 0x1d76e }, + { 0x1d770, 0x1d788 }, + { 0x1d78a, 0x1d7a8 }, + { 0x1d7aa, 0x1d7c2 }, + { 0x1d7c4, 0x1d7cb }, + { 0x1df00, 0x1df1e }, + { 0x1e100, 0x1e12c }, + { 0x1e137, 0x1e13d }, + { 0x1e14e, 0x1e14e }, + { 0x1e290, 0x1e2ad }, + { 0x1e2c0, 0x1e2eb }, + { 0x1e7e0, 0x1e7e6 }, + { 0x1e7e8, 0x1e7eb }, + { 0x1e7ed, 0x1e7ee }, + { 0x1e7f0, 0x1e7fe }, + { 0x1e800, 0x1e8c4 }, + { 0x1e900, 0x1e943 }, + { 0x1e94b, 0x1e94b }, + { 0x1ee00, 0x1ee03 }, + { 0x1ee05, 0x1ee1f }, + { 0x1ee21, 0x1ee22 }, + { 0x1ee24, 0x1ee24 }, + { 0x1ee27, 0x1ee27 }, + { 0x1ee29, 0x1ee32 }, + { 0x1ee34, 0x1ee37 }, + { 0x1ee39, 0x1ee39 }, + { 0x1ee3b, 0x1ee3b }, + { 0x1ee42, 0x1ee42 }, + { 0x1ee47, 0x1ee47 }, + { 0x1ee49, 0x1ee49 }, + { 0x1ee4b, 0x1ee4b }, + { 0x1ee4d, 0x1ee4f }, + { 0x1ee51, 0x1ee52 }, + { 0x1ee54, 0x1ee54 }, + { 0x1ee57, 0x1ee57 }, + { 0x1ee59, 0x1ee59 }, + { 0x1ee5b, 0x1ee5b }, + { 0x1ee5d, 0x1ee5d }, + { 0x1ee5f, 0x1ee5f }, + { 0x1ee61, 0x1ee62 }, + { 0x1ee64, 0x1ee64 }, + { 0x1ee67, 0x1ee6a }, + { 0x1ee6c, 0x1ee72 }, + { 0x1ee74, 0x1ee77 }, + { 0x1ee79, 0x1ee7c }, + { 0x1ee7e, 0x1ee7e }, + { 0x1ee80, 0x1ee89 }, + { 0x1ee8b, 0x1ee9b }, + { 0x1eea1, 0x1eea3 }, + { 0x1eea5, 0x1eea9 }, + { 0x1eeab, 0x1eebb }, + { 0x20000, 0x2a6df }, + { 0x2a700, 0x2b738 }, + { 0x2b740, 0x2b81d }, + { 0x2b820, 0x2cea1 }, + { 0x2ceb0, 0x2ebe0 }, + { 0x2f800, 0x2fa1d }, + { 0x30000, 0x3134a }, + { 0x0, 0x0 }, +}; + +static CharRange xid_continue[] = { + { 0x30, 0x39 }, + { 0x41, 0x5a }, + { 0x5f, 0x5f }, + { 0x61, 0x7a }, + { 0xaa, 0xaa }, + { 0xb5, 0xb5 }, + { 0xb7, 0xb7 }, + { 0xba, 0xba }, + { 0xc0, 0xd6 }, + { 0xd8, 0xf6 }, + { 0xf8, 0x2c1 }, + { 0x2c6, 0x2d1 }, + { 0x2e0, 0x2e4 }, + { 0x2ec, 0x2ec }, + { 0x2ee, 0x2ee }, + { 0x300, 0x374 }, + { 0x376, 0x377 }, + { 0x37a, 0x37d }, + { 0x37f, 0x37f }, + { 0x386, 0x38a }, + { 0x38c, 0x38c }, + { 0x38e, 0x3a1 }, + { 0x3a3, 0x3f5 }, + { 0x3f7, 0x481 }, + { 0x483, 0x487 }, + { 0x48a, 0x52f }, + { 0x531, 0x556 }, + { 0x559, 0x559 }, + { 0x560, 0x588 }, + { 0x591, 0x5bd }, + { 0x5bf, 0x5bf }, + { 0x5c1, 0x5c2 }, + { 0x5c4, 0x5c5 }, + { 0x5c7, 0x5c7 }, + { 0x5d0, 0x5ea }, + { 0x5ef, 0x5f2 }, + { 0x610, 0x61a }, + { 0x620, 0x669 }, + { 0x66e, 0x6d3 }, + { 0x6d5, 0x6dc }, + { 0x6df, 0x6e8 }, + { 0x6ea, 0x6fc }, + { 0x6ff, 0x6ff }, + { 0x710, 0x74a }, + { 0x74d, 0x7b1 }, + { 0x7c0, 0x7f5 }, + { 0x7fa, 0x7fa }, + { 0x7fd, 0x7fd }, + { 0x800, 0x82d }, + { 0x840, 0x85b }, + { 0x860, 0x86a }, + { 0x870, 0x887 }, + { 0x889, 0x88e }, + { 0x898, 0x8e1 }, + { 0x8e3, 0x963 }, + { 0x966, 0x96f }, + { 0x971, 0x983 }, + { 0x985, 0x98c }, + { 0x98f, 0x990 }, + { 0x993, 0x9a8 }, + { 0x9aa, 0x9b0 }, + { 0x9b2, 0x9b2 }, + { 0x9b6, 0x9b9 }, + { 0x9bc, 0x9c4 }, + { 0x9c7, 0x9c8 }, + { 0x9cb, 0x9ce }, + { 0x9d7, 0x9d7 }, + { 0x9dc, 0x9dd }, + { 0x9df, 0x9e3 }, + { 0x9e6, 0x9f1 }, + { 0x9fc, 0x9fc }, + { 0x9fe, 0x9fe }, + { 0xa01, 0xa03 }, + { 0xa05, 0xa0a }, + { 0xa0f, 0xa10 }, + { 0xa13, 0xa28 }, + { 0xa2a, 0xa30 }, + { 0xa32, 0xa33 }, + { 0xa35, 0xa36 }, + { 0xa38, 0xa39 }, + { 0xa3c, 0xa3c }, + { 0xa3e, 0xa42 }, + { 0xa47, 0xa48 }, + { 0xa4b, 0xa4d }, + { 0xa51, 0xa51 }, + { 0xa59, 0xa5c }, + { 0xa5e, 0xa5e }, + { 0xa66, 0xa75 }, + { 0xa81, 0xa83 }, + { 0xa85, 0xa8d }, + { 0xa8f, 0xa91 }, + { 0xa93, 0xaa8 }, + { 0xaaa, 0xab0 }, + { 0xab2, 0xab3 }, + { 0xab5, 0xab9 }, + { 0xabc, 0xac5 }, + { 0xac7, 0xac9 }, + { 0xacb, 0xacd }, + { 0xad0, 0xad0 }, + { 0xae0, 0xae3 }, + { 0xae6, 0xaef }, + { 0xaf9, 0xaff }, + { 0xb01, 0xb03 }, + { 0xb05, 0xb0c }, + { 0xb0f, 0xb10 }, + { 0xb13, 0xb28 }, + { 0xb2a, 0xb30 }, + { 0xb32, 0xb33 }, + { 0xb35, 0xb39 }, + { 0xb3c, 0xb44 }, + { 0xb47, 0xb48 }, + { 0xb4b, 0xb4d }, + { 0xb55, 0xb57 }, + { 0xb5c, 0xb5d }, + { 0xb5f, 0xb63 }, + { 0xb66, 0xb6f }, + { 0xb71, 0xb71 }, + { 0xb82, 0xb83 }, + { 0xb85, 0xb8a }, + { 0xb8e, 0xb90 }, + { 0xb92, 0xb95 }, + { 0xb99, 0xb9a }, + { 0xb9c, 0xb9c }, + { 0xb9e, 0xb9f }, + { 0xba3, 0xba4 }, + { 0xba8, 0xbaa }, + { 0xbae, 0xbb9 }, + { 0xbbe, 0xbc2 }, + { 0xbc6, 0xbc8 }, + { 0xbca, 0xbcd }, + { 0xbd0, 0xbd0 }, + { 0xbd7, 0xbd7 }, + { 0xbe6, 0xbef }, + { 0xc00, 0xc0c }, + { 0xc0e, 0xc10 }, + { 0xc12, 0xc28 }, + { 0xc2a, 0xc39 }, + { 0xc3c, 0xc44 }, + { 0xc46, 0xc48 }, + { 0xc4a, 0xc4d }, + { 0xc55, 0xc56 }, + { 0xc58, 0xc5a }, + { 0xc5d, 0xc5d }, + { 0xc60, 0xc63 }, + { 0xc66, 0xc6f }, + { 0xc80, 0xc83 }, + { 0xc85, 0xc8c }, + { 0xc8e, 0xc90 }, + { 0xc92, 0xca8 }, + { 0xcaa, 0xcb3 }, + { 0xcb5, 0xcb9 }, + { 0xcbc, 0xcc4 }, + { 0xcc6, 0xcc8 }, + { 0xcca, 0xccd }, + { 0xcd5, 0xcd6 }, + { 0xcdd, 0xcde }, + { 0xce0, 0xce3 }, + { 0xce6, 0xcef }, + { 0xcf1, 0xcf2 }, + { 0xd00, 0xd0c }, + { 0xd0e, 0xd10 }, + { 0xd12, 0xd44 }, + { 0xd46, 0xd48 }, + { 0xd4a, 0xd4e }, + { 0xd54, 0xd57 }, + { 0xd5f, 0xd63 }, + { 0xd66, 0xd6f }, + { 0xd7a, 0xd7f }, + { 0xd81, 0xd83 }, + { 0xd85, 0xd96 }, + { 0xd9a, 0xdb1 }, + { 0xdb3, 0xdbb }, + { 0xdbd, 0xdbd }, + { 0xdc0, 0xdc6 }, + { 0xdca, 0xdca }, + { 0xdcf, 0xdd4 }, + { 0xdd6, 0xdd6 }, + { 0xdd8, 0xddf }, + { 0xde6, 0xdef }, + { 0xdf2, 0xdf3 }, + { 0xe01, 0xe3a }, + { 0xe40, 0xe4e }, + { 0xe50, 0xe59 }, + { 0xe81, 0xe82 }, + { 0xe84, 0xe84 }, + { 0xe86, 0xe8a }, + { 0xe8c, 0xea3 }, + { 0xea5, 0xea5 }, + { 0xea7, 0xebd }, + { 0xec0, 0xec4 }, + { 0xec6, 0xec6 }, + { 0xec8, 0xecd }, + { 0xed0, 0xed9 }, + { 0xedc, 0xedf }, + { 0xf00, 0xf00 }, + { 0xf18, 0xf19 }, + { 0xf20, 0xf29 }, + { 0xf35, 0xf35 }, + { 0xf37, 0xf37 }, + { 0xf39, 0xf39 }, + { 0xf3e, 0xf47 }, + { 0xf49, 0xf6c }, + { 0xf71, 0xf84 }, + { 0xf86, 0xf97 }, + { 0xf99, 0xfbc }, + { 0xfc6, 0xfc6 }, + { 0x1000, 0x1049 }, + { 0x1050, 0x109d }, + { 0x10a0, 0x10c5 }, + { 0x10c7, 0x10c7 }, + { 0x10cd, 0x10cd }, + { 0x10d0, 0x10fa }, + { 0x10fc, 0x1248 }, + { 0x124a, 0x124d }, + { 0x1250, 0x1256 }, + { 0x1258, 0x1258 }, + { 0x125a, 0x125d }, + { 0x1260, 0x1288 }, + { 0x128a, 0x128d }, + { 0x1290, 0x12b0 }, + { 0x12b2, 0x12b5 }, + { 0x12b8, 0x12be }, + { 0x12c0, 0x12c0 }, + { 0x12c2, 0x12c5 }, + { 0x12c8, 0x12d6 }, + { 0x12d8, 0x1310 }, + { 0x1312, 0x1315 }, + { 0x1318, 0x135a }, + { 0x135d, 0x135f }, + { 0x1369, 0x1369 }, + { 0x1371, 0x1371 }, + { 0x1380, 0x138f }, + { 0x13a0, 0x13f5 }, + { 0x13f8, 0x13fd }, + { 0x1401, 0x166c }, + { 0x166f, 0x167f }, + { 0x1681, 0x169a }, + { 0x16a0, 0x16ea }, + { 0x16ee, 0x16f8 }, + { 0x1700, 0x1715 }, + { 0x171f, 0x1734 }, + { 0x1740, 0x1753 }, + { 0x1760, 0x176c }, + { 0x176e, 0x1770 }, + { 0x1772, 0x1773 }, + { 0x1780, 0x17d3 }, + { 0x17d7, 0x17d7 }, + { 0x17dc, 0x17dd }, + { 0x17e0, 0x17e9 }, + { 0x180b, 0x180d }, + { 0x180f, 0x1819 }, + { 0x1820, 0x1878 }, + { 0x1880, 0x18aa }, + { 0x18b0, 0x18f5 }, + { 0x1900, 0x191e }, + { 0x1920, 0x192b }, + { 0x1930, 0x193b }, + { 0x1946, 0x196d }, + { 0x1970, 0x1974 }, + { 0x1980, 0x19ab }, + { 0x19b0, 0x19c9 }, + { 0x19d0, 0x19da }, + { 0x1a00, 0x1a1b }, + { 0x1a20, 0x1a5e }, + { 0x1a60, 0x1a7c }, + { 0x1a7f, 0x1a89 }, + { 0x1a90, 0x1a99 }, + { 0x1aa7, 0x1aa7 }, + { 0x1ab0, 0x1abd }, + { 0x1abf, 0x1ace }, + { 0x1b00, 0x1b4c }, + { 0x1b50, 0x1b59 }, + { 0x1b6b, 0x1b73 }, + { 0x1b80, 0x1bf3 }, + { 0x1c00, 0x1c37 }, + { 0x1c40, 0x1c49 }, + { 0x1c4d, 0x1c7d }, + { 0x1c80, 0x1c88 }, + { 0x1c90, 0x1cba }, + { 0x1cbd, 0x1cbf }, + { 0x1cd0, 0x1cd2 }, + { 0x1cd4, 0x1cfa }, + { 0x1d00, 0x1f15 }, + { 0x1f18, 0x1f1d }, + { 0x1f20, 0x1f45 }, + { 0x1f48, 0x1f4d }, + { 0x1f50, 0x1f57 }, + { 0x1f59, 0x1f59 }, + { 0x1f5b, 0x1f5b }, + { 0x1f5d, 0x1f5d }, + { 0x1f5f, 0x1f7d }, + { 0x1f80, 0x1fb4 }, + { 0x1fb6, 0x1fbc }, + { 0x1fbe, 0x1fbe }, + { 0x1fc2, 0x1fc4 }, + { 0x1fc6, 0x1fcc }, + { 0x1fd0, 0x1fd3 }, + { 0x1fd6, 0x1fdb }, + { 0x1fe0, 0x1fec }, + { 0x1ff2, 0x1ff4 }, + { 0x1ff6, 0x1ffc }, + { 0x203f, 0x2040 }, + { 0x2054, 0x2054 }, + { 0x2071, 0x2071 }, + { 0x207f, 0x207f }, + { 0x2090, 0x209c }, + { 0x20d0, 0x20dc }, + { 0x20e1, 0x20e1 }, + { 0x20e5, 0x20f0 }, + { 0x2102, 0x2102 }, + { 0x2107, 0x2107 }, + { 0x210a, 0x2113 }, + { 0x2115, 0x2115 }, + { 0x2118, 0x211d }, + { 0x2124, 0x2124 }, + { 0x2126, 0x2126 }, + { 0x2128, 0x2128 }, + { 0x212a, 0x2139 }, + { 0x213c, 0x213f }, + { 0x2145, 0x2149 }, + { 0x214e, 0x214e }, + { 0x2160, 0x2188 }, + { 0x2c00, 0x2ce4 }, + { 0x2ceb, 0x2cf3 }, + { 0x2d00, 0x2d25 }, + { 0x2d27, 0x2d27 }, + { 0x2d2d, 0x2d2d }, + { 0x2d30, 0x2d67 }, + { 0x2d6f, 0x2d6f }, + { 0x2d7f, 0x2d96 }, + { 0x2da0, 0x2da6 }, + { 0x2da8, 0x2dae }, + { 0x2db0, 0x2db6 }, + { 0x2db8, 0x2dbe }, + { 0x2dc0, 0x2dc6 }, + { 0x2dc8, 0x2dce }, + { 0x2dd0, 0x2dd6 }, + { 0x2dd8, 0x2dde }, + { 0x2de0, 0x2dff }, + { 0x3005, 0x3007 }, + { 0x3021, 0x302f }, + { 0x3031, 0x3035 }, + { 0x3038, 0x303c }, + { 0x3041, 0x3096 }, + { 0x3099, 0x309f }, + { 0x30a1, 0x30fa }, + { 0x30fc, 0x30ff }, + { 0x3105, 0x312f }, + { 0x3131, 0x318e }, + { 0x31a0, 0x31bf }, + { 0x31f0, 0x31ff }, + { 0x3400, 0x4dbf }, + { 0x4e00, 0xa48c }, + { 0xa4d0, 0xa4fd }, + { 0xa500, 0xa60c }, + { 0xa610, 0xa62b }, + { 0xa640, 0xa66f }, + { 0xa674, 0xa67d }, + { 0xa67f, 0xa6f1 }, + { 0xa717, 0xa71f }, + { 0xa722, 0xa788 }, + { 0xa78b, 0xa7ca }, + { 0xa7d0, 0xa7d1 }, + { 0xa7d3, 0xa7d3 }, + { 0xa7d5, 0xa7d9 }, + { 0xa7f2, 0xa827 }, + { 0xa82c, 0xa82c }, + { 0xa840, 0xa873 }, + { 0xa880, 0xa8c5 }, + { 0xa8d0, 0xa8d9 }, + { 0xa8e0, 0xa8f7 }, + { 0xa8fb, 0xa8fb }, + { 0xa8fd, 0xa92d }, + { 0xa930, 0xa953 }, + { 0xa960, 0xa97c }, + { 0xa980, 0xa9c0 }, + { 0xa9cf, 0xa9d9 }, + { 0xa9e0, 0xa9fe }, + { 0xaa00, 0xaa36 }, + { 0xaa40, 0xaa4d }, + { 0xaa50, 0xaa59 }, + { 0xaa60, 0xaa76 }, + { 0xaa7a, 0xaac2 }, + { 0xaadb, 0xaadd }, + { 0xaae0, 0xaaef }, + { 0xaaf2, 0xaaf6 }, + { 0xab01, 0xab06 }, + { 0xab09, 0xab0e }, + { 0xab11, 0xab16 }, + { 0xab20, 0xab26 }, + { 0xab28, 0xab2e }, + { 0xab30, 0xab5a }, + { 0xab5c, 0xab69 }, + { 0xab70, 0xabea }, + { 0xabec, 0xabed }, + { 0xabf0, 0xabf9 }, + { 0xac00, 0xd7a3 }, + { 0xd7b0, 0xd7c6 }, + { 0xd7cb, 0xd7fb }, + { 0xf900, 0xfa6d }, + { 0xfa70, 0xfad9 }, + { 0xfb00, 0xfb06 }, + { 0xfb13, 0xfb17 }, + { 0xfb1d, 0xfb28 }, + { 0xfb2a, 0xfb36 }, + { 0xfb38, 0xfb3c }, + { 0xfb3e, 0xfb3e }, + { 0xfb40, 0xfb41 }, + { 0xfb43, 0xfb44 }, + { 0xfb46, 0xfbb1 }, + { 0xfbd3, 0xfd3d }, + { 0xfd50, 0xfd8f }, + { 0xfd92, 0xfdc7 }, + { 0xfdf0, 0xfdfb }, + { 0xfe00, 0xfe0f }, + { 0xfe20, 0xfe2f }, + { 0xfe33, 0xfe34 }, + { 0xfe4d, 0xfe4f }, + { 0xfe70, 0xfe74 }, + { 0xfe76, 0xfefc }, + { 0xff10, 0xff19 }, + { 0xff21, 0xff3a }, + { 0xff3f, 0xff3f }, + { 0xff41, 0xff5a }, + { 0xff66, 0xffbe }, + { 0xffc2, 0xffc7 }, + { 0xffca, 0xffcf }, + { 0xffd2, 0xffd7 }, + { 0xffda, 0xffdc }, + { 0x10000, 0x1000b }, + { 0x1000d, 0x10026 }, + { 0x10028, 0x1003a }, + { 0x1003c, 0x1003d }, + { 0x1003f, 0x1004d }, + { 0x10050, 0x1005d }, + { 0x10080, 0x100fa }, + { 0x10140, 0x10174 }, + { 0x101fd, 0x101fd }, + { 0x10280, 0x1029c }, + { 0x102a0, 0x102d0 }, + { 0x102e0, 0x102e0 }, + { 0x10300, 0x1031f }, + { 0x1032d, 0x1034a }, + { 0x10350, 0x1037a }, + { 0x10380, 0x1039d }, + { 0x103a0, 0x103c3 }, + { 0x103c8, 0x103cf }, + { 0x103d1, 0x103d5 }, + { 0x10400, 0x1049d }, + { 0x104a0, 0x104a9 }, + { 0x104b0, 0x104d3 }, + { 0x104d8, 0x104fb }, + { 0x10500, 0x10527 }, + { 0x10530, 0x10563 }, + { 0x10570, 0x1057a }, + { 0x1057c, 0x1058a }, + { 0x1058c, 0x10592 }, + { 0x10594, 0x10595 }, + { 0x10597, 0x105a1 }, + { 0x105a3, 0x105b1 }, + { 0x105b3, 0x105b9 }, + { 0x105bb, 0x105bc }, + { 0x10600, 0x10736 }, + { 0x10740, 0x10755 }, + { 0x10760, 0x10767 }, + { 0x10780, 0x10785 }, + { 0x10787, 0x107b0 }, + { 0x107b2, 0x107ba }, + { 0x10800, 0x10805 }, + { 0x10808, 0x10808 }, + { 0x1080a, 0x10835 }, + { 0x10837, 0x10838 }, + { 0x1083c, 0x1083c }, + { 0x1083f, 0x10855 }, + { 0x10860, 0x10876 }, + { 0x10880, 0x1089e }, + { 0x108e0, 0x108f2 }, + { 0x108f4, 0x108f5 }, + { 0x10900, 0x10915 }, + { 0x10920, 0x10939 }, + { 0x10980, 0x109b7 }, + { 0x109be, 0x109bf }, + { 0x10a00, 0x10a03 }, + { 0x10a05, 0x10a06 }, + { 0x10a0c, 0x10a13 }, + { 0x10a15, 0x10a17 }, + { 0x10a19, 0x10a35 }, + { 0x10a38, 0x10a3a }, + { 0x10a3f, 0x10a3f }, + { 0x10a60, 0x10a7c }, + { 0x10a80, 0x10a9c }, + { 0x10ac0, 0x10ac7 }, + { 0x10ac9, 0x10ae6 }, + { 0x10b00, 0x10b35 }, + { 0x10b40, 0x10b55 }, + { 0x10b60, 0x10b72 }, + { 0x10b80, 0x10b91 }, + { 0x10c00, 0x10c48 }, + { 0x10c80, 0x10cb2 }, + { 0x10cc0, 0x10cf2 }, + { 0x10d00, 0x10d27 }, + { 0x10d30, 0x10d39 }, + { 0x10e80, 0x10ea9 }, + { 0x10eab, 0x10eac }, + { 0x10eb0, 0x10eb1 }, + { 0x10f00, 0x10f1c }, + { 0x10f27, 0x10f27 }, + { 0x10f30, 0x10f50 }, + { 0x10f70, 0x10f85 }, + { 0x10fb0, 0x10fc4 }, + { 0x10fe0, 0x10ff6 }, + { 0x11000, 0x11046 }, + { 0x11066, 0x11075 }, + { 0x1107f, 0x110ba }, + { 0x110c2, 0x110c2 }, + { 0x110d0, 0x110e8 }, + { 0x110f0, 0x110f9 }, + { 0x11100, 0x11134 }, + { 0x11136, 0x1113f }, + { 0x11144, 0x11147 }, + { 0x11150, 0x11173 }, + { 0x11176, 0x11176 }, + { 0x11180, 0x111c4 }, + { 0x111c9, 0x111cc }, + { 0x111ce, 0x111da }, + { 0x111dc, 0x111dc }, + { 0x11200, 0x11211 }, + { 0x11213, 0x11237 }, + { 0x1123e, 0x1123e }, + { 0x11280, 0x11286 }, + { 0x11288, 0x11288 }, + { 0x1128a, 0x1128d }, + { 0x1128f, 0x1129d }, + { 0x1129f, 0x112a8 }, + { 0x112b0, 0x112ea }, + { 0x112f0, 0x112f9 }, + { 0x11300, 0x11303 }, + { 0x11305, 0x1130c }, + { 0x1130f, 0x11310 }, + { 0x11313, 0x11328 }, + { 0x1132a, 0x11330 }, + { 0x11332, 0x11333 }, + { 0x11335, 0x11339 }, + { 0x1133b, 0x11344 }, + { 0x11347, 0x11348 }, + { 0x1134b, 0x1134d }, + { 0x11350, 0x11350 }, + { 0x11357, 0x11357 }, + { 0x1135d, 0x11363 }, + { 0x11366, 0x1136c }, + { 0x11370, 0x11374 }, + { 0x11400, 0x1144a }, + { 0x11450, 0x11459 }, + { 0x1145e, 0x11461 }, + { 0x11480, 0x114c5 }, + { 0x114c7, 0x114c7 }, + { 0x114d0, 0x114d9 }, + { 0x11580, 0x115b5 }, + { 0x115b8, 0x115c0 }, + { 0x115d8, 0x115dd }, + { 0x11600, 0x11640 }, + { 0x11644, 0x11644 }, + { 0x11650, 0x11659 }, + { 0x11680, 0x116b8 }, + { 0x116c0, 0x116c9 }, + { 0x11700, 0x1171a }, + { 0x1171d, 0x1172b }, + { 0x11730, 0x11739 }, + { 0x11740, 0x11746 }, + { 0x11800, 0x1183a }, + { 0x118a0, 0x118e9 }, + { 0x118ff, 0x11906 }, + { 0x11909, 0x11909 }, + { 0x1190c, 0x11913 }, + { 0x11915, 0x11916 }, + { 0x11918, 0x11935 }, + { 0x11937, 0x11938 }, + { 0x1193b, 0x11943 }, + { 0x11950, 0x11959 }, + { 0x119a0, 0x119a7 }, + { 0x119aa, 0x119d7 }, + { 0x119da, 0x119e1 }, + { 0x119e3, 0x119e4 }, + { 0x11a00, 0x11a3e }, + { 0x11a47, 0x11a47 }, + { 0x11a50, 0x11a99 }, + { 0x11a9d, 0x11a9d }, + { 0x11ab0, 0x11af8 }, + { 0x11c00, 0x11c08 }, + { 0x11c0a, 0x11c36 }, + { 0x11c38, 0x11c40 }, + { 0x11c50, 0x11c59 }, + { 0x11c72, 0x11c8f }, + { 0x11c92, 0x11ca7 }, + { 0x11ca9, 0x11cb6 }, + { 0x11d00, 0x11d06 }, + { 0x11d08, 0x11d09 }, + { 0x11d0b, 0x11d36 }, + { 0x11d3a, 0x11d3a }, + { 0x11d3c, 0x11d3d }, + { 0x11d3f, 0x11d47 }, + { 0x11d50, 0x11d59 }, + { 0x11d60, 0x11d65 }, + { 0x11d67, 0x11d68 }, + { 0x11d6a, 0x11d8e }, + { 0x11d90, 0x11d91 }, + { 0x11d93, 0x11d98 }, + { 0x11da0, 0x11da9 }, + { 0x11ee0, 0x11ef6 }, + { 0x11fb0, 0x11fb0 }, + { 0x12000, 0x12399 }, + { 0x12400, 0x1246e }, + { 0x12480, 0x12543 }, + { 0x12f90, 0x12ff0 }, + { 0x13000, 0x1342e }, + { 0x14400, 0x14646 }, + { 0x16800, 0x16a38 }, + { 0x16a40, 0x16a5e }, + { 0x16a60, 0x16a69 }, + { 0x16a70, 0x16abe }, + { 0x16ac0, 0x16ac9 }, + { 0x16ad0, 0x16aed }, + { 0x16af0, 0x16af4 }, + { 0x16b00, 0x16b36 }, + { 0x16b40, 0x16b43 }, + { 0x16b50, 0x16b59 }, + { 0x16b63, 0x16b77 }, + { 0x16b7d, 0x16b8f }, + { 0x16e40, 0x16e7f }, + { 0x16f00, 0x16f4a }, + { 0x16f4f, 0x16f87 }, + { 0x16f8f, 0x16f9f }, + { 0x16fe0, 0x16fe1 }, + { 0x16fe3, 0x16fe4 }, + { 0x16ff0, 0x16ff1 }, + { 0x17000, 0x187f7 }, + { 0x18800, 0x18cd5 }, + { 0x18d00, 0x18d08 }, + { 0x1aff0, 0x1aff3 }, + { 0x1aff5, 0x1affb }, + { 0x1affd, 0x1affe }, + { 0x1b000, 0x1b122 }, + { 0x1b150, 0x1b152 }, + { 0x1b164, 0x1b167 }, + { 0x1b170, 0x1b2fb }, + { 0x1bc00, 0x1bc6a }, + { 0x1bc70, 0x1bc7c }, + { 0x1bc80, 0x1bc88 }, + { 0x1bc90, 0x1bc99 }, + { 0x1bc9d, 0x1bc9e }, + { 0x1cf00, 0x1cf2d }, + { 0x1cf30, 0x1cf46 }, + { 0x1d165, 0x1d169 }, + { 0x1d16d, 0x1d172 }, + { 0x1d17b, 0x1d182 }, + { 0x1d185, 0x1d18b }, + { 0x1d1aa, 0x1d1ad }, + { 0x1d242, 0x1d244 }, + { 0x1d400, 0x1d454 }, + { 0x1d456, 0x1d49c }, + { 0x1d49e, 0x1d49f }, + { 0x1d4a2, 0x1d4a2 }, + { 0x1d4a5, 0x1d4a6 }, + { 0x1d4a9, 0x1d4ac }, + { 0x1d4ae, 0x1d4b9 }, + { 0x1d4bb, 0x1d4bb }, + { 0x1d4bd, 0x1d4c3 }, + { 0x1d4c5, 0x1d505 }, + { 0x1d507, 0x1d50a }, + { 0x1d50d, 0x1d514 }, + { 0x1d516, 0x1d51c }, + { 0x1d51e, 0x1d539 }, + { 0x1d53b, 0x1d53e }, + { 0x1d540, 0x1d544 }, + { 0x1d546, 0x1d546 }, + { 0x1d54a, 0x1d550 }, + { 0x1d552, 0x1d6a5 }, + { 0x1d6a8, 0x1d6c0 }, + { 0x1d6c2, 0x1d6da }, + { 0x1d6dc, 0x1d6fa }, + { 0x1d6fc, 0x1d714 }, + { 0x1d716, 0x1d734 }, + { 0x1d736, 0x1d74e }, + { 0x1d750, 0x1d76e }, + { 0x1d770, 0x1d788 }, + { 0x1d78a, 0x1d7a8 }, + { 0x1d7aa, 0x1d7c2 }, + { 0x1d7c4, 0x1d7cb }, + { 0x1d7ce, 0x1d7ff }, + { 0x1da00, 0x1da36 }, + { 0x1da3b, 0x1da6c }, + { 0x1da75, 0x1da75 }, + { 0x1da84, 0x1da84 }, + { 0x1da9b, 0x1da9f }, + { 0x1daa1, 0x1daaf }, + { 0x1df00, 0x1df1e }, + { 0x1e000, 0x1e006 }, + { 0x1e008, 0x1e018 }, + { 0x1e01b, 0x1e021 }, + { 0x1e023, 0x1e024 }, + { 0x1e026, 0x1e02a }, + { 0x1e100, 0x1e12c }, + { 0x1e130, 0x1e13d }, + { 0x1e140, 0x1e149 }, + { 0x1e14e, 0x1e14e }, + { 0x1e290, 0x1e2ae }, + { 0x1e2c0, 0x1e2f9 }, + { 0x1e7e0, 0x1e7e6 }, + { 0x1e7e8, 0x1e7eb }, + { 0x1e7ed, 0x1e7ee }, + { 0x1e7f0, 0x1e7fe }, + { 0x1e800, 0x1e8c4 }, + { 0x1e8d0, 0x1e8d6 }, + { 0x1e900, 0x1e94b }, + { 0x1e950, 0x1e959 }, + { 0x1ee00, 0x1ee03 }, + { 0x1ee05, 0x1ee1f }, + { 0x1ee21, 0x1ee22 }, + { 0x1ee24, 0x1ee24 }, + { 0x1ee27, 0x1ee27 }, + { 0x1ee29, 0x1ee32 }, + { 0x1ee34, 0x1ee37 }, + { 0x1ee39, 0x1ee39 }, + { 0x1ee3b, 0x1ee3b }, + { 0x1ee42, 0x1ee42 }, + { 0x1ee47, 0x1ee47 }, + { 0x1ee49, 0x1ee49 }, + { 0x1ee4b, 0x1ee4b }, + { 0x1ee4d, 0x1ee4f }, + { 0x1ee51, 0x1ee52 }, + { 0x1ee54, 0x1ee54 }, + { 0x1ee57, 0x1ee57 }, + { 0x1ee59, 0x1ee59 }, + { 0x1ee5b, 0x1ee5b }, + { 0x1ee5d, 0x1ee5d }, + { 0x1ee5f, 0x1ee5f }, + { 0x1ee61, 0x1ee62 }, + { 0x1ee64, 0x1ee64 }, + { 0x1ee67, 0x1ee6a }, + { 0x1ee6c, 0x1ee72 }, + { 0x1ee74, 0x1ee77 }, + { 0x1ee79, 0x1ee7c }, + { 0x1ee7e, 0x1ee7e }, + { 0x1ee80, 0x1ee89 }, + { 0x1ee8b, 0x1ee9b }, + { 0x1eea1, 0x1eea3 }, + { 0x1eea5, 0x1eea9 }, + { 0x1eeab, 0x1eebb }, + { 0x1fbf0, 0x1fbf9 }, + { 0x20000, 0x2a6df }, + { 0x2a700, 0x2b738 }, + { 0x2b740, 0x2b81d }, + { 0x2b820, 0x2cea1 }, + { 0x2ceb0, 0x2ebe0 }, + { 0x2f800, 0x2fa1d }, + { 0x30000, 0x3134a }, + { 0xe0100, 0xe01ef }, + { 0x0, 0x0 }, +}; + +#endif // CHAR_RANGE_INC diff --git a/core/string/char_utils.h b/core/string/char_utils.h new file mode 100644 index 0000000000..67147a4327 --- /dev/null +++ b/core/string/char_utils.h @@ -0,0 +1,112 @@ +/*************************************************************************/ +/* char_utils.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CHAR_UTILS_H +#define CHAR_UTILS_H + +#include "core/typedefs.h" + +#include "char_range.inc" + +static _FORCE_INLINE_ bool is_unicode_identifier_start(char32_t c) { + for (int i = 0; xid_start[i].start != 0; i++) { + if (c >= xid_start[i].start && c <= xid_start[i].end) { + return true; + } + } + return false; +} + +static _FORCE_INLINE_ bool is_unicode_identifier_continue(char32_t c) { + for (int i = 0; xid_continue[i].start != 0; i++) { + if (c >= xid_continue[i].start && c <= xid_continue[i].end) { + return true; + } + } + return false; +} + +static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { + return (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { + return (c >= 'a' && c <= 'z'); +} + +static _FORCE_INLINE_ bool is_digit(char32_t c) { + return (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { + return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); +} + +static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { + return (c == '0' || c == '1'); +} + +static _FORCE_INLINE_ bool is_ascii_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +} + +static _FORCE_INLINE_ bool is_symbol(char32_t c) { + return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); +} + +static _FORCE_INLINE_ bool is_control(char32_t p_char) { + return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f); +} + +static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { + return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085); +} + +static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { + return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029); +} + +static _FORCE_INLINE_ bool is_punct(char32_t p_char) { + return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f); +} + +static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { + return (p_char == '_'); +} + +#endif // CHAR_UTILS_H diff --git a/core/string/locales.h b/core/string/locales.h new file mode 100644 index 0000000000..32d6608ec2 --- /dev/null +++ b/core/string/locales.h @@ -0,0 +1,1197 @@ +/*************************************************************************/ +/* locales.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef LOCALES_H +#define LOCALES_H + +// Windows has some weird locale identifiers which do not honor the ISO 639-1 +// standardized nomenclature. Whenever those don't conflict with existing ISO +// identifiers, we override them. +// +// Reference: +// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx + +static const char *locale_renames[][2] = { + { "in", "id" }, // Indonesian + { "iw", "he" }, // Hebrew + { "no", "nb" }, // Norwegian Bokmål + { "C", "en" }, // Locale is not set, fallback to English. + { nullptr, nullptr } +}; + +// Additional script information to preferred scripts. +// Language code, script code, default country, supported countries. +// Reference: +// - https://lh.2xlibre.net/locales/ +// - https://www.localeplanet.com/icu/index.html +// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f + +static const char *locale_scripts[][4] = { + { "az", "Latn", "", "AZ" }, + { "az", "Arab", "", "IR" }, + { "bs", "Latn", "", "BA" }, + { "ff", "Latn", "", "BF,CM,GH,GM,GN,GW,LR,MR,NE,NG,SL,SN" }, + { "pa", "Arab", "PK", "PK" }, + { "pa", "Guru", "IN", "IN" }, + { "sd", "Arab", "PK", "PK" }, + { "sd", "Deva", "IN", "IN" }, + { "shi", "Tfng", "", "MA" }, + { "sr", "Cyrl", "", "BA,RS,XK" }, + { "sr", "Latn", "", "ME" }, + { "uz", "Latn", "", "UZ" }, + { "uz", "Arab", "AF", "AF" }, + { "vai", "Vaii", "", "LR" }, + { "yue", "Hans", "CN", "CN" }, + { "yue", "Hant", "HK", "HK" }, + { "zh", "Hans", "CN", "CN,SG" }, + { "zh", "Hant", "TW", "HK,MO,TW" }, + { nullptr, nullptr, nullptr, nullptr } +}; + +// Additional mapping for outdated, temporary or exceptionally reserved country codes. +// Reference: +// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 +// - https://www.iso.org/obp/ui/#search/code/ + +static const char *country_renames[][2] = { + { "BU", "MM" }, // Burma, name changed to Myanmar. + { "KV", "XK" }, // Kosovo (temporary FIPS code to European Commission code), no official ISO code assigned. + { "TP", "TL" }, // East Timor, name changed to Timor-Leste. + { "UK", "GB" }, // United Kingdom, exceptionally reserved code. + { nullptr, nullptr } +}; + +// Country code, country name. +// Reference: +// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 +// - https://www.iso.org/obp/ui/#search/code/ + +static const char *country_names[][2] = { + { "AC", "Ascension Island" }, // Exceptionally reserved. + { "AD", "Andorra" }, + { "AE", "United Arab Emirates" }, + { "AF", "Afghanistan" }, + { "AG", "Antigua and Barbuda" }, + { "AI", "Anguilla" }, + { "AL", "Albania" }, + { "AM", "Armenia" }, + { "AN", "Netherlands Antilles" }, // Transitionally reserved, divided into BQ, CW and SX. + { "AO", "Angola" }, + { "AQ", "Antarctica" }, + { "AR", "Argentina" }, + { "AS", "American Samoa" }, + { "AT", "Austria" }, + { "AU", "Australia" }, + { "AW", "Aruba" }, + { "AX", "Åland Islands" }, + { "AZ", "Azerbaijan" }, + { "BA", "Bosnia and Herzegovina" }, + { "BB", "Barbados" }, + { "BD", "Bangladesh" }, + { "BE", "Belgium" }, + { "BF", "Burkina Faso" }, + { "BG", "Bulgaria" }, + { "BH", "Bahrain" }, + { "BI", "Burundi" }, + { "BJ", "Benin" }, + { "BL", "St. Barthélemy" }, + { "BM", "Bermuda" }, + { "BN", "Brunei" }, + { "BO", "Bolivia" }, + { "BQ", "Caribbean Netherlands" }, + { "BR", "Brazil" }, + { "BS", "Bahamas" }, + { "BT", "Bhutan" }, + { "BV", "Bouvet Island" }, + { "BW", "Botswana" }, + { "BY", "Belarus" }, + { "BZ", "Belize" }, + { "CA", "Canada" }, + { "CC", "Cocos (Keeling) Islands" }, + { "CD", "Congo - Kinshasa" }, + { "CF", "Central African Republic" }, + { "CG", "Congo - Brazzaville" }, + { "CH", "Switzerland" }, + { "CI", "Côte d'Ivoire" }, + { "CK", "Cook Islands" }, + { "CL", "Chile" }, + { "CM", "Cameroon" }, + { "CN", "China" }, + { "CO", "Colombia" }, + { "CP", "Clipperton Island" }, // Exceptionally reserved. + { "CR", "Costa Rica" }, + { "CQ", "Island of Sark" }, // Exceptionally reserved. + { "CU", "Cuba" }, + { "CV", "Cabo Verde" }, + { "CW", "Curaçao" }, + { "CX", "Christmas Island" }, + { "CY", "Cyprus" }, + { "CZ", "Czechia" }, + { "DE", "Germany" }, + { "DG", "Diego Garcia" }, // Exceptionally reserved. + { "DJ", "Djibouti" }, + { "DK", "Denmark" }, + { "DM", "Dominica" }, + { "DO", "Dominican Republic" }, + { "DZ", "Algeria" }, + { "EA", "Ceuta and Melilla" }, // Exceptionally reserved. + { "EC", "Ecuador" }, + { "EE", "Estonia" }, + { "EG", "Egypt" }, + { "EH", "Western Sahara" }, + { "ER", "Eritrea" }, + { "ES", "Spain" }, + { "ET", "Ethiopia" }, + { "EU", "European Union" }, // Exceptionally reserved. + { "EZ", "Eurozone" }, // Exceptionally reserved. + { "FI", "Finland" }, + { "FJ", "Fiji" }, + { "FK", "Falkland Islands" }, + { "FM", "Micronesia" }, + { "FO", "Faroe Islands" }, + { "FR", "France" }, + { "FX", "France, Metropolitan" }, // Exceptionally reserved. + { "GA", "Gabon" }, + { "GB", "United Kingdom" }, + { "GD", "Grenada" }, + { "GE", "Georgia" }, + { "GF", "French Guiana" }, + { "GG", "Guernsey" }, + { "GH", "Ghana" }, + { "GI", "Gibraltar" }, + { "GL", "Greenland" }, + { "GM", "Gambia" }, + { "GN", "Guinea" }, + { "GP", "Guadeloupe" }, + { "GQ", "Equatorial Guinea" }, + { "GR", "Greece" }, + { "GS", "South Georgia and South Sandwich Islands" }, + { "GT", "Guatemala" }, + { "GU", "Guam" }, + { "GW", "Guinea-Bissau" }, + { "GY", "Guyana" }, + { "HK", "Hong Kong" }, + { "HM", "Heard Island and McDonald Islands" }, + { "HN", "Honduras" }, + { "HR", "Croatia" }, + { "HT", "Haiti" }, + { "HU", "Hungary" }, + { "IC", "Canary Islands" }, // Exceptionally reserved. + { "ID", "Indonesia" }, + { "IE", "Ireland" }, + { "IL", "Israel" }, + { "IM", "Isle of Man" }, + { "IN", "India" }, + { "IO", "British Indian Ocean Territory" }, + { "IQ", "Iraq" }, + { "IR", "Iran" }, + { "IS", "Iceland" }, + { "IT", "Italy" }, + { "JE", "Jersey" }, + { "JM", "Jamaica" }, + { "JO", "Jordan" }, + { "JP", "Japan" }, + { "KE", "Kenya" }, + { "KG", "Kyrgyzstan" }, + { "KH", "Cambodia" }, + { "KI", "Kiribati" }, + { "KM", "Comoros" }, + { "KN", "St. Kitts and Nevis" }, + { "KP", "North Korea" }, + { "KR", "South Korea" }, + { "KW", "Kuwait" }, + { "KY", "Cayman Islands" }, + { "KZ", "Kazakhstan" }, + { "LA", "Laos" }, + { "LB", "Lebanon" }, + { "LC", "St. Lucia" }, + { "LI", "Liechtenstein" }, + { "LK", "Sri Lanka" }, + { "LR", "Liberia" }, + { "LS", "Lesotho" }, + { "LT", "Lithuania" }, + { "LU", "Luxembourg" }, + { "LV", "Latvia" }, + { "LY", "Libya" }, + { "MA", "Morocco" }, + { "MC", "Monaco" }, + { "MD", "Moldova" }, + { "ME", "Montenegro" }, + { "MF", "St. Martin" }, + { "MG", "Madagascar" }, + { "MH", "Marshall Islands" }, + { "MK", "North Macedonia" }, + { "ML", "Mali" }, + { "MM", "Myanmar" }, + { "MN", "Mongolia" }, + { "MO", "Macao" }, + { "MP", "Northern Mariana Islands" }, + { "MQ", "Martinique" }, + { "MR", "Mauritania" }, + { "MS", "Montserrat" }, + { "MT", "Malta" }, + { "MU", "Mauritius" }, + { "MV", "Maldives" }, + { "MW", "Malawi" }, + { "MX", "Mexico" }, + { "MY", "Malaysia" }, + { "MZ", "Mozambique" }, + { "NA", "Namibia" }, + { "NC", "New Caledonia" }, + { "NE", "Niger" }, + { "NF", "Norfolk Island" }, + { "NG", "Nigeria" }, + { "NI", "Nicaragua" }, + { "NL", "Netherlands" }, + { "NO", "Norway" }, + { "NP", "Nepal" }, + { "NR", "Nauru" }, + { "NU", "Niue" }, + { "NZ", "New Zealand" }, + { "OM", "Oman" }, + { "PA", "Panama" }, + { "PE", "Peru" }, + { "PF", "French Polynesia" }, + { "PG", "Papua New Guinea" }, + { "PH", "Philippines" }, + { "PK", "Pakistan" }, + { "PL", "Poland" }, + { "PM", "St. Pierre and Miquelon" }, + { "PN", "Pitcairn Islands" }, + { "PR", "Puerto Rico" }, + { "PS", "Palestine" }, + { "PT", "Portugal" }, + { "PW", "Palau" }, + { "PY", "Paraguay" }, + { "QA", "Qatar" }, + { "RE", "Réunion" }, + { "RO", "Romania" }, + { "RS", "Serbia" }, + { "RU", "Russia" }, + { "RW", "Rwanda" }, + { "SA", "Saudi Arabia" }, + { "SB", "Solomon Islands" }, + { "SC", "Seychelles" }, + { "SD", "Sudan" }, + { "SE", "Sweden" }, + { "SG", "Singapore" }, + { "SH", "St. Helena, Ascension and Tristan da Cunha" }, + { "SI", "Slovenia" }, + { "SJ", "Svalbard and Jan Mayen" }, + { "SK", "Slovakia" }, + { "SL", "Sierra Leone" }, + { "SM", "San Marino" }, + { "SN", "Senegal" }, + { "SO", "Somalia" }, + { "SR", "Suriname" }, + { "SS", "South Sudan" }, + { "ST", "Sao Tome and Principe" }, + { "SV", "El Salvador" }, + { "SX", "Sint Maarten" }, + { "SY", "Syria" }, + { "SZ", "Eswatini" }, + { "TA", "Tristan da Cunha" }, // Exceptionally reserved. + { "TC", "Turks and Caicos Islands" }, + { "TD", "Chad" }, + { "TF", "French Southern Territories" }, + { "TG", "Togo" }, + { "TH", "Thailand" }, + { "TJ", "Tajikistan" }, + { "TK", "Tokelau" }, + { "TL", "Timor-Leste" }, + { "TM", "Turkmenistan" }, + { "TN", "Tunisia" }, + { "TO", "Tonga" }, + { "TR", "Turkey" }, + { "TT", "Trinidad and Tobago" }, + { "TV", "Tuvalu" }, + { "TW", "Taiwan" }, + { "TZ", "Tanzania" }, + { "UA", "Ukraine" }, + { "UG", "Uganda" }, + { "UM", "U.S. Outlying Islands" }, + { "US", "United States of America" }, + { "UY", "Uruguay" }, + { "UZ", "Uzbekistan" }, + { "VA", "Holy See" }, + { "VC", "St. Vincent and the Grenadines" }, + { "VE", "Venezuela" }, + { "VG", "British Virgin Islands" }, + { "VI", "U.S. Virgin Islands" }, + { "VN", "Viet Nam" }, + { "VU", "Vanuatu" }, + { "WF", "Wallis and Futuna" }, + { "WS", "Samoa" }, + { "XK", "Kosovo" }, // Temporary code, no official ISO code assigned. + { "YE", "Yemen" }, + { "YT", "Mayotte" }, + { "ZA", "South Africa" }, + { "ZM", "Zambia" }, + { "ZW", "Zimbabwe" }, + { nullptr, nullptr } +}; + +// Languages code, language name. +// Reference: +// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes +// - https://www.localeplanet.com/icu/index.html +// - https://lh.2xlibre.net/locales/ + +static const char *language_list[][2] = { + { "aa", "Afar" }, + { "ab", "Abkhazian" }, + { "ace", "Achinese" }, + { "ach", "Acoli" }, + { "ada", "Adangme" }, + { "ady", "Adyghe" }, + { "ae", "Avestan" }, + { "aeb", "Tunisian Arabic" }, + { "af", "Afrikaans" }, + { "afh", "Afrihili" }, + { "agq", "Aghem" }, + { "ain", "Ainu" }, + { "agr", "Aguaruna" }, + { "ak", "Akan" }, + { "akk", "Akkadian" }, + { "akz", "Alabama" }, + { "ale", "Aleut" }, + { "aln", "Gheg Albanian" }, + { "alt", "Southern Altai" }, + { "am", "Amharic" }, + { "an", "Aragonese" }, + { "ang", "Old English" }, + { "anp", "Angika" }, + { "ar", "Arabic" }, + { "arc", "Aramaic" }, + { "arn", "Mapudungun" }, + { "aro", "Araona" }, + { "arp", "Arapaho" }, + { "arq", "Algerian Arabic" }, + { "ars", "Najdi Arabic" }, + { "arw", "Arawak" }, + { "ary", "Moroccan Arabic" }, + { "arz", "Egyptian Arabic" }, + { "as", "Assamese" }, + { "asa", "Asu" }, + { "ase", "American Sign Language" }, + { "ast", "Asturian" }, + { "av", "Avaric" }, + { "avk", "Kotava" }, + { "awa", "Awadhi" }, + { "ayc", "Southern Aymara" }, + { "ay", "Aymara" }, + { "az", "Azerbaijani" }, + { "ba", "Bashkir" }, + { "bal", "Baluchi" }, + { "ban", "Balinese" }, + { "bar", "Bavarian" }, + { "bas", "Bassa" }, + { "bax", "Bamun" }, + { "bbc", "Batak Toba" }, + { "bbj", "Ghomala" }, + { "be", "Belarusian" }, + { "bej", "Beja" }, + { "bem", "Bemba" }, + { "ber", "Berber" }, + { "bew", "Betawi" }, + { "bez", "Bena" }, + { "bfd", "Bafut" }, + { "bfq", "Badaga" }, + { "bg", "Bulgarian" }, + { "bhb", "Bhili" }, + { "bgn", "Western Balochi" }, + { "bho", "Bhojpuri" }, + { "bi", "Bislama" }, + { "bik", "Bikol" }, + { "bin", "Bini" }, + { "bjn", "Banjar" }, + { "bkm", "Kom" }, + { "bla", "Siksika" }, + { "bm", "Bambara" }, + { "bn", "Bengali" }, + { "bo", "Tibetan" }, + { "bpy", "Bishnupriya" }, + { "bqi", "Bakhtiari" }, + { "br", "Breton" }, + { "brh", "Brahui" }, + { "brx", "Bodo" }, + { "bs", "Bosnian" }, + { "bss", "Akoose" }, + { "bua", "Buriat" }, + { "bug", "Buginese" }, + { "bum", "Bulu" }, + { "byn", "Bilin" }, + { "byv", "Medumba" }, + { "ca", "Catalan" }, + { "cad", "Caddo" }, + { "car", "Carib" }, + { "cay", "Cayuga" }, + { "cch", "Atsam" }, + { "ccp", "Chakma" }, + { "ce", "Chechen" }, + { "ceb", "Cebuano" }, + { "cgg", "Chiga" }, + { "ch", "Chamorro" }, + { "chb", "Chibcha" }, + { "chg", "Chagatai" }, + { "chk", "Chuukese" }, + { "chm", "Mari" }, + { "chn", "Chinook Jargon" }, + { "cho", "Choctaw" }, + { "chp", "Chipewyan" }, + { "chr", "Cherokee" }, + { "chy", "Cheyenne" }, + { "cic", "Chickasaw" }, + { "ckb", "Central Kurdish" }, + { "csb", "Kashubian" }, + { "cmn", "Mandarin Chinese" }, + { "co", "Corsican" }, + { "cop", "Coptic" }, + { "cps", "Capiznon" }, + { "cr", "Cree" }, + { "crh", "Crimean Tatar" }, + { "crs", "Seselwa Creole French" }, + { "cs", "Czech" }, + { "csb", "Kashubian" }, + { "cu", "Church Slavic" }, + { "cv", "Chuvash" }, + { "cy", "Welsh" }, + { "da", "Danish" }, + { "dak", "Dakota" }, + { "dar", "Dargwa" }, + { "dav", "Taita" }, + { "de", "German" }, + { "del", "Delaware" }, + { "den", "Slave" }, + { "dgr", "Dogrib" }, + { "din", "Dinka" }, + { "dje", "Zarma" }, + { "doi", "Dogri" }, + { "dsb", "Lower Sorbian" }, + { "dtp", "Central Dusun" }, + { "dua", "Duala" }, + { "dum", "Middle Dutch" }, + { "dv", "Dhivehi" }, + { "dyo", "Jola-Fonyi" }, + { "dyu", "Dyula" }, + { "dz", "Dzongkha" }, + { "dzg", "Dazaga" }, + { "ebu", "Embu" }, + { "ee", "Ewe" }, + { "efi", "Efik" }, + { "egl", "Emilian" }, + { "egy", "Ancient Egyptian" }, + { "eka", "Ekajuk" }, + { "el", "Greek" }, + { "elx", "Elamite" }, + { "en", "English" }, + { "enm", "Middle English" }, + { "eo", "Esperanto" }, + { "es", "Spanish" }, + { "esu", "Central Yupik" }, + { "et", "Estonian" }, + { "eu", "Basque" }, + { "ewo", "Ewondo" }, + { "ext", "Extremaduran" }, + { "fa", "Persian" }, + { "fan", "Fang" }, + { "fat", "Fanti" }, + { "ff", "Fulah" }, + { "fi", "Finnish" }, + { "fil", "Filipino" }, + { "fit", "Tornedalen Finnish" }, + { "fj", "Fijian" }, + { "fo", "Faroese" }, + { "fon", "Fon" }, + { "fr", "French" }, + { "frc", "Cajun French" }, + { "frm", "Middle French" }, + { "fro", "Old French" }, + { "frp", "Arpitan" }, + { "frr", "Northern Frisian" }, + { "frs", "Eastern Frisian" }, + { "fur", "Friulian" }, + { "fy", "Western Frisian" }, + { "ga", "Irish" }, + { "gaa", "Ga" }, + { "gag", "Gagauz" }, + { "gan", "Gan Chinese" }, + { "gay", "Gayo" }, + { "gba", "Gbaya" }, + { "gbz", "Zoroastrian Dari" }, + { "gd", "Scottish Gaelic" }, + { "gez", "Geez" }, + { "gil", "Gilbertese" }, + { "gl", "Galician" }, + { "glk", "Gilaki" }, + { "gmh", "Middle High German" }, + { "gn", "Guarani" }, + { "goh", "Old High German" }, + { "gom", "Goan Konkani" }, + { "gon", "Gondi" }, + { "gor", "Gorontalo" }, + { "got", "Gothic" }, + { "grb", "Grebo" }, + { "grc", "Ancient Greek" }, + { "gsw", "Swiss German" }, + { "gu", "Gujarati" }, + { "guc", "Wayuu" }, + { "gur", "Frafra" }, + { "guz", "Gusii" }, + { "gv", "Manx" }, + { "gwi", "Gwichʼin" }, + { "ha", "Hausa" }, + { "hai", "Haida" }, + { "hak", "Hakka Chinese" }, + { "haw", "Hawaiian" }, + { "he", "Hebrew" }, + { "hi", "Hindi" }, + { "hif", "Fiji Hindi" }, + { "hil", "Hiligaynon" }, + { "hit", "Hittite" }, + { "hmn", "Hmong" }, + { "ho", "Hiri Motu" }, + { "hne", "Chhattisgarhi" }, + { "hr", "Croatian" }, + { "hsb", "Upper Sorbian" }, + { "hsn", "Xiang Chinese" }, + { "ht", "Haitian" }, + { "hu", "Hungarian" }, + { "hup", "Hupa" }, + { "hus", "Huastec" }, + { "hy", "Armenian" }, + { "hz", "Herero" }, + { "ia", "Interlingua" }, + { "iba", "Iban" }, + { "ibb", "Ibibio" }, + { "id", "Indonesian" }, + { "ie", "Interlingue" }, + { "ig", "Igbo" }, + { "ii", "Sichuan Yi" }, + { "ik", "Inupiaq" }, + { "ilo", "Iloko" }, + { "inh", "Ingush" }, + { "io", "Ido" }, + { "is", "Icelandic" }, + { "it", "Italian" }, + { "iu", "Inuktitut" }, + { "izh", "Ingrian" }, + { "ja", "Japanese" }, + { "jam", "Jamaican Creole English" }, + { "jbo", "Lojban" }, + { "jgo", "Ngomba" }, + { "jmc", "Machame" }, + { "jpr", "Judeo-Persian" }, + { "jrb", "Judeo-Arabic" }, + { "jut", "Jutish" }, + { "jv", "Javanese" }, + { "ka", "Georgian" }, + { "kaa", "Kara-Kalpak" }, + { "kab", "Kabyle" }, + { "kac", "Kachin" }, + { "kaj", "Jju" }, + { "kam", "Kamba" }, + { "kaw", "Kawi" }, + { "kbd", "Kabardian" }, + { "kbl", "Kanembu" }, + { "kcg", "Tyap" }, + { "kde", "Makonde" }, + { "kea", "Kabuverdianu" }, + { "ken", "Kenyang" }, + { "kfo", "Koro" }, + { "kg", "Kongo" }, + { "kgp", "Kaingang" }, + { "kha", "Khasi" }, + { "kho", "Khotanese" }, + { "khq", "Koyra Chiini" }, + { "khw", "Khowar" }, + { "ki", "Kikuyu" }, + { "kiu", "Kirmanjki" }, + { "kj", "Kuanyama" }, + { "kk", "Kazakh" }, + { "kkj", "Kako" }, + { "kl", "Kalaallisut" }, + { "kln", "Kalenjin" }, + { "km", "Central Khmer" }, + { "kmb", "Kimbundu" }, + { "kn", "Kannada" }, + { "ko", "Korean" }, + { "koi", "Komi-Permyak" }, + { "kok", "Konkani" }, + { "kos", "Kosraean" }, + { "kpe", "Kpelle" }, + { "kr", "Kanuri" }, + { "krc", "Karachay-Balkar" }, + { "kri", "Krio" }, + { "krj", "Kinaray-a" }, + { "krl", "Karelian" }, + { "kru", "Kurukh" }, + { "ks", "Kashmiri" }, + { "ksb", "Shambala" }, + { "ksf", "Bafia" }, + { "ksh", "Colognian" }, + { "ku", "Kurdish" }, + { "kum", "Kumyk" }, + { "kut", "Kutenai" }, + { "kv", "Komi" }, + { "kw", "Cornish" }, + { "ky", "Kirghiz" }, + { "lag", "Langi" }, + { "la", "Latin" }, + { "lad", "Ladino" }, + { "lag", "Langi" }, + { "lah", "Lahnda" }, + { "lam", "Lamba" }, + { "lb", "Luxembourgish" }, + { "lez", "Lezghian" }, + { "lfn", "Lingua Franca Nova" }, + { "lg", "Ganda" }, + { "li", "Limburgan" }, + { "lij", "Ligurian" }, + { "liv", "Livonian" }, + { "lkt", "Lakota" }, + { "lmo", "Lombard" }, + { "ln", "Lingala" }, + { "lo", "Lao" }, + { "lol", "Mongo" }, + { "lou", "Louisiana Creole" }, + { "loz", "Lozi" }, + { "lrc", "Northern Luri" }, + { "lt", "Lithuanian" }, + { "ltg", "Latgalian" }, + { "lu", "Luba-Katanga" }, + { "lua", "Luba-Lulua" }, + { "lui", "Luiseno" }, + { "lun", "Lunda" }, + { "luo", "Luo" }, + { "lus", "Mizo" }, + { "luy", "Luyia" }, + { "lv", "Latvian" }, + { "lzh", "Literary Chinese" }, + { "lzz", "Laz" }, + { "mad", "Madurese" }, + { "maf", "Mafa" }, + { "mag", "Magahi" }, + { "mai", "Maithili" }, + { "mak", "Makasar" }, + { "man", "Mandingo" }, + { "mas", "Masai" }, + { "mde", "Maba" }, + { "mdf", "Moksha" }, + { "mdr", "Mandar" }, + { "men", "Mende" }, + { "mer", "Meru" }, + { "mfe", "Morisyen" }, + { "mg", "Malagasy" }, + { "mga", "Middle Irish" }, + { "mgh", "Makhuwa-Meetto" }, + { "mgo", "Metaʼ" }, + { "mh", "Marshallese" }, + { "mhr", "Eastern Mari" }, + { "mi", "Māori" }, + { "mic", "Mi'kmaq" }, + { "min", "Minangkabau" }, + { "miq", "Mískito" }, + { "mjw", "Karbi" }, + { "mk", "Macedonian" }, + { "ml", "Malayalam" }, + { "mn", "Mongolian" }, + { "mnc", "Manchu" }, + { "mni", "Manipuri" }, + { "mnw", "Mon" }, + { "mos", "Mossi" }, + { "moh", "Mohawk" }, + { "mr", "Marathi" }, + { "mrj", "Western Mari" }, + { "ms", "Malay" }, + { "mt", "Maltese" }, + { "mua", "Mundang" }, + { "mus", "Muscogee" }, + { "mwl", "Mirandese" }, + { "mwr", "Marwari" }, + { "mwv", "Mentawai" }, + { "my", "Burmese" }, + { "mye", "Myene" }, + { "myv", "Erzya" }, + { "mzn", "Mazanderani" }, + { "na", "Nauru" }, + { "nah", "Nahuatl" }, + { "nan", "Min Nan Chinese" }, + { "nap", "Neapolitan" }, + { "naq", "Nama" }, + { "nan", "Min Nan Chinese" }, + { "nb", "Norwegian Bokmål" }, + { "nd", "North Ndebele" }, + { "nds", "Low German" }, + { "ne", "Nepali" }, + { "new", "Newari" }, + { "nhn", "Central Nahuatl" }, + { "ng", "Ndonga" }, + { "nia", "Nias" }, + { "niu", "Niuean" }, + { "njo", "Ao Naga" }, + { "nl", "Dutch" }, + { "nmg", "Kwasio" }, + { "nn", "Norwegian Nynorsk" }, + { "nnh", "Ngiemboon" }, + { "nog", "Nogai" }, + { "non", "Old Norse" }, + { "nov", "Novial" }, + { "nqo", "N'ko" }, + { "nr", "South Ndebele" }, + { "nso", "Pedi" }, + { "nus", "Nuer" }, + { "nv", "Navajo" }, + { "nwc", "Classical Newari" }, + { "ny", "Nyanja" }, + { "nym", "Nyamwezi" }, + { "nyn", "Nyankole" }, + { "nyo", "Nyoro" }, + { "nzi", "Nzima" }, + { "oc", "Occitan" }, + { "oj", "Ojibwa" }, + { "om", "Oromo" }, + { "or", "Odia" }, + { "os", "Ossetic" }, + { "osa", "Osage" }, + { "ota", "Ottoman Turkish" }, + { "pa", "Panjabi" }, + { "pag", "Pangasinan" }, + { "pal", "Pahlavi" }, + { "pam", "Pampanga" }, + { "pap", "Papiamento" }, + { "pau", "Palauan" }, + { "pcd", "Picard" }, + { "pcm", "Nigerian Pidgin" }, + { "pdc", "Pennsylvania German" }, + { "pdt", "Plautdietsch" }, + { "peo", "Old Persian" }, + { "pfl", "Palatine German" }, + { "phn", "Phoenician" }, + { "pi", "Pali" }, + { "pl", "Polish" }, + { "pms", "Piedmontese" }, + { "pnt", "Pontic" }, + { "pon", "Pohnpeian" }, + { "pr", "Pirate" }, + { "prg", "Prussian" }, + { "pro", "Old Provençal" }, + { "prs", "Dari" }, + { "ps", "Pushto" }, + { "pt", "Portuguese" }, + { "qu", "Quechua" }, + { "quc", "K'iche" }, + { "qug", "Chimborazo Highland Quichua" }, + { "quy", "Ayacucho Quechua" }, + { "quz", "Cusco Quechua" }, + { "raj", "Rajasthani" }, + { "rap", "Rapanui" }, + { "rar", "Rarotongan" }, + { "rgn", "Romagnol" }, + { "rif", "Riffian" }, + { "rm", "Romansh" }, + { "rn", "Rundi" }, + { "ro", "Romanian" }, + { "rof", "Rombo" }, + { "rom", "Romany" }, + { "rtm", "Rotuman" }, + { "ru", "Russian" }, + { "rue", "Rusyn" }, + { "rug", "Roviana" }, + { "rup", "Aromanian" }, + { "rw", "Kinyarwanda" }, + { "rwk", "Rwa" }, + { "sa", "Sanskrit" }, + { "sad", "Sandawe" }, + { "sah", "Sakha" }, + { "sam", "Samaritan Aramaic" }, + { "saq", "Samburu" }, + { "sas", "Sasak" }, + { "sat", "Santali" }, + { "saz", "Saurashtra" }, + { "sba", "Ngambay" }, + { "sbp", "Sangu" }, + { "sc", "Sardinian" }, + { "scn", "Sicilian" }, + { "sco", "Scots" }, + { "sd", "Sindhi" }, + { "sdc", "Sassarese Sardinian" }, + { "sdh", "Southern Kurdish" }, + { "se", "Northern Sami" }, + { "see", "Seneca" }, + { "seh", "Sena" }, + { "sei", "Seri" }, + { "sel", "Selkup" }, + { "ses", "Koyraboro Senni" }, + { "sg", "Sango" }, + { "sga", "Old Irish" }, + { "sgs", "Samogitian" }, + { "sh", "Serbo-Croatian" }, + { "shi", "Tachelhit" }, + { "shn", "Shan" }, + { "shs", "Shuswap" }, + { "shu", "Chadian Arabic" }, + { "si", "Sinhala" }, + { "sid", "Sidamo" }, + { "sk", "Slovak" }, + { "sl", "Slovenian" }, + { "sli", "Lower Silesian" }, + { "sly", "Selayar" }, + { "sm", "Samoan" }, + { "sma", "Southern Sami" }, + { "smj", "Lule Sami" }, + { "smn", "Inari Sami" }, + { "sms", "Skolt Sami" }, + { "sn", "Shona" }, + { "snk", "Soninke" }, + { "so", "Somali" }, + { "sog", "Sogdien" }, + { "son", "Songhai" }, + { "sq", "Albanian" }, + { "sr", "Serbian" }, + { "srn", "Sranan Tongo" }, + { "srr", "Serer" }, + { "ss", "Swati" }, + { "ssy", "Saho" }, + { "st", "Southern Sotho" }, + { "stq", "Saterland Frisian" }, + { "su", "Sundanese" }, + { "suk", "Sukuma" }, + { "sus", "Susu" }, + { "sux", "Sumerian" }, + { "sv", "Swedish" }, + { "sw", "Swahili" }, + { "swb", "Comorian" }, + { "swc", "Congo Swahili" }, + { "syc", "Classical Syriac" }, + { "syr", "Syriac" }, + { "szl", "Silesian" }, + { "ta", "Tamil" }, + { "tcy", "Tulu" }, + { "te", "Telugu" }, + { "tem", "Timne" }, + { "teo", "Teso" }, + { "ter", "Tereno" }, + { "tet", "Tetum" }, + { "tg", "Tajik" }, + { "th", "Thai" }, + { "the", "Chitwania Tharu" }, + { "ti", "Tigrinya" }, + { "tig", "Tigre" }, + { "tiv", "Tiv" }, + { "tk", "Turkmen" }, + { "tkl", "Tokelau" }, + { "tkr", "Tsakhur" }, + { "tl", "Tagalog" }, + { "tlh", "Klingon" }, + { "tli", "Tlingit" }, + { "tly", "Talysh" }, + { "tmh", "Tamashek" }, + { "tn", "Tswana" }, + { "to", "Tongan" }, + { "tog", "Nyasa Tonga" }, + { "tpi", "Tok Pisin" }, + { "tr", "Turkish" }, + { "tru", "Turoyo" }, + { "trv", "Taroko" }, + { "ts", "Tsonga" }, + { "tsd", "Tsakonian" }, + { "tsi", "Tsimshian" }, + { "tt", "Tatar" }, + { "ttt", "Muslim Tat" }, + { "tum", "Tumbuka" }, + { "tvl", "Tuvalu" }, + { "tw", "Twi" }, + { "twq", "Tasawaq" }, + { "ty", "Tahitian" }, + { "tyv", "Tuvinian" }, + { "tzm", "Central Atlas Tamazight" }, + { "udm", "Udmurt" }, + { "ug", "Uyghur" }, + { "uga", "Ugaritic" }, + { "uk", "Ukrainian" }, + { "umb", "Umbundu" }, + { "unm", "Unami" }, + { "ur", "Urdu" }, + { "uz", "Uzbek" }, + { "vai", "Vai" }, + { "ve", "Venda" }, + { "vec", "Venetian" }, + { "vep", "Veps" }, + { "vi", "Vietnamese" }, + { "vls", "West Flemish" }, + { "vmf", "Main-Franconian" }, + { "vo", "Volapük" }, + { "vot", "Votic" }, + { "vro", "Võro" }, + { "vun", "Vunjo" }, + { "wa", "Walloon" }, + { "wae", "Walser" }, + { "wal", "Wolaytta" }, + { "war", "Waray" }, + { "was", "Washo" }, + { "wbp", "Warlpiri" }, + { "wo", "Wolof" }, + { "wuu", "Wu Chinese" }, + { "xal", "Kalmyk" }, + { "xh", "Xhosa" }, + { "xmf", "Mingrelian" }, + { "xog", "Soga" }, + { "yao", "Yao" }, + { "yap", "Yapese" }, + { "yav", "Yangben" }, + { "ybb", "Yemba" }, + { "yi", "Yiddish" }, + { "yo", "Yoruba" }, + { "yrl", "Nheengatu" }, + { "yue", "Yue Chinese" }, + { "yuw", "Papua New Guinea" }, + { "za", "Zhuang" }, + { "zap", "Zapotec" }, + { "zbl", "Blissymbols" }, + { "zea", "Zeelandic" }, + { "zen", "Zenaga" }, + { "zgh", "Standard Moroccan Tamazight" }, + { "zh", "Chinese" }, + { "zu", "Zulu" }, + { "zun", "Zuni" }, + { "zza", "Zaza" }, + { nullptr, nullptr } +}; + +// Additional regional variants. +// Variant name, supported languages. + +static const char *locale_variants[][2] = { + { "valencia", "ca" }, + { "iqtelif", "tt" }, + { "saaho", "aa" }, + { "tradnl", "es" }, + { nullptr, nullptr }, +}; + +// Script names and codes (excludes typographic variants, special codes, reserved codes and aliases for combined scripts). +// Reference: +// - https://en.wikipedia.org/wiki/ISO_15924 + +static const char *script_list[][2] = { + { "Adlam", "Adlm" }, + { "Afaka", "Afak" }, + { "Caucasian Albanian", "Aghb" }, + { "Ahom", "Ahom" }, + { "Arabic", "Arab" }, + { "Imperial Aramaic", "Armi" }, + { "Armenian", "Armn" }, + { "Avestan", "Avst" }, + { "Balinese", "Bali" }, + { "Bamum", "Bamu" }, + { "Bassa Vah", "Bass" }, + { "Batak", "Batk" }, + { "Bengali", "Beng" }, + { "Bhaiksuki", "Bhks" }, + { "Blissymbols", "Blis" }, + { "Bopomofo", "Bopo" }, + { "Brahmi", "Brah" }, + { "Braille", "Brai" }, + { "Buginese", "Bugi" }, + { "Buhid", "Buhd" }, + { "Chakma", "Cakm" }, + { "Unified Canadian Aboriginal", "Cans" }, + { "Carian", "Cari" }, + { "Cham", "Cham" }, + { "Cherokee", "Cher" }, + { "Chorasmian", "Chrs" }, + { "Cirth", "Cirt" }, + { "Coptic", "Copt" }, + { "Cypro-Minoan", "Cpmn" }, + { "Cypriot", "Cprt" }, + { "Cyrillic", "Cyrl" }, + { "Devanagari", "Deva" }, + { "Dives Akuru", "Diak" }, + { "Dogra", "Dogr" }, + { "Deseret", "Dsrt" }, + { "Duployan", "Dupl" }, + { "Egyptian demotic", "Egyd" }, + { "Egyptian hieratic", "Egyh" }, + { "Egyptian hieroglyphs", "Egyp" }, + { "Elbasan", "Elba" }, + { "Elymaic", "Elym" }, + { "Ethiopic", "Ethi" }, + { "Khutsuri", "Geok" }, + { "Georgian", "Geor" }, + { "Glagolitic", "Glag" }, + { "Gunjala Gondi", "Gong" }, + { "Masaram Gondi", "Gonm" }, + { "Gothic", "Goth" }, + { "Grantha", "Gran" }, + { "Greek", "Grek" }, + { "Gujarati", "Gujr" }, + { "Gurmukhi", "Guru" }, + { "Hangul", "Hang" }, + { "Han", "Hani" }, + { "Hanunoo", "Hano" }, + { "Simplified", "Hans" }, + { "Traditional", "Hant" }, + { "Hatran", "Hatr" }, + { "Hebrew", "Hebr" }, + { "Hiragana", "Hira" }, + { "Anatolian Hieroglyphs", "Hluw" }, + { "Pahawh Hmong", "Hmng" }, + { "Nyiakeng Puachue Hmong", "Hmnp" }, + { "Old Hungarian", "Hung" }, + { "Indus", "Inds" }, + { "Old Italic", "Ital" }, + { "Javanese", "Java" }, + { "Jurchen", "Jurc" }, + { "Kayah Li", "Kali" }, + { "Katakana", "Kana" }, + { "Kharoshthi", "Khar" }, + { "Khmer", "Khmr" }, + { "Khojki", "Khoj" }, + { "Khitan large script", "Kitl" }, + { "Khitan small script", "Kits" }, + { "Kannada", "Knda" }, + { "Kpelle", "Kpel" }, + { "Kaithi", "Kthi" }, + { "Tai Tham", "Lana" }, + { "Lao", "Laoo" }, + { "Latin", "Latn" }, + { "Leke", "Leke" }, + { "Lepcha", "Lepc" }, + { "Limbu", "Limb" }, + { "Linear A", "Lina" }, + { "Linear B", "Linb" }, + { "Lisu", "Lisu" }, + { "Loma", "Loma" }, + { "Lycian", "Lyci" }, + { "Lydian", "Lydi" }, + { "Mahajani", "Mahj" }, + { "Makasar", "Maka" }, + { "Mandaic", "Mand" }, + { "Manichaean", "Mani" }, + { "Marchen", "Marc" }, + { "Mayan Hieroglyphs", "Maya" }, + { "Medefaidrin", "Medf" }, + { "Mende Kikakui", "Mend" }, + { "Meroitic Cursive", "Merc" }, + { "Meroitic Hieroglyphs", "Mero" }, + { "Malayalam", "Mlym" }, + { "Modi", "Modi" }, + { "Mongolian", "Mong" }, + { "Moon", "Moon" }, + { "Mro", "Mroo" }, + { "Meitei Mayek", "Mtei" }, + { "Multani", "Mult" }, + { "Myanmar (Burmese)", "Mymr" }, + { "Nandinagari", "Nand" }, + { "Old North Arabian", "Narb" }, + { "Nabataean", "Nbat" }, + { "Newa", "Newa" }, + { "Naxi Dongba", "Nkdb" }, + { "Nakhi Geba", "Nkgb" }, + { "N'ko", "Nkoo" }, + { "Nüshu", "Nshu" }, + { "Ogham", "Ogam" }, + { "Ol Chiki", "Olck" }, + { "Old Turkic", "Orkh" }, + { "Oriya", "Orya" }, + { "Osage", "Osge" }, + { "Osmanya", "Osma" }, + { "Old Uyghur", "Ougr" }, + { "Palmyrene", "Palm" }, + { "Pau Cin Hau", "Pauc" }, + { "Proto-Cuneiform", "Pcun" }, + { "Proto-Elamite", "Pelm" }, + { "Old Permic", "Perm" }, + { "Phags-pa", "Phag" }, + { "Inscriptional Pahlavi", "Phli" }, + { "Psalter Pahlavi", "Phlp" }, + { "Book Pahlavi", "Phlv" }, + { "Phoenician", "Phnx" }, + { "Klingon", "Piqd" }, + { "Miao", "Plrd" }, + { "Inscriptional Parthian", "Prti" }, + { "Proto-Sinaitic", "Psin" }, + { "Ranjana", "Ranj" }, + { "Rejang", "Rjng" }, + { "Hanifi Rohingya", "Rohg" }, + { "Rongorongo", "Roro" }, + { "Runic", "Runr" }, + { "Samaritan", "Samr" }, + { "Sarati", "Sara" }, + { "Old South Arabian", "Sarb" }, + { "Saurashtra", "Saur" }, + { "SignWriting", "Sgnw" }, + { "Shavian", "Shaw" }, + { "Sharada", "Shrd" }, + { "Shuishu", "Shui" }, + { "Siddham", "Sidd" }, + { "Khudawadi", "Sind" }, + { "Sinhala", "Sinh" }, + { "Sogdian", "Sogd" }, + { "Old Sogdian", "Sogo" }, + { "Sora Sompeng", "Sora" }, + { "Soyombo", "Soyo" }, + { "Sundanese", "Sund" }, + { "Syloti Nagri", "Sylo" }, + { "Syriac", "Syrc" }, + { "Tagbanwa", "Tagb" }, + { "Takri", "Takr" }, + { "Tai Le", "Tale" }, + { "New Tai Lue", "Talu" }, + { "Tamil", "Taml" }, + { "Tangut", "Tang" }, + { "Tai Viet", "Tavt" }, + { "Telugu", "Telu" }, + { "Tengwar", "Teng" }, + { "Tifinagh", "Tfng" }, + { "Tagalog", "Tglg" }, + { "Thaana", "Thaa" }, + { "Thai", "Thai" }, + { "Tibetan", "Tibt" }, + { "Tirhuta", "Tirh" }, + { "Tangsa", "Tnsa" }, + { "Toto", "Toto" }, + { "Ugaritic", "Ugar" }, + { "Vai", "Vaii" }, + { "Visible Speech", "Visp" }, + { "Vithkuqi", "Vith" }, + { "Warang Citi", "Wara" }, + { "Wancho", "Wcho" }, + { "Woleai", "Wole" }, + { "Old Persian", "Xpeo" }, + { "Cuneiform", "Xsux" }, + { "Yezidi", "Yezi" }, + { "Yi", "Yiii" }, + { "Zanabazar Square", "Zanb" }, + { nullptr, nullptr } +}; + +#endif // LOCALES_H diff --git a/core/string/node_path.cpp b/core/string/node_path.cpp index 5fae13779e..30fa434fad 100644 --- a/core/string/node_path.cpp +++ b/core/string/node_path.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -199,6 +199,21 @@ Vector<StringName> NodePath::get_subnames() const { return Vector<StringName>(); } +StringName NodePath::get_concatenated_names() const { + ERR_FAIL_COND_V(!data, StringName()); + + if (!data->concatenated_path) { + int pc = data->path.size(); + String concatenated; + const StringName *sn = data->path.ptr(); + for (int i = 0; i < pc; i++) { + concatenated += i == 0 ? sn[i].operator String() : "/" + sn[i]; + } + data->concatenated_path = concatenated; + } + return data->concatenated_path; +} + StringName NodePath::get_concatenated_subnames() const { ERR_FAIL_COND_V(!data, StringName()); @@ -293,12 +308,12 @@ void NodePath::simplify() { break; } if (data->path[i].operator String() == ".") { - data->path.remove(i); + data->path.remove_at(i); i--; } else if (i > 0 && data->path[i].operator String() == ".." && data->path[i - 1].operator String() != "." && data->path[i - 1].operator String() != "..") { //remove both - data->path.remove(i - 1); - data->path.remove(i - 1); + data->path.remove_at(i - 1); + data->path.remove_at(i - 1); i -= 2; if (data->path.size() == 0) { data->path.push_back("."); @@ -368,7 +383,7 @@ NodePath::NodePath(const String &p_path) { for (int i = from; i <= path.length(); i++) { if (path[i] == ':' || path[i] == 0) { String str = path.substr(from, i - from); - if (str == "") { + if (str.is_empty()) { if (path[i] == 0) { continue; // Allow end-of-path : } diff --git a/core/string/node_path.h b/core/string/node_path.h index a277ab26fa..2bce33e21e 100644 --- a/core/string/node_path.h +++ b/core/string/node_path.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -39,6 +39,7 @@ class NodePath { SafeRefCount refcount; Vector<StringName> path; Vector<StringName> subpath; + StringName concatenated_path; StringName concatenated_subpath; bool absolute; bool has_slashes; @@ -59,6 +60,7 @@ public: StringName get_subname(int p_idx) const; Vector<StringName> get_names() const; Vector<StringName> get_subnames() const; + StringName get_concatenated_names() const; StringName get_concatenated_subnames() const; NodePath rel_path_to(const NodePath &p_np) const; diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp index f8be564740..07302cc8c3 100644 --- a/core/string/optimized_translation.cpp +++ b/core/string/optimized_translation.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -37,9 +37,9 @@ extern "C" { } struct CompressedString { - int orig_len; + int orig_len = 0; CharString compressed; - int offset; + int offset = 0; }; void OptimizedTranslation::generate(const Ref<Translation> &p_from) { @@ -53,7 +53,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) { int size = Math::larger_prime(keys.size()); Vector<Vector<Pair<int, CharString>>> buckets; - Vector<Map<uint32_t, int>> table; + Vector<HashMap<uint32_t, int>> table; Vector<uint32_t> hfunc_table; Vector<CompressedString> compressed; @@ -108,7 +108,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) { for (int i = 0; i < size; i++) { const Vector<Pair<int, CharString>> &b = buckets[i]; - Map<uint32_t, int> &t = table.write[i]; + HashMap<uint32_t, int> &t = table.write[i]; if (b.size() == 0) { continue; @@ -147,7 +147,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) { int btindex = 0; for (int i = 0; i < size; i++) { - const Map<uint32_t, int> &t = table[i]; + const HashMap<uint32_t, int> &t = table[i]; if (t.size() == 0) { htw[i] = 0xFFFFFFFF; //nothing continue; diff --git a/core/string/optimized_translation.h b/core/string/optimized_translation.h index bccf932383..f3dbfe8f5c 100644 --- a/core/string/optimized_translation.h +++ b/core/string/optimized_translation.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -38,7 +38,7 @@ class OptimizedTranslation : public Translation { //this translation uses a sort of modified perfect hash algorithm //it requires hashing strings twice and then does a binary search, - //so it's slower, but at the same time it has an extreemly high chance + //so it's slower, but at the same time it has an extremely high chance //of catching untranslated strings //load/store friendly types diff --git a/core/string/print_string.cpp b/core/string/print_string.cpp index 345371d733..592da58fe7 100644 --- a/core/string/print_string.cpp +++ b/core/string/print_string.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -30,13 +30,12 @@ #include "print_string.h" +#include "core/core_globals.h" #include "core/os/os.h" #include <stdio.h> static PrintHandlerList *print_handler_list = nullptr; -bool _print_line_enabled = true; -bool _print_error_enabled = true; void add_print_handler(PrintHandlerList *p_handler) { _global_lock(); @@ -45,7 +44,7 @@ void add_print_handler(PrintHandlerList *p_handler) { _global_unlock(); } -void remove_print_handler(PrintHandlerList *p_handler) { +void remove_print_handler(const PrintHandlerList *p_handler) { _global_lock(); PrintHandlerList *prev = nullptr; @@ -69,8 +68,8 @@ void remove_print_handler(PrintHandlerList *p_handler) { ERR_FAIL_COND(l == nullptr); } -void print_line(String p_string) { - if (!_print_line_enabled) { +void __print_line(String p_string) { + if (!CoreGlobals::print_line_enabled) { return; } @@ -79,7 +78,98 @@ void print_line(String p_string) { _global_lock(); PrintHandlerList *l = print_handler_list; while (l) { - l->printfunc(l->userdata, p_string, false); + l->printfunc(l->userdata, p_string, false, false); + l = l->next; + } + + _global_unlock(); +} + +void __print_line_rich(String p_string) { + if (!CoreGlobals::print_line_enabled) { + return; + } + + // Convert a subset of BBCode tags to ANSI escape codes for correct display in the terminal. + // Support of those ANSI escape codes varies across terminal emulators, + // especially for italic and strikethrough. + String p_string_ansi = p_string; + + p_string_ansi = p_string_ansi.replace("[b]", "\u001b[1m"); + p_string_ansi = p_string_ansi.replace("[/b]", "\u001b[22m"); + p_string_ansi = p_string_ansi.replace("[i]", "\u001b[3m"); + p_string_ansi = p_string_ansi.replace("[/i]", "\u001b[23m"); + p_string_ansi = p_string_ansi.replace("[u]", "\u001b[4m"); + p_string_ansi = p_string_ansi.replace("[/u]", "\u001b[24m"); + p_string_ansi = p_string_ansi.replace("[s]", "\u001b[9m"); + p_string_ansi = p_string_ansi.replace("[/s]", "\u001b[29m"); + + p_string_ansi = p_string_ansi.replace("[indent]", " "); + p_string_ansi = p_string_ansi.replace("[/indent]", ""); + p_string_ansi = p_string_ansi.replace("[code]", "\u001b[2m"); + p_string_ansi = p_string_ansi.replace("[/code]", "\u001b[22m"); + p_string_ansi = p_string_ansi.replace("[url]", ""); + p_string_ansi = p_string_ansi.replace("[/url]", ""); + p_string_ansi = p_string_ansi.replace("[center]", "\n\t\t\t"); + p_string_ansi = p_string_ansi.replace("[/center]", ""); + p_string_ansi = p_string_ansi.replace("[right]", "\n\t\t\t\t\t\t"); + p_string_ansi = p_string_ansi.replace("[/right]", ""); + + if (p_string_ansi.contains("[color")) { + p_string_ansi = p_string_ansi.replace("[color=black]", "\u001b[30m"); + p_string_ansi = p_string_ansi.replace("[color=red]", "\u001b[91m"); + p_string_ansi = p_string_ansi.replace("[color=green]", "\u001b[92m"); + p_string_ansi = p_string_ansi.replace("[color=lime]", "\u001b[92m"); + p_string_ansi = p_string_ansi.replace("[color=yellow]", "\u001b[93m"); + p_string_ansi = p_string_ansi.replace("[color=blue]", "\u001b[94m"); + p_string_ansi = p_string_ansi.replace("[color=magenta]", "\u001b[95m"); + p_string_ansi = p_string_ansi.replace("[color=pink]", "\u001b[38;5;218m"); + p_string_ansi = p_string_ansi.replace("[color=purple]", "\u001b[38;5;98m"); + p_string_ansi = p_string_ansi.replace("[color=cyan]", "\u001b[96m"); + p_string_ansi = p_string_ansi.replace("[color=white]", "\u001b[97m"); + p_string_ansi = p_string_ansi.replace("[color=orange]", "\u001b[38;5;208m"); + p_string_ansi = p_string_ansi.replace("[color=gray]", "\u001b[90m"); + p_string_ansi = p_string_ansi.replace("[/color]", "\u001b[39m"); + } + if (p_string_ansi.contains("[bgcolor")) { + p_string_ansi = p_string_ansi.replace("[bgcolor=black]", "\u001b[40m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=red]", "\u001b[101m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=green]", "\u001b[102m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=lime]", "\u001b[102m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=yellow]", "\u001b[103m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=blue]", "\u001b[104m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=magenta]", "\u001b[105m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=pink]", "\u001b[48;5;218m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=purple]", "\u001b[48;5;98m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=cyan]", "\u001b[106m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=white]", "\u001b[107m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=orange]", "\u001b[48;5;208m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=gray]", "\u001b[100m"); + p_string_ansi = p_string_ansi.replace("[/bgcolor]", "\u001b[49m"); + } + if (p_string_ansi.contains("[fgcolor")) { + p_string_ansi = p_string_ansi.replace("[fgcolor=black]", "\u001b[30;40m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=red]", "\u001b[91;101m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=green]", "\u001b[92;102m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=lime]", "\u001b[92;102m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=yellow]", "\u001b[93;103m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=blue]", "\u001b[94;104m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=magenta]", "\u001b[95;105m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=pink]", "\u001b[38;5;218;48;5;218m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=purple]", "\u001b[38;5;98;48;5;98m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=cyan]", "\u001b[96;106m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=white]", "\u001b[97;107m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=orange]", "\u001b[38;5;208;48;5;208m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=gray]", "\u001b[90;100m"); + p_string_ansi = p_string_ansi.replace("[/fgcolor]", "\u001b[39;49m"); + } + + OS::get_singleton()->print_rich("%s\n", p_string_ansi.utf8().get_data()); + + _global_lock(); + PrintHandlerList *l = print_handler_list; + while (l) { + l->printfunc(l->userdata, p_string, false, true); l = l->next; } @@ -87,7 +177,7 @@ void print_line(String p_string) { } void print_error(String p_string) { - if (!_print_error_enabled) { + if (!CoreGlobals::print_error_enabled) { return; } @@ -96,7 +186,7 @@ void print_error(String p_string) { _global_lock(); PrintHandlerList *l = print_handler_list; while (l) { - l->printfunc(l->userdata, p_string, true); + l->printfunc(l->userdata, p_string, true, false); l = l->next; } @@ -108,3 +198,7 @@ void print_verbose(String p_string) { print_line(p_string); } } + +String stringify_variants(Variant p_var) { + return p_var.operator String(); +} diff --git a/core/string/print_string.h b/core/string/print_string.h index 1a9ff1efd6..ca930a3a0f 100644 --- a/core/string/print_string.h +++ b/core/string/print_string.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -31,11 +31,11 @@ #ifndef PRINT_STRING_H #define PRINT_STRING_H -#include "core/string/ustring.h" +#include "core/variant/variant.h" extern void (*_print_func)(String); -typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error); +typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error, bool p_rich); struct PrintHandlerList { PrintHandlerFunc printfunc = nullptr; @@ -46,13 +46,37 @@ struct PrintHandlerList { PrintHandlerList() {} }; +String stringify_variants(Variant p_var); + +template <typename... Args> +String stringify_variants(Variant p_var, Args... p_args) { + return p_var.operator String() + " " + stringify_variants(p_args...); +} + void add_print_handler(PrintHandlerList *p_handler); -void remove_print_handler(PrintHandlerList *p_handler); +void remove_print_handler(const PrintHandlerList *p_handler); -extern bool _print_line_enabled; -extern bool _print_error_enabled; -extern void print_line(String p_string); +extern void __print_line(String p_string); +extern void __print_line_rich(String p_string); extern void print_error(String p_string); extern void print_verbose(String p_string); +inline void print_line(Variant v) { + __print_line(stringify_variants(v)); +} + +inline void print_line_rich(Variant v) { + __print_line_rich(stringify_variants(v)); +} + +template <typename... Args> +void print_line(Variant p_var, Args... p_args) { + __print_line(stringify_variants(p_var, p_args...)); +} + +template <typename... Args> +void print_line_rich(Variant p_var, Args... p_args) { + __print_line_rich(stringify_variants(p_var, p_args...)); +} + #endif // PRINT_STRING_H diff --git a/core/string/string_buffer.h b/core/string/string_buffer.h index 33897c3674..424952f786 100644 --- a/core/string/string_buffer.h +++ b/core/string/string_buffer.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/string_builder.cpp b/core/string/string_builder.cpp index 834c87c845..7359ff59e1 100644 --- a/core/string/string_builder.cpp +++ b/core/string/string_builder.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -33,7 +33,7 @@ #include <string.h> StringBuilder &StringBuilder::append(const String &p_string) { - if (p_string == String()) { + if (p_string.is_empty()) { return *this; } diff --git a/core/string/string_builder.h b/core/string/string_builder.h index 30ce2a06f7..897efa95ef 100644 --- a/core/string/string_builder.h +++ b/core/string/string_builder.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/string_name.cpp b/core/string/string_name.cpp index 9024f60dae..9c4fc4e1b7 100644 --- a/core/string/string_name.cpp +++ b/core/string/string_name.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -73,23 +73,38 @@ void StringName::cleanup() { d = d->next; } } - print_line("\nStringName Reference Ranking:\n"); + + print_line("\nStringName reference ranking (from most to least referenced):\n"); + data.sort_custom<DebugSortReferences>(); - for (int i = 0; i < MIN(100, data.size()); i++) { + int unreferenced_stringnames = 0; + int rarely_referenced_stringnames = 0; + for (int i = 0; i < data.size(); i++) { print_line(itos(i + 1) + ": " + data[i]->get_name() + " - " + itos(data[i]->debug_references)); + if (data[i]->debug_references == 0) { + unreferenced_stringnames += 1; + } else if (data[i]->debug_references < 5) { + rarely_referenced_stringnames += 1; + } } + + print_line(vformat("\nOut of %d StringNames, %d StringNames were never referenced during this run (0 times) (%.2f%%).", data.size(), unreferenced_stringnames, unreferenced_stringnames / float(data.size()) * 100)); + print_line(vformat("Out of %d StringNames, %d StringNames were rarely referenced during this run (1-4 times) (%.2f%%).", data.size(), rarely_referenced_stringnames, rarely_referenced_stringnames / float(data.size()) * 100)); } #endif int lost_strings = 0; for (int i = 0; i < STRING_TABLE_LEN; i++) { while (_table[i]) { _Data *d = _table[i]; - lost_strings++; - if (d->static_count.get() != d->refcount.get() && OS::get_singleton()->is_stdout_verbose()) { - if (d->cname) { - print_line("Orphan StringName: " + String(d->cname)); - } else { - print_line("Orphan StringName: " + String(d->name)); + if (d->static_count.get() != d->refcount.get()) { + lost_strings++; + + if (OS::get_singleton()->is_stdout_verbose()) { + if (d->cname) { + print_line("Orphan StringName: " + String(d->cname)); + } else { + print_line("Orphan StringName: " + String(d->name)); + } } } @@ -232,6 +247,7 @@ StringName::StringName(const char *p_name, bool p_static) { _data->cname = nullptr; _data->next = _table[idx]; _data->prev = nullptr; + #ifdef DEBUG_ENABLED if (unlikely(debug_stringname)) { // Keep in memory, force static. @@ -310,7 +326,7 @@ StringName::StringName(const String &p_name, bool p_static) { ERR_FAIL_COND(!configured); - if (p_name == String()) { + if (p_name.is_empty()) { return; } @@ -434,7 +450,7 @@ StringName StringName::search(const char32_t *p_name) { } StringName StringName::search(const String &p_name) { - ERR_FAIL_COND_V(p_name == "", StringName()); + ERR_FAIL_COND_V(p_name.is_empty(), StringName()); MutexLock lock(mutex); diff --git a/core/string/string_name.h b/core/string/string_name.h index ce7988744b..ff4c41af94 100644 --- a/core/string/string_name.h +++ b/core/string/string_name.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -35,6 +35,8 @@ #include "core/string/ustring.h" #include "core/templates/safe_refcount.h" +#define UNIQUE_NODE_PREFIX "%" + class Main; struct StaticCString { @@ -70,7 +72,7 @@ class StringName { _Data *_data = nullptr; union _HashUnion { - _Data *ptr; + _Data *ptr = nullptr; uint32_t hash; }; @@ -100,6 +102,17 @@ public: bool operator==(const String &p_name) const; bool operator==(const char *p_name) const; bool operator!=(const String &p_name) const; + + _FORCE_INLINE_ bool is_node_unique_name() const { + if (!_data) { + return false; + } + if (_data->cname != nullptr) { + return (char32_t)_data->cname[0] == (char32_t)UNIQUE_NODE_PREFIX[0]; + } else { + return (char32_t)_data->name[0] == (char32_t)UNIQUE_NODE_PREFIX[0]; + } + } _FORCE_INLINE_ bool operator<(const StringName &p_name) const { return _data < p_name._data; } @@ -181,6 +194,18 @@ bool operator!=(const char *p_name, const StringName &p_string_name); StringName _scs_create(const char *p_chr, bool p_static = false); +/* + * The SNAME macro is used to speed up StringName creation, as it allows caching it after the first usage in a very efficient way. + * It should NOT be used everywhere, but instead in places where high performance is required and the creation of a StringName + * can be costly. Places where it should be used are: + * - Control::get_theme_*(<name> and Window::get_theme_*(<name> functions. + * - emit_signal(<name>,..) function + * - call_deferred(<name>,..) function + * - Comparisons to a StringName in overridden _set and _get methods. + * + * Use in places that can be called hundreds of times per frame (or more) is recommended, but this situation is very rare. If in doubt, do not use. + */ + #define SNAME(m_arg) ([]() -> const StringName & { static StringName sname = _scs_create(m_arg, true); return sname; })() #endif // STRING_NAME_H diff --git a/core/string/translation.cpp b/core/string/translation.cpp index cf61467d08..b83b7c786f 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -33,793 +33,12 @@ #include "core/config/project_settings.h" #include "core/io/resource_loader.h" #include "core/os/os.h" +#include "core/string/locales.h" #ifdef TOOLS_ENABLED #include "main/main.h" #endif -// ISO 639-1 language codes (and a couple of three-letter ISO 639-2 codes), -// with the addition of glibc locales with their regional identifiers. -// This list must match the language names (in English) of locale_names. -// -// References: -// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes -// - https://lh.2xlibre.net/locales/ -// - https://iso639-3.sil.org/ - -static const char *locale_list[] = { - "aa", // Afar - "aa_DJ", // Afar (Djibouti) - "aa_ER", // Afar (Eritrea) - "aa_ET", // Afar (Ethiopia) - "af", // Afrikaans - "af_ZA", // Afrikaans (South Africa) - "agr_PE", // Aguaruna (Peru) - "ak_GH", // Akan (Ghana) - "am_ET", // Amharic (Ethiopia) - "an_ES", // Aragonese (Spain) - "anp_IN", // Angika (India) - "ar", // Arabic - "ar_AE", // Arabic (United Arab Emirates) - "ar_BH", // Arabic (Bahrain) - "ar_DZ", // Arabic (Algeria) - "ar_EG", // Arabic (Egypt) - "ar_IN", // Arabic (India) - "ar_IQ", // Arabic (Iraq) - "ar_JO", // Arabic (Jordan) - "ar_KW", // Arabic (Kuwait) - "ar_LB", // Arabic (Lebanon) - "ar_LY", // Arabic (Libya) - "ar_MA", // Arabic (Morocco) - "ar_OM", // Arabic (Oman) - "ar_QA", // Arabic (Qatar) - "ar_SA", // Arabic (Saudi Arabia) - "ar_SD", // Arabic (Sudan) - "ar_SS", // Arabic (South Soudan) - "ar_SY", // Arabic (Syria) - "ar_TN", // Arabic (Tunisia) - "ar_YE", // Arabic (Yemen) - "as_IN", // Assamese (India) - "ast_ES", // Asturian (Spain) - "ayc_PE", // Southern Aymara (Peru) - "ay_PE", // Aymara (Peru) - "az", // Azerbaijani - "az_AZ", // Azerbaijani (Azerbaijan) - "be", // Belarusian - "be_BY", // Belarusian (Belarus) - "bem_ZM", // Bemba (Zambia) - "ber_DZ", // Berber languages (Algeria) - "ber_MA", // Berber languages (Morocco) - "bg", // Bulgarian - "bg_BG", // Bulgarian (Bulgaria) - "bhb_IN", // Bhili (India) - "bho_IN", // Bhojpuri (India) - "bi_TV", // Bislama (Tuvalu) - "bn", // Bengali - "bn_BD", // Bengali (Bangladesh) - "bn_IN", // Bengali (India) - "bo", // Tibetan - "bo_CN", // Tibetan (China) - "bo_IN", // Tibetan (India) - "br", // Breton - "br_FR", // Breton (France) - "brx_IN", // Bodo (India) - "bs_BA", // Bosnian (Bosnia and Herzegovina) - "byn_ER", // Bilin (Eritrea) - "ca", // Catalan - "ca_AD", // Catalan (Andorra) - "ca_ES", // Catalan (Spain) - "ca_FR", // Catalan (France) - "ca_IT", // Catalan (Italy) - "ce_RU", // Chechen (Russia) - "chr_US", // Cherokee (United States) - "cmn_TW", // Mandarin Chinese (Taiwan) - "crh_UA", // Crimean Tatar (Ukraine) - "csb_PL", // Kashubian (Poland) - "cs", // Czech - "cs_CZ", // Czech (Czech Republic) - "cv_RU", // Chuvash (Russia) - "cy_GB", // Welsh (United Kingdom) - "da", // Danish - "da_DK", // Danish (Denmark) - "de", // German - "de_AT", // German (Austria) - "de_BE", // German (Belgium) - "de_CH", // German (Switzerland) - "de_DE", // German (Germany) - "de_IT", // German (Italy) - "de_LU", // German (Luxembourg) - "doi_IN", // Dogri (India) - "dv_MV", // Dhivehi (Maldives) - "dz_BT", // Dzongkha (Bhutan) - "el", // Greek - "el_CY", // Greek (Cyprus) - "el_GR", // Greek (Greece) - "en", // English - "en_AG", // English (Antigua and Barbuda) - "en_AU", // English (Australia) - "en_BW", // English (Botswana) - "en_CA", // English (Canada) - "en_DK", // English (Denmark) - "en_GB", // English (United Kingdom) - "en_HK", // English (Hong Kong) - "en_IE", // English (Ireland) - "en_IL", // English (Israel) - "en_IN", // English (India) - "en_NG", // English (Nigeria) - "en_NZ", // English (New Zealand) - "en_PH", // English (Philippines) - "en_SG", // English (Singapore) - "en_US", // English (United States) - "en_ZA", // English (South Africa) - "en_ZM", // English (Zambia) - "en_ZW", // English (Zimbabwe) - "eo", // Esperanto - "es", // Spanish - "es_AR", // Spanish (Argentina) - "es_BO", // Spanish (Bolivia) - "es_CL", // Spanish (Chile) - "es_CO", // Spanish (Colombia) - "es_CR", // Spanish (Costa Rica) - "es_CU", // Spanish (Cuba) - "es_DO", // Spanish (Dominican Republic) - "es_EC", // Spanish (Ecuador) - "es_ES", // Spanish (Spain) - "es_GT", // Spanish (Guatemala) - "es_HN", // Spanish (Honduras) - "es_MX", // Spanish (Mexico) - "es_NI", // Spanish (Nicaragua) - "es_PA", // Spanish (Panama) - "es_PE", // Spanish (Peru) - "es_PR", // Spanish (Puerto Rico) - "es_PY", // Spanish (Paraguay) - "es_SV", // Spanish (El Salvador) - "es_US", // Spanish (United States) - "es_UY", // Spanish (Uruguay) - "es_VE", // Spanish (Venezuela) - "et", // Estonian - "et_EE", // Estonian (Estonia) - "eu", // Basque - "eu_ES", // Basque (Spain) - "fa", // Persian - "fa_IR", // Persian (Iran) - "ff_SN", // Fulah (Senegal) - "fi", // Finnish - "fi_FI", // Finnish (Finland) - "fil", // Filipino - "fil_PH", // Filipino (Philippines) - "fo_FO", // Faroese (Faroe Islands) - "fr", // French - "fr_BE", // French (Belgium) - "fr_CA", // French (Canada) - "fr_CH", // French (Switzerland) - "fr_FR", // French (France) - "fr_LU", // French (Luxembourg) - "fur_IT", // Friulian (Italy) - "fy_DE", // Western Frisian (Germany) - "fy_NL", // Western Frisian (Netherlands) - "ga", // Irish - "ga_IE", // Irish (Ireland) - "gd_GB", // Scottish Gaelic (United Kingdom) - "gez_ER", // Geez (Eritrea) - "gez_ET", // Geez (Ethiopia) - "gl", // Galician - "gl_ES", // Galician (Spain) - "gu_IN", // Gujarati (India) - "gv_GB", // Manx (United Kingdom) - "hak_TW", // Hakka Chinese (Taiwan) - "ha_NG", // Hausa (Nigeria) - "he", // Hebrew - "he_IL", // Hebrew (Israel) - "hi", // Hindi - "hi_IN", // Hindi (India) - "hne_IN", // Chhattisgarhi (India) - "hr", // Croatian - "hr_HR", // Croatian (Croatia) - "hsb_DE", // Upper Sorbian (Germany) - "ht_HT", // Haitian (Haiti) - "hu", // Hungarian - "hu_HU", // Hungarian (Hungary) - "hus_MX", // Huastec (Mexico) - "hy_AM", // Armenian (Armenia) - "ia_FR", // Interlingua (France) - "id", // Indonesian - "id_ID", // Indonesian (Indonesia) - "ig_NG", // Igbo (Nigeria) - "ik_CA", // Inupiaq (Canada) - "is", // Icelandic - "is_IS", // Icelandic (Iceland) - "it", // Italian - "it_CH", // Italian (Switzerland) - "it_IT", // Italian (Italy) - "iu_CA", // Inuktitut (Canada) - "ja", // Japanese - "ja_JP", // Japanese (Japan) - "kab_DZ", // Kabyle (Algeria) - "ka", // Georgian - "ka_GE", // Georgian (Georgia) - "kk_KZ", // Kazakh (Kazakhstan) - "kl_GL", // Kalaallisut (Greenland) - "km", // Central Khmer - "km_KH", // Central Khmer (Cambodia) - "kn_IN", // Kannada (India) - "kok_IN", // Konkani (India) - "ko", // Korean - "ko_KR", // Korean (South Korea) - "ks_IN", // Kashmiri (India) - "ku", // Kurdish - "ku_TR", // Kurdish (Turkey) - "kw_GB", // Cornish (United Kingdom) - "ky_KG", // Kirghiz (Kyrgyzstan) - "lb_LU", // Luxembourgish (Luxembourg) - "lg_UG", // Ganda (Uganda) - "li_BE", // Limburgan (Belgium) - "li_NL", // Limburgan (Netherlands) - "lij_IT", // Ligurian (Italy) - "ln_CD", // Lingala (Congo) - "lo_LA", // Lao (Laos) - "lt", // Lithuanian - "lt_LT", // Lithuanian (Lithuania) - "lv", // Latvian - "lv_LV", // Latvian (Latvia) - "lzh_TW", // Literary Chinese (Taiwan) - "mag_IN", // Magahi (India) - "mai_IN", // Maithili (India) - "mg_MG", // Malagasy (Madagascar) - "mh_MH", // Marshallese (Marshall Islands) - "mhr_RU", // Eastern Mari (Russia) - "mi", // Māori - "mi_NZ", // Māori (New Zealand) - "miq_NI", // Mískito (Nicaragua) - "mk", // Macedonian - "mk_MK", // Macedonian (Macedonia) - "ml", // Malayalam - "ml_IN", // Malayalam (India) - "mni_IN", // Manipuri (India) - "mn_MN", // Mongolian (Mongolia) - "mr", // Marathi - "mr_IN", // Marathi (India) - "ms", // Malay - "ms_MY", // Malay (Malaysia) - "mt", // Maltese - "mt_MT", // Maltese (Malta) - "my_MM", // Burmese (Myanmar) - "myv_RU", // Erzya (Russia) - "nah_MX", // Nahuatl languages (Mexico) - "nan_TW", // Min Nan Chinese (Taiwan) - "nb", // Norwegian Bokmål - "nb_NO", // Norwegian Bokmål (Norway) - "nds_DE", // Low German (Germany) - "nds_NL", // Low German (Netherlands) - "ne_NP", // Nepali (Nepal) - "nhn_MX", // Central Nahuatl (Mexico) - "niu_NU", // Niuean (Niue) - "niu_NZ", // Niuean (New Zealand) - "nl", // Dutch - "nl_AW", // Dutch (Aruba) - "nl_BE", // Dutch (Belgium) - "nl_NL", // Dutch (Netherlands) - "nn", // Norwegian Nynorsk - "nn_NO", // Norwegian Nynorsk (Norway) - "nr_ZA", // South Ndebele (South Africa) - "nso_ZA", // Pedi (South Africa) - "oc_FR", // Occitan (France) - "om", // Oromo - "om_ET", // Oromo (Ethiopia) - "om_KE", // Oromo (Kenya) - "or", // Oriya - "or_IN", // Oriya (India) - "os_RU", // Ossetian (Russia) - "pa_IN", // Panjabi (India) - "pap", // Papiamento - "pap_AN", // Papiamento (Netherlands Antilles) - "pap_AW", // Papiamento (Aruba) - "pap_CW", // Papiamento (Curaçao) - "pa_PK", // Panjabi (Pakistan) - "pl", // Polish - "pl_PL", // Polish (Poland) - "pr", // Pirate - "ps_AF", // Pushto (Afghanistan) - "pt", // Portuguese - "pt_BR", // Portuguese (Brazil) - "pt_PT", // Portuguese (Portugal) - "quy_PE", // Ayacucho Quechua (Peru) - "quz_PE", // Cusco Quechua (Peru) - "raj_IN", // Rajasthani (India) - "ro", // Romanian - "ro_RO", // Romanian (Romania) - "ru", // Russian - "ru_RU", // Russian (Russia) - "ru_UA", // Russian (Ukraine) - "rw_RW", // Kinyarwanda (Rwanda) - "sa_IN", // Sanskrit (India) - "sat_IN", // Santali (India) - "sc_IT", // Sardinian (Italy) - "sco", // Scots - "sd_IN", // Sindhi (India) - "se_NO", // Northern Sami (Norway) - "sgs_LT", // Samogitian (Lithuania) - "shs_CA", // Shuswap (Canada) - "sid_ET", // Sidamo (Ethiopia) - "si", // Sinhala - "si_LK", // Sinhala (Sri Lanka) - "sk", // Slovak - "sk_SK", // Slovak (Slovakia) - "sl", // Slovenian - "sl_SI", // Slovenian (Slovenia) - "so", // Somali - "so_DJ", // Somali (Djibouti) - "so_ET", // Somali (Ethiopia) - "so_KE", // Somali (Kenya) - "so_SO", // Somali (Somalia) - "son_ML", // Songhai languages (Mali) - "sq", // Albanian - "sq_AL", // Albanian (Albania) - "sq_KV", // Albanian (Kosovo) - "sq_MK", // Albanian (Macedonia) - "sr", // Serbian - "sr_Cyrl", // Serbian (Cyrillic) - "sr_Latn", // Serbian (Latin) - "sr_ME", // Serbian (Montenegro) - "sr_RS", // Serbian (Serbia) - "ss_ZA", // Swati (South Africa) - "st_ZA", // Southern Sotho (South Africa) - "sv", // Swedish - "sv_FI", // Swedish (Finland) - "sv_SE", // Swedish (Sweden) - "sw_KE", // Swahili (Kenya) - "sw_TZ", // Swahili (Tanzania) - "szl_PL", // Silesian (Poland) - "ta", // Tamil - "ta_IN", // Tamil (India) - "ta_LK", // Tamil (Sri Lanka) - "tcy_IN", // Tulu (India) - "te", // Telugu - "te_IN", // Telugu (India) - "tg_TJ", // Tajik (Tajikistan) - "the_NP", // Chitwania Tharu (Nepal) - "th", // Thai - "th_TH", // Thai (Thailand) - "ti", // Tigrinya - "ti_ER", // Tigrinya (Eritrea) - "ti_ET", // Tigrinya (Ethiopia) - "tig_ER", // Tigre (Eritrea) - "tk_TM", // Turkmen (Turkmenistan) - "tl_PH", // Tagalog (Philippines) - "tn_ZA", // Tswana (South Africa) - "tr", // Turkish - "tr_CY", // Turkish (Cyprus) - "tr_TR", // Turkish (Turkey) - "ts_ZA", // Tsonga (South Africa) - "tt", // Tatar - "tt_RU", // Tatar (Russia) - "tzm", // Central Atlas Tamazight - "tzm_MA", // Central Atlas Tamazight (Marrocos) - "ug_CN", // Uighur (China) - "uk", // Ukrainian - "uk_UA", // Ukrainian (Ukraine) - "unm_US", // Unami (United States) - "ur", // Urdu - "ur_IN", // Urdu (India) - "ur_PK", // Urdu (Pakistan) - "uz", // Uzbek - "uz_UZ", // Uzbek (Uzbekistan) - "ve_ZA", // Venda (South Africa) - "vi", // Vietnamese - "vi_VN", // Vietnamese (Vietnam) - "wa_BE", // Walloon (Belgium) - "wae_CH", // Walser (Switzerland) - "wal_ET", // Wolaytta (Ethiopia) - "wo_SN", // Wolof (Senegal) - "xh_ZA", // Xhosa (South Africa) - "yi_US", // Yiddish (United States) - "yo_NG", // Yoruba (Nigeria) - "yue_HK", // Yue Chinese (Hong Kong) - "zh", // Chinese - "zh_CN", // Chinese (China) - "zh_HK", // Chinese (Hong Kong) - "zh_SG", // Chinese (Singapore) - "zh_TW", // Chinese (Taiwan) - "zu_ZA", // Zulu (South Africa) - nullptr -}; - -static const char *locale_names[] = { - "Afar", - "Afar (Djibouti)", - "Afar (Eritrea)", - "Afar (Ethiopia)", - "Afrikaans", - "Afrikaans (South Africa)", - "Aguaruna (Peru)", - "Akan (Ghana)", - "Amharic (Ethiopia)", - "Aragonese (Spain)", - "Angika (India)", - "Arabic", - "Arabic (United Arab Emirates)", - "Arabic (Bahrain)", - "Arabic (Algeria)", - "Arabic (Egypt)", - "Arabic (India)", - "Arabic (Iraq)", - "Arabic (Jordan)", - "Arabic (Kuwait)", - "Arabic (Lebanon)", - "Arabic (Libya)", - "Arabic (Morocco)", - "Arabic (Oman)", - "Arabic (Qatar)", - "Arabic (Saudi Arabia)", - "Arabic (Sudan)", - "Arabic (South Soudan)", - "Arabic (Syria)", - "Arabic (Tunisia)", - "Arabic (Yemen)", - "Assamese (India)", - "Asturian (Spain)", - "Southern Aymara (Peru)", - "Aymara (Peru)", - "Azerbaijani", - "Azerbaijani (Azerbaijan)", - "Belarusian", - "Belarusian (Belarus)", - "Bemba (Zambia)", - "Berber languages (Algeria)", - "Berber languages (Morocco)", - "Bulgarian", - "Bulgarian (Bulgaria)", - "Bhili (India)", - "Bhojpuri (India)", - "Bislama (Tuvalu)", - "Bengali", - "Bengali (Bangladesh)", - "Bengali (India)", - "Tibetan", - "Tibetan (China)", - "Tibetan (India)", - "Breton", - "Breton (France)", - "Bodo (India)", - "Bosnian (Bosnia and Herzegovina)", - "Bilin (Eritrea)", - "Catalan", - "Catalan (Andorra)", - "Catalan (Spain)", - "Catalan (France)", - "Catalan (Italy)", - "Chechen (Russia)", - "Cherokee (United States)", - "Mandarin Chinese (Taiwan)", - "Crimean Tatar (Ukraine)", - "Kashubian (Poland)", - "Czech", - "Czech (Czech Republic)", - "Chuvash (Russia)", - "Welsh (United Kingdom)", - "Danish", - "Danish (Denmark)", - "German", - "German (Austria)", - "German (Belgium)", - "German (Switzerland)", - "German (Germany)", - "German (Italy)", - "German (Luxembourg)", - "Dogri (India)", - "Dhivehi (Maldives)", - "Dzongkha (Bhutan)", - "Greek", - "Greek (Cyprus)", - "Greek (Greece)", - "English", - "English (Antigua and Barbuda)", - "English (Australia)", - "English (Botswana)", - "English (Canada)", - "English (Denmark)", - "English (United Kingdom)", - "English (Hong Kong)", - "English (Ireland)", - "English (Israel)", - "English (India)", - "English (Nigeria)", - "English (New Zealand)", - "English (Philippines)", - "English (Singapore)", - "English (United States)", - "English (South Africa)", - "English (Zambia)", - "English (Zimbabwe)", - "Esperanto", - "Spanish", - "Spanish (Argentina)", - "Spanish (Bolivia)", - "Spanish (Chile)", - "Spanish (Colombia)", - "Spanish (Costa Rica)", - "Spanish (Cuba)", - "Spanish (Dominican Republic)", - "Spanish (Ecuador)", - "Spanish (Spain)", - "Spanish (Guatemala)", - "Spanish (Honduras)", - "Spanish (Mexico)", - "Spanish (Nicaragua)", - "Spanish (Panama)", - "Spanish (Peru)", - "Spanish (Puerto Rico)", - "Spanish (Paraguay)", - "Spanish (El Salvador)", - "Spanish (United States)", - "Spanish (Uruguay)", - "Spanish (Venezuela)", - "Estonian", - "Estonian (Estonia)", - "Basque", - "Basque (Spain)", - "Persian", - "Persian (Iran)", - "Fulah (Senegal)", - "Finnish", - "Finnish (Finland)", - "Filipino", - "Filipino (Philippines)", - "Faroese (Faroe Islands)", - "French", - "French (Belgium)", - "French (Canada)", - "French (Switzerland)", - "French (France)", - "French (Luxembourg)", - "Friulian (Italy)", - "Western Frisian (Germany)", - "Western Frisian (Netherlands)", - "Irish", - "Irish (Ireland)", - "Scottish Gaelic (United Kingdom)", - "Geez (Eritrea)", - "Geez (Ethiopia)", - "Galician", - "Galician (Spain)", - "Gujarati (India)", - "Manx (United Kingdom)", - "Hakka Chinese (Taiwan)", - "Hausa (Nigeria)", - "Hebrew", - "Hebrew (Israel)", - "Hindi", - "Hindi (India)", - "Chhattisgarhi (India)", - "Croatian", - "Croatian (Croatia)", - "Upper Sorbian (Germany)", - "Haitian (Haiti)", - "Hungarian", - "Hungarian (Hungary)", - "Huastec (Mexico)", - "Armenian (Armenia)", - "Interlingua (France)", - "Indonesian", - "Indonesian (Indonesia)", - "Igbo (Nigeria)", - "Inupiaq (Canada)", - "Icelandic", - "Icelandic (Iceland)", - "Italian", - "Italian (Switzerland)", - "Italian (Italy)", - "Inuktitut (Canada)", - "Japanese", - "Japanese (Japan)", - "Kabyle (Algeria)", - "Georgian", - "Georgian (Georgia)", - "Kazakh (Kazakhstan)", - "Kalaallisut (Greenland)", - "Central Khmer", - "Central Khmer (Cambodia)", - "Kannada (India)", - "Konkani (India)", - "Korean", - "Korean (South Korea)", - "Kashmiri (India)", - "Kurdish", - "Kurdish (Turkey)", - "Cornish (United Kingdom)", - "Kirghiz (Kyrgyzstan)", - "Luxembourgish (Luxembourg)", - "Ganda (Uganda)", - "Limburgan (Belgium)", - "Limburgan (Netherlands)", - "Ligurian (Italy)", - "Lingala (Congo)", - "Lao (Laos)", - "Lithuanian", - "Lithuanian (Lithuania)", - "Latvian", - "Latvian (Latvia)", - "Literary Chinese (Taiwan)", - "Magahi (India)", - "Maithili (India)", - "Malagasy (Madagascar)", - "Marshallese (Marshall Islands)", - "Eastern Mari (Russia)", - "Māori", - "Māori (New Zealand)", - "Mískito (Nicaragua)", - "Macedonian", - "Macedonian (Macedonia)", - "Malayalam", - "Malayalam (India)", - "Manipuri (India)", - "Mongolian (Mongolia)", - "Marathi", - "Marathi (India)", - "Malay", - "Malay (Malaysia)", - "Maltese", - "Maltese (Malta)", - "Burmese (Myanmar)", - "Erzya (Russia)", - "Nahuatl languages (Mexico)", - "Min Nan Chinese (Taiwan)", - "Norwegian Bokmål", - "Norwegian Bokmål (Norway)", - "Low German (Germany)", - "Low German (Netherlands)", - "Nepali (Nepal)", - "Central Nahuatl (Mexico)", - "Niuean (Niue)", - "Niuean (New Zealand)", - "Dutch", - "Dutch (Aruba)", - "Dutch (Belgium)", - "Dutch (Netherlands)", - "Norwegian Nynorsk", - "Norwegian Nynorsk (Norway)", - "South Ndebele (South Africa)", - "Pedi (South Africa)", - "Occitan (France)", - "Oromo", - "Oromo (Ethiopia)", - "Oromo (Kenya)", - "Oriya", - "Oriya (India)", - "Ossetian (Russia)", - "Panjabi (India)", - "Papiamento", - "Papiamento (Netherlands Antilles)", - "Papiamento (Aruba)", - "Papiamento (Curaçao)", - "Panjabi (Pakistan)", - "Polish", - "Polish (Poland)", - "Pirate", - "Pushto (Afghanistan)", - "Portuguese", - "Portuguese (Brazil)", - "Portuguese (Portugal)", - "Ayacucho Quechua (Peru)", - "Cusco Quechua (Peru)", - "Rajasthani (India)", - "Romanian", - "Romanian (Romania)", - "Russian", - "Russian (Russia)", - "Russian (Ukraine)", - "Kinyarwanda (Rwanda)", - "Sanskrit (India)", - "Santali (India)", - "Sardinian (Italy)", - "Scots (Scotland)", - "Sindhi (India)", - "Northern Sami (Norway)", - "Samogitian (Lithuania)", - "Shuswap (Canada)", - "Sidamo (Ethiopia)", - "Sinhala", - "Sinhala (Sri Lanka)", - "Slovak", - "Slovak (Slovakia)", - "Slovenian", - "Slovenian (Slovenia)", - "Somali", - "Somali (Djibouti)", - "Somali (Ethiopia)", - "Somali (Kenya)", - "Somali (Somalia)", - "Songhai languages (Mali)", - "Albanian", - "Albanian (Albania)", - "Albanian (Kosovo)", - "Albanian (Macedonia)", - "Serbian", - "Serbian (Cyrillic)", - "Serbian (Latin)", - "Serbian (Montenegro)", - "Serbian (Serbia)", - "Swati (South Africa)", - "Southern Sotho (South Africa)", - "Swedish", - "Swedish (Finland)", - "Swedish (Sweden)", - "Swahili (Kenya)", - "Swahili (Tanzania)", - "Silesian (Poland)", - "Tamil", - "Tamil (India)", - "Tamil (Sri Lanka)", - "Tulu (India)", - "Telugu", - "Telugu (India)", - "Tajik (Tajikistan)", - "Chitwania Tharu (Nepal)", - "Thai", - "Thai (Thailand)", - "Tigrinya", - "Tigrinya (Eritrea)", - "Tigrinya (Ethiopia)", - "Tigre (Eritrea)", - "Turkmen (Turkmenistan)", - "Tagalog (Philippines)", - "Tswana (South Africa)", - "Turkish", - "Turkish (Cyprus)", - "Turkish (Turkey)", - "Tsonga (South Africa)", - "Tatar", - "Tatar (Russia)", - "Central Atlas Tamazight", - "Central Atlas Tamazight (Marrocos)", - "Uighur (China)", - "Ukrainian", - "Ukrainian (Ukraine)", - "Unami (United States)", - "Urdu", - "Urdu (India)", - "Urdu (Pakistan)", - "Uzbek", - "Uzbek (Uzbekistan)", - "Venda (South Africa)", - "Vietnamese", - "Vietnamese (Vietnam)", - "Walloon (Belgium)", - "Walser (Switzerland)", - "Wolaytta (Ethiopia)", - "Wolof (Senegal)", - "Xhosa (South Africa)", - "Yiddish (United States)", - "Yoruba (Nigeria)", - "Yue Chinese (Hong Kong)", - "Chinese", - "Chinese (China)", - "Chinese (Hong Kong)", - "Chinese (Singapore)", - "Chinese (Taiwan)", - "Zulu (South Africa)", - nullptr -}; - -// Windows has some weird locale identifiers which do not honor the ISO 639-1 -// standardized nomenclature. Whenever those don't conflict with existing ISO -// identifiers, we override them. -// -// Reference: -// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx - -static const char *locale_renames[][2] = { - { "in", "id" }, // Indonesian - { "iw", "he" }, // Hebrew - { "no", "nb" }, // Norwegian Bokmål - { "C", "en" }, // "C" is the simple/default/untranslated Computer locale. - // ASCII-only, English, no currency symbols. Godot treats this as "en". - // See https://unix.stackexchange.com/a/87763/164141 "The C locale is"... - { nullptr, nullptr } -}; - -/////////////////////////////////////////////// - Dictionary Translation::_get_messages() const { Dictionary d; for (const KeyValue<StringName, StringName> &E : translation_map) { @@ -849,17 +68,7 @@ void Translation::_set_messages(const Dictionary &p_messages) { } void Translation::set_locale(const String &p_locale) { - String univ_locale = TranslationServer::standardize_locale(p_locale); - - if (!TranslationServer::is_locale_valid(univ_locale)) { - String trimmed_locale = TranslationServer::get_language_code(univ_locale); - - ERR_FAIL_COND_MSG(!TranslationServer::is_locale_valid(trimmed_locale), "Invalid locale: " + trimmed_locale + "."); - - locale = trimmed_locale; - } else { - locale = univ_locale; - } + locale = TranslationServer::get_singleton()->standardize_locale(p_locale); if (OS::get_singleton()->get_main_loop() && TranslationServer::get_singleton()->get_loaded_locales().has(this)) { OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); @@ -886,12 +95,12 @@ StringName Translation::get_message(const StringName &p_src_text, const StringNa WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class"); } - const Map<StringName, StringName>::Element *E = translation_map.find(p_src_text); + HashMap<StringName, StringName>::ConstIterator E = translation_map.find(p_src_text); if (!E) { return StringName(); } - return E->get(); + return E->value; } StringName Translation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { @@ -932,13 +141,13 @@ void Translation::_bind_methods() { ClassDB::bind_method(D_METHOD("erase_message", "src_message", "context"), &Translation::erase_message, DEFVAL("")); ClassDB::bind_method(D_METHOD("get_message_list"), &Translation::_get_message_list); ClassDB::bind_method(D_METHOD("get_message_count"), &Translation::get_message_count); - ClassDB::bind_method(D_METHOD("_set_messages"), &Translation::_set_messages); + ClassDB::bind_method(D_METHOD("_set_messages", "messages"), &Translation::_set_messages); ClassDB::bind_method(D_METHOD("_get_messages"), &Translation::_get_messages); GDVIRTUAL_BIND(_get_plural_message, "src_message", "src_plural_message", "n", "context"); GDVIRTUAL_BIND(_get_message, "src_message", "context"); - ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages"); + ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "messages", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL), "_set_messages", "_get_messages"); ADD_PROPERTY(PropertyInfo(Variant::STRING, "locale"), "set_locale", "get_locale"); } @@ -1004,121 +213,306 @@ static _character_accent_pair _character_to_accented[] = { { 'z', U"ź" }, }; -bool TranslationServer::is_locale_valid(const String &p_locale) { - const char **ptr = locale_list; +Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info; - while (*ptr) { - if (*ptr == p_locale) { - return true; +HashMap<String, String> TranslationServer::language_map; +HashMap<String, String> TranslationServer::script_map; +HashMap<String, String> TranslationServer::locale_rename_map; +HashMap<String, String> TranslationServer::country_name_map; +HashMap<String, String> TranslationServer::variant_map; +HashMap<String, String> TranslationServer::country_rename_map; + +void TranslationServer::init_locale_info() { + // Init locale info. + language_map.clear(); + int idx = 0; + while (language_list[idx][0] != nullptr) { + language_map[language_list[idx][0]] = String::utf8(language_list[idx][1]); + idx++; + } + + // Init locale-script map. + locale_script_info.clear(); + idx = 0; + while (locale_scripts[idx][0] != nullptr) { + LocaleScriptInfo info; + info.name = locale_scripts[idx][0]; + info.script = locale_scripts[idx][1]; + info.default_country = locale_scripts[idx][2]; + Vector<String> supported_countries = String(locale_scripts[idx][3]).split(",", false); + for (int i = 0; i < supported_countries.size(); i++) { + info.supported_countries.insert(supported_countries[i]); } - ptr++; + locale_script_info.push_back(info); + idx++; } - return false; -} + // Init supported script list. + script_map.clear(); + idx = 0; + while (script_list[idx][0] != nullptr) { + script_map[script_list[idx][1]] = String::utf8(script_list[idx][0]); + idx++; + } -String TranslationServer::standardize_locale(const String &p_locale) { - // Replaces '-' with '_' for macOS Sierra-style locales - String univ_locale = p_locale.replace("-", "_"); + // Init regional variant map. + variant_map.clear(); + idx = 0; + while (locale_variants[idx][0] != nullptr) { + variant_map[locale_variants[idx][0]] = locale_variants[idx][1]; + idx++; + } - // Handles known non-ISO locale names used e.g. on Windows - int idx = 0; + // Init locale renames. + locale_rename_map.clear(); + idx = 0; while (locale_renames[idx][0] != nullptr) { - if (locale_renames[idx][0] == univ_locale) { - univ_locale = locale_renames[idx][1]; - break; + if (!String(locale_renames[idx][1]).is_empty()) { + locale_rename_map[locale_renames[idx][0]] = locale_renames[idx][1]; } idx++; } - return univ_locale; + // Init country names. + country_name_map.clear(); + idx = 0; + while (country_names[idx][0] != nullptr) { + country_name_map[String(country_names[idx][0])] = String::utf8(country_names[idx][1]); + idx++; + } + + // Init country renames. + country_rename_map.clear(); + idx = 0; + while (country_renames[idx][0] != nullptr) { + if (!String(country_renames[idx][1]).is_empty()) { + country_rename_map[country_renames[idx][0]] = country_renames[idx][1]; + } + idx++; + } } -String TranslationServer::get_language_code(const String &p_locale) { - ERR_FAIL_COND_V_MSG(p_locale.length() < 2, p_locale, "Invalid locale '" + p_locale + "'."); - // Most language codes are two letters, but some are three, - // so we have to look for a regional code separator ('_' or '-') - // to extract the left part. - // For example we get 'nah_MX' as input and should return 'nah'. - int split = p_locale.find("_"); - if (split == -1) { - split = p_locale.find("-"); +String TranslationServer::standardize_locale(const String &p_locale) const { + // Replaces '-' with '_' for macOS style locales. + String univ_locale = p_locale.replace("-", "_"); + + // Extract locale elements. + String lang, script, country, variant; + Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_"); + lang = locale_elements[0]; + if (locale_elements.size() >= 2) { + if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) { + script = locale_elements[1]; + } + if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) { + country = locale_elements[1]; + } } - if (split == -1) { // No separator, so the locale is already only a language code. - return p_locale; + if (locale_elements.size() >= 3) { + if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) { + country = locale_elements[2]; + } else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) { + variant = locale_elements[2].to_lower(); + } + } + if (locale_elements.size() >= 4) { + if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang) { + variant = locale_elements[3].to_lower(); + } } - return p_locale.left(split); -} -void TranslationServer::set_locale(const String &p_locale) { - String univ_locale = standardize_locale(p_locale); + // Try extract script and variant from the extra part. + Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";"); + for (int i = 0; i < script_extra.size(); i++) { + if (script_extra[i].to_lower() == "cyrillic") { + script = "Cyrl"; + break; + } else if (script_extra[i].to_lower() == "latin") { + script = "Latn"; + break; + } else if (script_extra[i].to_lower() == "devanagari") { + script = "Deva"; + break; + } else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang) { + variant = script_extra[i].to_lower(); + } + } - if (!is_locale_valid(univ_locale)) { - String trimmed_locale = get_language_code(univ_locale); - print_verbose(vformat("Unsupported locale '%s', falling back to '%s'.", p_locale, trimmed_locale)); + // Handles known non-ISO language names used e.g. on Windows. + if (locale_rename_map.has(lang)) { + lang = locale_rename_map[lang]; + } - if (!is_locale_valid(trimmed_locale)) { - ERR_PRINT(vformat("Unsupported locale '%s', falling back to 'en'.", trimmed_locale)); - locale = "en"; - } else { - locale = trimmed_locale; + // Handle country renames. + if (country_rename_map.has(country)) { + country = country_rename_map[country]; + } + + // Remove unsupported script codes. + if (!script_map.has(script)) { + script = ""; + } + + // Add script code base on language and country codes for some ambiguous cases. + if (script.is_empty()) { + for (int i = 0; i < locale_script_info.size(); i++) { + const LocaleScriptInfo &info = locale_script_info[i]; + if (info.name == lang) { + if (country.is_empty() || info.supported_countries.has(country)) { + script = info.script; + break; + } + } + } + } + if (!script.is_empty() && country.is_empty()) { + // Add conntry code based on script for some ambiguous cases. + for (int i = 0; i < locale_script_info.size(); i++) { + const LocaleScriptInfo &info = locale_script_info[i]; + if (info.name == lang && info.script == script) { + country = info.default_country; + break; + } + } + } + + // Combine results. + String locale = lang; + if (!script.is_empty()) { + locale = locale + "_" + script; + } + if (!country.is_empty()) { + locale = locale + "_" + country; + } + if (!variant.is_empty()) { + locale = locale + "_" + variant; + } + return locale; +} + +int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const { + String locale_a = standardize_locale(p_locale_a); + String locale_b = standardize_locale(p_locale_b); + + if (locale_a == locale_b) { + // Exact match. + return 10; + } + + Vector<String> locale_a_elements = locale_a.split("_"); + Vector<String> locale_b_elements = locale_b.split("_"); + if (locale_a_elements[0] == locale_b_elements[0]) { + // Matching language, both locales have extra parts. + // Return number of matching elements. + int matching_elements = 1; + for (int i = 1; i < locale_a_elements.size(); i++) { + for (int j = 1; j < locale_b_elements.size(); j++) { + if (locale_a_elements[i] == locale_b_elements[j]) { + matching_elements++; + } + } } + return matching_elements; } else { - locale = univ_locale; + // No match. + return 0; + } +} + +String TranslationServer::get_locale_name(const String &p_locale) const { + String locale = standardize_locale(p_locale); + + String lang, script, country; + Vector<String> locale_elements = locale.split("_"); + lang = locale_elements[0]; + if (locale_elements.size() >= 2) { + if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) { + script = locale_elements[1]; + } + if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) { + country = locale_elements[1]; + } + } + if (locale_elements.size() >= 3) { + if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) { + country = locale_elements[2]; + } } - if (OS::get_singleton()->get_main_loop()) { - OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); + String name = language_map[lang]; + if (!script.is_empty()) { + name = name + " (" + script_map[script] + ")"; + } + if (!country.is_empty()) { + name = name + ", " + country_name_map[country]; } + return name; +} - ResourceLoader::reload_translation_remaps(); +Vector<String> TranslationServer::get_all_languages() const { + Vector<String> languages; + + for (const KeyValue<String, String> &E : language_map) { + languages.push_back(E.key); + } + + return languages; } -String TranslationServer::get_locale() const { - return locale; +String TranslationServer::get_language_name(const String &p_language) const { + return language_map[p_language]; } -String TranslationServer::get_locale_name(const String &p_locale) const { - if (!locale_name_map.has(p_locale)) { - return String(); +Vector<String> TranslationServer::get_all_scripts() const { + Vector<String> scripts; + + for (const KeyValue<String, String> &E : script_map) { + scripts.push_back(E.key); } - return locale_name_map[p_locale]; + + return scripts; } -Array TranslationServer::get_loaded_locales() const { - Array locales; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); - ERR_FAIL_COND_V(t.is_null(), Array()); - String l = t->get_locale(); +String TranslationServer::get_script_name(const String &p_script) const { + return script_map[p_script]; +} - locales.push_back(l); +Vector<String> TranslationServer::get_all_countries() const { + Vector<String> countries; + + for (const KeyValue<String, String> &E : country_name_map) { + countries.push_back(E.key); } - return locales; + return countries; } -Vector<String> TranslationServer::get_all_locales() { - Vector<String> locales; +String TranslationServer::get_country_name(const String &p_country) const { + return country_name_map[p_country]; +} - const char **ptr = locale_list; +void TranslationServer::set_locale(const String &p_locale) { + locale = standardize_locale(p_locale); - while (*ptr) { - locales.push_back(*ptr); - ptr++; + if (OS::get_singleton()->get_main_loop()) { + OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); } - return locales; + ResourceLoader::reload_translation_remaps(); } -Vector<String> TranslationServer::get_all_locale_names() { - Vector<String> locales; +String TranslationServer::get_locale() const { + return locale; +} - const char **ptr = locale_names; +Array TranslationServer::get_loaded_locales() const { + Array locales; + for (const Ref<Translation> &E : translations) { + const Ref<Translation> &t = E; + ERR_FAIL_COND_V(t.is_null(), Array()); + String l = t->get_locale(); - while (*ptr) { - locales.push_back(String::utf8(*ptr)); - ptr++; + locales.push_back(l); } return locales; @@ -1134,23 +528,20 @@ void TranslationServer::remove_translation(const Ref<Translation> &p_translation Ref<Translation> TranslationServer::get_translation_object(const String &p_locale) { Ref<Translation> res; - String lang = get_language_code(p_locale); - bool near_match_found = false; + int best_score = 0; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); + for (const Ref<Translation> &E : translations) { + const Ref<Translation> &t = E; ERR_FAIL_COND_V(t.is_null(), nullptr); String l = t->get_locale(); - // Exact match. - if (l == p_locale) { - return t; - } - - // If near match found, keep that match, but keep looking to try to look for perfect match. - if (get_language_code(l) == lang && !near_match_found) { + int score = compare_locales(p_locale, l); + if (score > 0 && score >= best_score) { res = t; - near_match_found = true; + best_score = score; + if (score == 10) { + break; // Exact match, skip the rest. + } } } return res; @@ -1167,8 +558,6 @@ StringName TranslationServer::translate(const StringName &p_message, const Strin return p_message; } - ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid."); - StringName res = _get_message_from_translations(p_message, p_context, locale, false); if (!res && fallback.length() >= 2) { @@ -1190,8 +579,6 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons return p_message_plural; } - ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid."); - StringName res = _get_message_from_translations(p_message, p_context, locale, true, p_message_plural, p_n); if (!res && fallback.length() >= 2) { @@ -1209,51 +596,30 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons } StringName TranslationServer::_get_message_from_translations(const StringName &p_message, const StringName &p_context, const String &p_locale, bool plural, const String &p_message_plural, int p_n) const { - // Locale can be of the form 'll_CC', i.e. language code and regional code, - // e.g. 'en_US', 'en_GB', etc. It might also be simply 'll', e.g. 'en'. - // To find the relevant translation, we look for those with locale starting - // with the language code, and then if any is an exact match for the long - // form. If not found, we fall back to a near match (another locale with - // same language code). - - // Note: ResourceLoader::_path_remap reproduces this locale near matching - // logic, so be sure to propagate changes there when changing things here. - StringName res; - String lang = get_language_code(p_locale); - bool near_match = false; + int best_score = 0; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); + for (const Ref<Translation> &E : translations) { + const Ref<Translation> &t = E; ERR_FAIL_COND_V(t.is_null(), p_message); String l = t->get_locale(); - bool exact_match = (l == p_locale); - if (!exact_match) { - if (near_match) { - continue; // Only near-match once, but keep looking for exact matches. + int score = compare_locales(p_locale, l); + if (score > 0 && score >= best_score) { + StringName r; + if (!plural) { + r = t->get_message(p_message, p_context); + } else { + r = t->get_plural_message(p_message, p_message_plural, p_n, p_context); } - if (get_language_code(l) != lang) { - continue; // Language code does not match. + if (!r) { + continue; + } + res = r; + best_score = score; + if (score == 10) { + break; // Exact match, skip the rest. } - } - - StringName r; - if (!plural) { - r = t->get_message(p_message, p_context); - } else { - r = t->get_plural_message(p_message, p_message_plural, p_n, p_context); - } - - if (!r) { - continue; - } - res = r; - - if (exact_match) { - break; - } else { - near_match = true; } } @@ -1287,7 +653,7 @@ bool TranslationServer::_load_translations(const String &p_from) { void TranslationServer::setup() { String test = GLOBAL_DEF("internationalization/locale/test", ""); test = test.strip_edges(); - if (test != "") { + if (!test.is_empty()) { set_locale(test); } else { set_locale(OS::get_singleton()->get_locale()); @@ -1305,18 +671,7 @@ void TranslationServer::setup() { pseudolocalization_skip_placeholders_enabled = GLOBAL_DEF("internationalization/pseudolocalization/skip_placeholders", true); #ifdef TOOLS_ENABLED - { - String options = ""; - int idx = 0; - while (locale_list[idx]) { - if (idx > 0) { - options += ","; - } - options += locale_list[idx]; - idx++; - } - ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_ENUM, options)); - } + ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_LOCALE_ID, "")); #endif } @@ -1330,8 +685,12 @@ Ref<Translation> TranslationServer::get_tool_translation() const { String TranslationServer::get_tool_locale() { #ifdef TOOLS_ENABLED - if (TranslationServer::get_singleton()->get_tool_translation().is_valid() && (Engine::get_singleton()->is_editor_hint() || Main::is_project_manager())) { - return tool_translation->get_locale(); + if (Engine::get_singleton()->is_editor_hint() || Engine::get_singleton()->is_project_manager_hint()) { + if (TranslationServer::get_singleton()->get_tool_translation().is_valid()) { + return tool_translation->get_locale(); + } else { + return "en"; + } } else { #else { @@ -1532,7 +891,7 @@ String TranslationServer::wrap_with_fakebidi_characters(String &p_message) const return res; } -String TranslationServer::add_padding(String &p_message, int p_length) const { +String TranslationServer::add_padding(const String &p_message, int p_length) const { String res; String prefix = pseudolocalization_prefix; String suffix; @@ -1548,7 +907,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const { } const char32_t *TranslationServer::get_accented_version(char32_t p_character) const { - if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) { + if (!is_ascii_char(p_character)) { return nullptr; } @@ -1562,14 +921,27 @@ const char32_t *TranslationServer::get_accented_version(char32_t p_character) co } bool TranslationServer::is_placeholder(String &p_message, int p_index) const { - return p_message[p_index] == '%' && p_index < p_message.size() - 1 && - (p_message[p_index + 1] == 's' || p_message[p_index + 1] == 'c' || p_message[p_index + 1] == 'd' || - p_message[p_index + 1] == 'o' || p_message[p_index + 1] == 'x' || p_message[p_index + 1] == 'X' || p_message[p_index + 1] == 'f'); + return p_index < p_message.size() - 1 && p_message[p_index] == '%' && + (p_message[p_index + 1] == 's' || p_message[p_index + 1] == 'c' || p_message[p_index + 1] == 'd' || + p_message[p_index + 1] == 'o' || p_message[p_index + 1] == 'x' || p_message[p_index + 1] == 'X' || p_message[p_index + 1] == 'f'); } void TranslationServer::_bind_methods() { ClassDB::bind_method(D_METHOD("set_locale", "locale"), &TranslationServer::set_locale); ClassDB::bind_method(D_METHOD("get_locale"), &TranslationServer::get_locale); + ClassDB::bind_method(D_METHOD("get_tool_locale"), &TranslationServer::get_tool_locale); + + ClassDB::bind_method(D_METHOD("compare_locales", "locale_a", "locale_b"), &TranslationServer::compare_locales); + ClassDB::bind_method(D_METHOD("standardize_locale", "locale"), &TranslationServer::standardize_locale); + + ClassDB::bind_method(D_METHOD("get_all_languages"), &TranslationServer::get_all_languages); + ClassDB::bind_method(D_METHOD("get_language_name", "language"), &TranslationServer::get_language_name); + + ClassDB::bind_method(D_METHOD("get_all_scripts"), &TranslationServer::get_all_scripts); + ClassDB::bind_method(D_METHOD("get_script_name", "script"), &TranslationServer::get_script_name); + + ClassDB::bind_method(D_METHOD("get_all_countries"), &TranslationServer::get_all_countries); + ClassDB::bind_method(D_METHOD("get_country_name", "country"), &TranslationServer::get_country_name); ClassDB::bind_method(D_METHOD("get_locale_name", "locale"), &TranslationServer::get_locale_name); @@ -1603,8 +975,5 @@ void TranslationServer::load_translations() { TranslationServer::TranslationServer() { singleton = this; - - for (int i = 0; locale_list[i]; ++i) { - locale_name_map.insert(locale_list[i], String::utf8(locale_names[i])); - } + init_locale_info(); } diff --git a/core/string/translation.h b/core/string/translation.h index 6aec0bb8ea..20c6ebd5a5 100644 --- a/core/string/translation.h +++ b/core/string/translation.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -41,7 +41,7 @@ class Translation : public Resource { RES_BASE_EXTENSION("translation"); String locale = "en"; - Map<StringName, StringName> translation_map; + HashMap<StringName, StringName> translation_map; virtual Vector<String> _get_message_list() const; virtual Dictionary _get_messages() const; @@ -74,12 +74,10 @@ class TranslationServer : public Object { String locale = "en"; String fallback; - Set<Ref<Translation>> translations; + HashSet<Ref<Translation>> translations; Ref<Translation> tool_translation; Ref<Translation> doc_translation; - Map<String, String> locale_name_map; - bool enabled = true; bool pseudolocalization_enabled = false; @@ -98,7 +96,7 @@ class TranslationServer : public Object { String double_vowels(String &p_message) const; String replace_with_accented_string(String &p_message) const; String wrap_with_fakebidi_characters(String &p_message) const; - String add_padding(String &p_message, int p_length) const; + String add_padding(const String &p_message, int p_length) const; const char32_t *get_accented_version(char32_t p_character) const; bool is_placeholder(String &p_message, int p_index) const; @@ -109,6 +107,23 @@ class TranslationServer : public Object { static void _bind_methods(); + struct LocaleScriptInfo { + String name; + String script; + String default_country; + HashSet<String> supported_countries; + }; + static Vector<LocaleScriptInfo> locale_script_info; + + static HashMap<String, String> language_map; + static HashMap<String, String> script_map; + static HashMap<String, String> locale_rename_map; + static HashMap<String, String> country_name_map; + static HashMap<String, String> country_rename_map; + static HashMap<String, String> variant_map; + + void init_locale_info(); + public: _FORCE_INLINE_ static TranslationServer *get_singleton() { return singleton; } @@ -119,6 +134,15 @@ public: String get_locale() const; Ref<Translation> get_translation_object(const String &p_locale); + Vector<String> get_all_languages() const; + String get_language_name(const String &p_language) const; + + Vector<String> get_all_scripts() const; + String get_script_name(const String &p_script) const; + + Vector<String> get_all_countries() const; + String get_country_name(const String &p_country) const; + String get_locale_name(const String &p_locale) const; Array get_loaded_locales() const; @@ -136,11 +160,9 @@ public: void set_editor_pseudolocalization(bool p_enabled); void reload_pseudolocalization(); - static Vector<String> get_all_locales(); - static Vector<String> get_all_locale_names(); - static bool is_locale_valid(const String &p_locale); - static String standardize_locale(const String &p_locale); - static String get_language_code(const String &p_locale); + String standardize_locale(const String &p_locale) const; + + int compare_locales(const String &p_locale_a, const String &p_locale_b) const; String get_tool_locale(); void set_tool_translation(const Ref<Translation> &p_translation); diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp index 1da00aa54b..fa656b634d 100644 --- a/core/string/translation_po.cpp +++ b/core/string/translation_po.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -35,7 +35,7 @@ #ifdef DEBUG_TRANSLATION_PO void TranslationPO::print_translation_map() { Error err; - FileAccess *file = FileAccess::open("translation_map_print_test.txt", FileAccess::WRITE, &err); + Ref<FileAccess> file = FileAccess::open("translation_map_print_test.txt", FileAccess::WRITE, &err); if (err != OK) { ERR_PRINT("Failed to open translation_map_print_test.txt"); return; @@ -62,7 +62,6 @@ void TranslationPO::print_translation_map() { file->store_line(""); } } - file->close(); } #endif @@ -71,21 +70,14 @@ Dictionary TranslationPO::_get_messages() const { Dictionary d; - List<StringName> context_l; - translation_map.get_key_list(&context_l); - for (const StringName &ctx : context_l) { - const HashMap<StringName, Vector<StringName>> &id_str_map = translation_map[ctx]; - + for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) { Dictionary d2; - List<StringName> id_l; - id_str_map.get_key_list(&id_l); - // Save list of id and strs associated with a context in a temporary dictionary. - for (List<StringName>::Element *E2 = id_l.front(); E2; E2 = E2->next()) { - StringName id = E2->get(); - d2[id] = id_str_map[id]; + + for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) { + d2[E2.key] = E2.value; } - d[ctx] = d2; + d[E.key] = d2; } return d; @@ -275,31 +267,24 @@ void TranslationPO::get_message_list(List<StringName> *r_messages) const { // OptimizedTranslation uses this function to get the list of msgid. // Return all the keys of translation_map under "" context. - List<StringName> context_l; - translation_map.get_key_list(&context_l); - - for (const StringName &E : context_l) { - if (String(E) != "") { + for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) { + if (E.key != StringName()) { continue; } - List<StringName> msgid_l; - translation_map[E].get_key_list(&msgid_l); - - for (List<StringName>::Element *E2 = msgid_l.front(); E2; E2 = E2->next()) { - r_messages->push_back(E2->get()); + for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) { + r_messages->push_back(E2.key); } } } int TranslationPO::get_message_count() const { - List<StringName> context_l; - translation_map.get_key_list(&context_l); - int count = 0; - for (const StringName &E : context_l) { - count += translation_map[E].size(); + + for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) { + count += E.value.size(); } + return count; } diff --git a/core/string/translation_po.h b/core/string/translation_po.h index 0e1d03d6ca..7d63af2246 100644 --- a/core/string/translation_po.h +++ b/core/string/translation_po.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/ucaps.h b/core/string/ucaps.h index b785ac7879..357d36e703 100644 --- a/core/string/ucaps.h +++ b/core/string/ucaps.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 397743fb6e..c02be9e5b7 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -35,9 +35,11 @@ #include "core/math/math_funcs.h" #include "core/os/memory.h" #include "core/string/print_string.h" +#include "core/string/string_name.h" #include "core/string/translation.h" #include "core/string/ucaps.h" #include "core/variant/variant.h" +#include "core/version_generated.gen.h" #include <stdio.h> #include <stdlib.h> @@ -53,34 +55,14 @@ static const int MAX_DECIMALS = 32; -static _FORCE_INLINE_ bool is_digit(char32_t c) { - return (c >= '0' && c <= '9'); -} - -static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { - return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); -} - -static _FORCE_INLINE_ bool is_upper_case(char32_t c) { - return (c >= 'A' && c <= 'Z'); -} - -static _FORCE_INLINE_ bool is_lower_case(char32_t c) { - return (c >= 'a' && c <= 'z'); -} - static _FORCE_INLINE_ char32_t lower_case(char32_t c) { - return (is_upper_case(c) ? (c + ('a' - 'A')) : c); + return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c); } const char CharString::_null = 0; const char16_t Char16String::_null = 0; const char32_t String::_null = 0; -bool is_symbol(char32_t c) { - return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); -} - bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) { const String &s = p_s; int beg = CLAMP(p_col, 0, s.length()); @@ -122,16 +104,18 @@ bool Char16String::operator<(const Char16String &p_right) const { } Char16String &Char16String::operator+=(char16_t p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); + const int lhs_len = length(); + resize(lhs_len + 2); + + char16_t *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; return *this; } -Char16String &Char16String::operator=(const char16_t *p_cstr) { +void Char16String::operator=(const char16_t *p_cstr) { copy_from(p_cstr); - return *this; } const char16_t *Char16String::get_data() const { @@ -178,16 +162,18 @@ bool CharString::operator<(const CharString &p_right) const { } CharString &CharString::operator+=(char p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); + const int lhs_len = length(); + resize(lhs_len + 2); + + char *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; return *this; } -CharString &CharString::operator=(const char *p_cstr) { +void CharString::operator=(const char *p_cstr) { copy_from(p_cstr); - return *this; } const char *CharString::get_data() const { @@ -325,11 +311,7 @@ void String::copy_from(const char *p_cstr) { return; } - int len = 0; - const char *ptr = p_cstr; - while (*(ptr++) != 0) { - len++; - } + const size_t len = strlen(p_cstr); if (len == 0) { resize(0); @@ -340,8 +322,14 @@ void String::copy_from(const char *p_cstr) { char32_t *dst = this->ptrw(); - for (int i = 0; i < len + 1; i++) { - dst[i] = p_cstr[i]; + for (size_t i = 0; i <= len; i++) { + uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]); + if (c == 0 && i < len) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } } @@ -368,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) { char32_t *dst = this->ptrw(); for (int i = 0; i < len; i++) { - dst[i] = p_cstr[i]; + uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]); + if (c == 0) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } dst[len] = 0; } @@ -394,14 +388,22 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) { } void String::copy_from(const char32_t &p_char) { - resize(2); - if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - set(0, 0xfffd); - } else { - set(0, p_char); + if (p_char == 0) { + print_unicode_error("NUL character", true); + return; + } + if ((p_char & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); + } + if (p_char > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); } - set(1, 0); + + resize(2); + + char32_t *dst = ptrw(); + dst[0] = p_char; + dst[1] = 0; } void String::copy_from(const char32_t *p_cstr) { @@ -450,17 +452,22 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) { // p_length <= p_char strlen void String::copy_from_unchecked(const char32_t *p_char, const int p_length) { resize(p_length + 1); - set(p_length, 0); - char32_t *dst = ptrw(); + dst[p_length] = 0; for (int i = 0; i < p_length; i++) { - if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + "."); - dst[i] = 0xfffd; - } else { - dst[i] = p_char[i]; + if (p_char[i] == 0) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + continue; + } + if ((p_char[i] & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i])); } + if (p_char[i] > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i])); + } + dst[i] = p_char[i]; } } @@ -482,6 +489,12 @@ String String::operator+(const String &p_str) const { return res; } +String String::operator+(char32_t p_char) const { + String res = *this; + res += p_char; + return res; +} + String operator+(const char *p_chr, const String &p_str) { String tmp = p_chr; tmp += p_str; @@ -493,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) { // wchar_t is 16-bit String tmp = String::utf16((const char16_t *)p_chr); #else - // wchar_t is 32-bi + // wchar_t is 32-bit String tmp = (const char32_t *)p_chr; #endif tmp += p_str; @@ -505,27 +518,25 @@ String operator+(char32_t p_chr, const String &p_str) { } String &String::operator+=(const String &p_str) { - if (is_empty()) { + const int lhs_len = length(); + if (lhs_len == 0) { *this = p_str; return *this; } - if (p_str.is_empty()) { + const int rhs_len = p_str.length(); + if (rhs_len == 0) { return *this; } - int from = length(); - - resize(length() + p_str.size()); + resize(lhs_len + rhs_len + 1); - const char32_t *src = p_str.get_data(); - char32_t *dst = ptrw(); + const char32_t *src = p_str.ptr(); + char32_t *dst = ptrw() + lhs_len; - set(length(), 0); - - for (int i = 0; i < p_str.length(); i++) { - dst[from + i] = src[i]; - } + // Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination). + memcpy(dst, src, rhs_len * sizeof(char32_t)); + *(dst + rhs_len) = _null; return *this; } @@ -535,22 +546,21 @@ String &String::operator+=(const char *p_str) { return *this; } - int src_len = 0; - const char *ptr = p_str; - while (*(ptr++) != 0) { - src_len++; - } - - int from = length(); + const int lhs_len = length(); + const size_t rhs_len = strlen(p_str); - resize(from + src_len + 1); - - char32_t *dst = ptrw(); + resize(lhs_len + rhs_len + 1); - set(length(), 0); + char32_t *dst = ptrw() + lhs_len; - for (int i = 0; i < src_len; i++) { - dst[from + i] = p_str[i]; + for (size_t i = 0; i <= rhs_len; i++) { + uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]); + if (c == 0 && i < rhs_len) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } return *this; @@ -573,15 +583,23 @@ String &String::operator+=(const char32_t *p_str) { } String &String::operator+=(char32_t p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - set(length() - 1, 0xfffd); - } else { - set(length() - 1, p_char); + if (p_char == 0) { + print_unicode_error("NUL character", true); + return *this; + } + if ((p_char & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); + } + if (p_char > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); } + const int lhs_len = length(); + resize(lhs_len + 2); + char32_t *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; + return *this; } @@ -952,10 +970,6 @@ const char32_t *String::get_data() const { return size() ? &operator[](0) : &zero; } -void String::erase(int p_pos, int p_chars) { - *this = left(MAX(p_pos, 0)) + substr(p_pos + p_chars, length() - ((p_pos + p_chars))); -} - String String::capitalize() const { String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges(); String cap; @@ -979,21 +993,21 @@ String String::camelcase_to_underscore(bool lowercase) const { int start_index = 0; for (int i = 1; i < this->size(); i++) { - bool is_upper = is_upper_case(cstr[i]); + bool is_upper = is_ascii_upper_case(cstr[i]); bool is_number = is_digit(cstr[i]); bool are_next_2_lower = false; bool is_next_lower = false; bool is_next_number = false; - bool was_precedent_upper = is_upper_case(cstr[i - 1]); + bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]); bool was_precedent_number = is_digit(cstr[i - 1]); if (i + 2 < this->size()) { - are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]); + are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]); } if (i + 1 < this->size()) { - is_next_lower = is_lower_case(cstr[i + 1]); + is_next_lower = is_ascii_lower_case(cstr[i + 1]); is_next_number = is_digit(cstr[i + 1]); } @@ -1532,115 +1546,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { } String String::num_real(double p_num, bool p_trailing) { - if (Math::is_nan(p_num)) { - return "nan"; - } - - if (Math::is_inf(p_num)) { - if (signbit(p_num)) { - return "-inf"; + if (p_num == (double)(int64_t)p_num) { + if (p_trailing) { + return num_int64((int64_t)p_num) + ".0"; } else { - return "inf"; + return num_int64((int64_t)p_num); } } - - String s; - String sd; - - // Integer part. - - bool neg = p_num < 0; - p_num = ABS(p_num); - int64_t intn = (int64_t)p_num; - - // Decimal part. - - if (intn != p_num) { - double dec = p_num - (double)intn; - - int digit = 0; - #ifdef REAL_T_IS_DOUBLE - int decimals = 14; - double tolerance = 1e-14; + int decimals = 14; #else - int decimals = 6; - double tolerance = 1e-6; + int decimals = 6; #endif - // We want to align the digits to the above sane default, so we only - // need to subtract log10 for numbers with a positive power of ten. - if (p_num > 10) { - decimals -= (int)floor(log10(p_num)); - } - - if (decimals > MAX_DECIMALS) { - decimals = MAX_DECIMALS; - } - - // In case the value ends up ending in "99999", we want to add a - // tiny bit to the value we're checking when deciding when to stop, - // so we multiply by slightly above 1 (1 + 1e-7 or 1e-15). - double check_multiplier = 1 + tolerance / 10; - - int64_t dec_int = 0; - int64_t dec_max = 0; - - while (true) { - dec *= 10.0; - dec_int = dec_int * 10 + (int64_t)dec % 10; - dec_max = dec_max * 10 + 9; - digit++; - - if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) { - break; - } - - if (digit == decimals) { - break; - } - } - - dec *= 10; - int last = (int64_t)dec % 10; - - if (last > 5) { - if (dec_int == dec_max) { - dec_int = 0; - intn++; - } else { - dec_int++; - } - } - - String decimal; - for (int i = 0; i < digit; i++) { - char num[2] = { 0, 0 }; - num[0] = '0' + dec_int % 10; - decimal = num + decimal; - dec_int /= 10; - } - sd = '.' + decimal; - } else if (p_trailing) { - sd = ".0"; - } else { - sd = ""; - } - - if (intn == 0) { - s = "0"; - } else { - while (intn) { - char32_t num = '0' + (intn % 10); - intn /= 10; - s = num + s; - } - } - - s = s + sd; - if (neg) { - s = "-" + s; + // We want to align the digits to the above sane default, so we only + // need to subtract log10 for numbers with a positive power of ten. + if (p_num > 10) { + decimals -= (int)floor(log10(p_num)); } - return s; + return num(p_num, decimals); } String String::num_scientific(double p_num) { @@ -1699,6 +1622,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { return ret; } +void String::print_unicode_error(const String &p_message, bool p_critical) const { + if (p_critical) { + print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message)); + } else { + print_error(vformat("Unicode parsing error: %s", p_message)); + } +} + CharString String::ascii(bool p_allow_extended) const { if (!length()) { return CharString(); @@ -1712,7 +1643,7 @@ CharString String::ascii(bool p_allow_extended) const { if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) { cs[i] = c; } else { - print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character."); + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c)); cs[i] = 0x20; } } @@ -1727,11 +1658,9 @@ String String::utf8(const char *p_utf8, int p_len) { return ret; } -bool String::parse_utf8(const char *p_utf8, int p_len) { -#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); - +Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) { if (!p_utf8) { - return true; + return ERR_INVALID_DATA; } String aux; @@ -1751,14 +1680,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } } + bool decode_error = false; + bool decode_failed = false; { const char *ptrtmp = p_utf8; const char *ptrtmp_limit = &p_utf8[p_len]; int skip = 0; + uint8_t c_start = 0; while (ptrtmp != ptrtmp_limit && *ptrtmp) { - if (skip == 0) { - uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp); + uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp); + if (skip == 0) { + if (p_skip_cr && c == '\r') { + ptrtmp++; + continue; + } /* Determine the number of characters in sequence */ if ((c & 0x80) == 0) { skip = 0; @@ -1768,20 +1704,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { skip = 2; } else if ((c & 0xf8) == 0xf0) { skip = 3; + } else if ((c & 0xfc) == 0xf8) { + skip = 4; + } else if ((c & 0xfe) == 0xfc) { + skip = 5; } else { - _UNICERROR("invalid skip at " + num_int64(cstr_size)); - return true; //invalid utf8 + skip = 0; + print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true); + decode_failed = true; } + c_start = c; if (skip == 1 && (c & 0x1e) == 0) { - _UNICERROR("overlong rejected at " + num_int64(cstr_size)); - return true; //reject overlong + print_unicode_error(vformat("Overlong encoding (%x ...)", c)); + decode_error = true; } - str_size++; - } else { - --skip; + if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) { + print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c)); + decode_error = true; + } + if (c < 0x80 || c > 0xbf) { + print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true); + decode_failed = true; + skip = 0; + } else { + --skip; + } } cstr_size++; @@ -1789,80 +1739,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } if (skip) { - _UNICERROR("no space left"); - return true; //not enough space + print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true); + decode_failed = true; } } if (str_size == 0) { clear(); - return false; + return OK; // empty string } resize(str_size + 1); char32_t *dst = ptrw(); dst[str_size] = 0; + int skip = 0; + uint32_t unichar = 0; while (cstr_size) { - int len = 0; - - /* Determine the number of characters in sequence */ - if ((*p_utf8 & 0x80) == 0) { - len = 1; - } else if ((*p_utf8 & 0xe0) == 0xc0) { - len = 2; - } else if ((*p_utf8 & 0xf0) == 0xe0) { - len = 3; - } else if ((*p_utf8 & 0xf8) == 0xf0) { - len = 4; - } else { - _UNICERROR("invalid len"); - return true; //invalid UTF8 - } - - if (len > cstr_size) { - _UNICERROR("no space left"); - return true; //not enough space - } + uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8); - if (len == 2 && (*p_utf8 & 0x1E) == 0) { - _UNICERROR("no space left"); - return true; //reject overlong - } - - /* Convert the first character */ - - uint32_t unichar = 0; - - if (len == 1) { - unichar = *p_utf8; + if (skip == 0) { + if (p_skip_cr && c == '\r') { + p_utf8++; + continue; + } + /* Determine the number of characters in sequence */ + if ((c & 0x80) == 0) { + *(dst++) = c; + unichar = 0; + skip = 0; + } else if ((c & 0xe0) == 0xc0) { + unichar = (0xff >> 3) & c; + skip = 1; + } else if ((c & 0xf0) == 0xe0) { + unichar = (0xff >> 4) & c; + skip = 2; + } else if ((c & 0xf8) == 0xf0) { + unichar = (0xff >> 5) & c; + skip = 3; + } else if ((c & 0xfc) == 0xf8) { + unichar = (0xff >> 6) & c; + skip = 4; + } else if ((c & 0xfe) == 0xfc) { + unichar = (0xff >> 7) & c; + skip = 5; + } else { + *(dst++) = 0x20; + unichar = 0; + skip = 0; + } } else { - unichar = (0xff >> (len + 1)) & *p_utf8; - - for (int i = 1; i < len; i++) { - if ((p_utf8[i] & 0xc0) != 0x80) { - _UNICERROR("invalid utf8"); - return true; //invalid utf8 - } - if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) { - _UNICERROR("invalid utf8 overlong"); - return true; //no overlong + if (c < 0x80 || c > 0xbf) { + *(dst++) = 0x20; + skip = 0; + } else { + unichar = (unichar << 6) | (c & 0x3f); + --skip; + if (skip == 0) { + if (unichar == 0) { + print_unicode_error("NUL character", true); + decode_failed = true; + unichar = 0x20; + } + if ((unichar & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", unichar)); + decode_error = true; + } + if (unichar > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar)); + decode_error = true; + } + *(dst++) = unichar; } - unichar = (unichar << 6) | (p_utf8[i] & 0x3f); } } - if (unichar >= 0xd800 && unichar <= 0xdfff) { - _UNICERROR("invalid code point"); - return CharString(); - } - *(dst++) = unichar; - cstr_size -= len; - p_utf8 += len; + cstr_size--; + p_utf8++; + } + if (skip) { + *(dst++) = 0x20; } - return false; -#undef _UNICERROR + if (decode_failed) { + return ERR_INVALID_DATA; + } else if (decode_error) { + return ERR_PARSE_ERROR; + } else { + return OK; + } } CharString String::utf8() const { @@ -1881,15 +1846,17 @@ CharString String::utf8() const { fl += 2; } else if (c <= 0xffff) { // 16 bits fl += 3; - } else if (c <= 0x0010ffff) { // 21 bits + } else if (c <= 0x001fffff) { // 21 bits fl += 4; + } else if (c <= 0x03ffffff) { // 26 bits + fl += 5; + print_unicode_error(vformat("Invalid unicode codepoint (%x)", c)); + } else if (c <= 0x7fffffff) { // 31 bits + fl += 6; + print_unicode_error(vformat("Invalid unicode codepoint (%x)", c)); } else { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return CharString(); - } - if (c >= 0xd800 && c <= 0xdfff) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return CharString(); + fl += 1; + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true); } } @@ -1915,11 +1882,26 @@ CharString String::utf8() const { APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. - } else { // 21 bits + } else if (c <= 0x001fffff) { // 21 bits APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else if (c <= 0x03ffffff) { // 26 bits + APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else if (c <= 0x7fffffff) { // 31 bits + APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit. + APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else { + APPEND_CHAR(0x20); } } #undef APPEND_CHAR @@ -1935,11 +1917,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) { return ret; } -bool String::parse_utf16(const char16_t *p_utf16, int p_len) { -#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?"); - +Error String::parse_utf16(const char16_t *p_utf16, int p_len) { if (!p_utf16) { - return true; + return ERR_INVALID_DATA; } String aux; @@ -1966,80 +1946,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { } } + bool decode_error = false; { const char16_t *ptrtmp = p_utf16; const char16_t *ptrtmp_limit = &p_utf16[p_len]; - int skip = 0; + uint32_t c_prev = 0; + bool skip = false; while (ptrtmp != ptrtmp_limit && *ptrtmp) { uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp; - if (skip == 0) { - if ((c & 0xfffffc00) == 0xd800) { - skip = 1; // lead surrogate - } else if ((c & 0xfffffc00) == 0xdc00) { - _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); - return true; // invalid UTF16 - } else { - skip = 0; + + if ((c & 0xfffffc00) == 0xd800) { // lead surrogate + if (skip) { + print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c)); + decode_error = true; } - str_size++; - } else { - if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate - --skip; + skip = true; + } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + if (skip) { + str_size--; } else { - _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); - return true; // invalid UTF16 + print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c)); + decode_error = true; } + skip = false; + } else { + skip = false; } + c_prev = c; + str_size++; cstr_size++; ptrtmp++; } if (skip) { - _UNICERROR("no space left"); - return true; // not enough space + print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev)); + decode_error = true; } } if (str_size == 0) { clear(); - return false; + return OK; // empty string } resize(str_size + 1); char32_t *dst = ptrw(); dst[str_size] = 0; + bool skip = false; + uint32_t c_prev = 0; while (cstr_size) { - int len = 0; uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16; - if ((c & 0xfffffc00) == 0xd800) { - len = 2; + if ((c & 0xfffffc00) == 0xd800) { // lead surrogate + if (skip) { + *(dst++) = c_prev; // unpaired, store as is + } + skip = true; + } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + if (skip) { + *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair + } else { + *(dst++) = c; // unpaired, store as is + } + skip = false; } else { - len = 1; - } - - if (len > cstr_size) { - _UNICERROR("no space left"); - return true; //not enough space + *(dst++) = c; + skip = false; } - uint32_t unichar = 0; - if (len == 1) { - unichar = c; - } else { - uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1]; - unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000); - } + cstr_size--; + p_utf16++; + c_prev = c; + } - *(dst++) = unichar; - cstr_size -= len; - p_utf16 += len; + if (skip) { + *(dst++) = c_prev; } - return false; -#undef _UNICERROR + if (decode_error) { + return ERR_PARSE_ERROR; + } else { + return OK; + } } Char16String String::utf16() const { @@ -2054,15 +2044,14 @@ Char16String String::utf16() const { uint32_t c = d[i]; if (c <= 0xffff) { // 16 bits. fl += 1; + if ((c & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", c)); + } } else if (c <= 0x10ffff) { // 32 bits. fl += 2; } else { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return Char16String(); - } - if (c >= 0xd800 && c <= 0xdfff) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return Char16String(); + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true); + fl += 1; } } @@ -2081,9 +2070,11 @@ Char16String String::utf16() const { if (c <= 0xffff) { // 16 bits. APPEND_CHAR(c); - } else { // 32 bits. + } else if (c <= 0x10ffff) { // 32 bits. APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate. APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate. + } else { + APPEND_CHAR(0x20); } } #undef APPEND_CHAR @@ -2155,7 +2146,7 @@ int64_t String::hex_to_int() const { } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0'); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); hex *= 16; hex += n; s++; @@ -2194,7 +2185,7 @@ int64_t String::bin_to_int() const { } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0'); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); binary *= 2; binary += n; s++; @@ -2217,7 +2208,7 @@ int64_t String::to_int() const { char32_t c = operator[](i); if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2246,7 +2237,7 @@ int64_t String::to_int(const char *p_str, int p_len) { char c = p_str[i]; if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2277,7 +2268,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) { wchar_t c = p_str[i]; if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2308,7 +2299,7 @@ bool String::is_numeric() const { return false; } dot = true; - } else if (c < '0' || c > '9') { + } else if (!is_digit(c)) { return false; } } @@ -2317,28 +2308,33 @@ bool String::is_numeric() const { } template <class C> -static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number, - * optionally preceded by white space. Must - * have form "-I.FE-X", where I is the integer - * part of the mantissa, F is the fractional - * part of the mantissa, and X is the - * exponent. Either of the signs may be "+", - * "-", or omitted. Either I or F may be - * omitted, or both. The decimal point isn't - * necessary unless F is present. The "E" may - * actually be an "e". E and X may both be - * omitted (but not just one). */ - C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's - * address here. */ -{ - static const int maxExponent = 511; /* Largest possible base 10 exponent. Any - * exponent larger than this will already - * produce underflow or overflow, so there's - * no need to worry about additional digits. - */ - static const double powersOf10[] = { /* Table giving binary powers of 10. Entry */ - 10., /* is 10^2^i. Used to convert decimal */ - 100., /* exponents into floating-point numbers. */ +static double built_in_strtod( + /* A decimal ASCII floating-point number, + * optionally preceded by white space. Must + * have form "-I.FE-X", where I is the integer + * part of the mantissa, F is the fractional + * part of the mantissa, and X is the + * exponent. Either of the signs may be "+", + * "-", or omitted. Either I or F may be + * omitted, or both. The decimal point isn't + * necessary unless F is present. The "E" may + * actually be an "e". E and X may both be + * omitted (but not just one). */ + const C *string, + /* If non-nullptr, store terminating Cacter's + * address here. */ + C **endPtr = nullptr) { + /* Largest possible base 10 exponent. Any + * exponent larger than this will already + * produce underflow or overflow, so there's + * no need to worry about additional digits. */ + static const int maxExponent = 511; + /* Table giving binary powers of 10. Entry + * is 10^2^i. Used to convert decimal + * exponents into floating-point numbers. */ + static const double powersOf10[] = { + 10., + 100., 1.0e4, 1.0e8, 1.0e16, @@ -2353,25 +2349,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point const double *d; const C *p; int c; - int exp = 0; /* Exponent read from "EX" field. */ - int fracExp = 0; /* Exponent that derives from the fractional - * part. Under normal circumstances, it is - * the negative of the number of digits in F. - * However, if I is very long, the last digits - * of I get dropped (otherwise a long I with a - * large negative exponent could cause an - * unnecessary overflow on I alone). In this - * case, fracExp is incremented one for each - * dropped digit. */ - int mantSize; /* Number of digits in mantissa. */ - int decPt; /* Number of mantissa digits BEFORE decimal - * point. */ - const C *pExp; /* Temporarily holds location of exponent in - * string. */ + /* Exponent read from "EX" field. */ + int exp = 0; + /* Exponent that derives from the fractional + * part. Under normal circumstances, it is + * the negative of the number of digits in F. + * However, if I is very long, the last digits + * of I get dropped (otherwise a long I with a + * large negative exponent could cause an + * unnecessary overflow on I alone). In this + * case, fracExp is incremented one for each + * dropped digit. */ + int fracExp = 0; + /* Number of digits in mantissa. */ + int mantSize; + /* Number of mantissa digits BEFORE decimal point. */ + int decPt; + /* Temporarily holds location of exponent in string. */ + const C *pExp; /* - * Strip off leading blanks and check for a sign. - */ + * Strip off leading blanks and check for a sign. + */ p = string; while (*p == ' ' || *p == '\t' || *p == '\n') { @@ -2388,9 +2387,9 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Count the number of digits in the mantissa (including the decimal - * point), and also locate the decimal point. - */ + * Count the number of digits in the mantissa (including the decimal + * point), and also locate the decimal point. + */ decPt = -1; for (mantSize = 0;; mantSize += 1) { @@ -2405,11 +2404,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Now suck up the digits in the mantissa. Use two integers to collect 9 - * digits each (this is faster than using floating-point). If the mantissa - * has more than 18 digits, ignore the extras, since they can't affect the - * value anyway. - */ + * Now suck up the digits in the mantissa. Use two integers to collect 9 + * digits each (this is faster than using floating-point). If the mantissa + * has more than 18 digits, ignore the extras, since they can't affect the + * value anyway. + */ pExp = p; p -= mantSize; @@ -2455,8 +2454,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Skim off the exponent. - */ + * Skim off the exponent. + */ p = pExp; if ((*p == 'E') || (*p == 'e')) { @@ -2486,10 +2485,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Generate a floating-point number that represents the exponent. Do this - * by processing the exponent one bit at a time to combine many powers of - * 2 of 10. Then combine the exponent with the fraction. - */ + * Generate a floating-point number that represents the exponent. Do this + * by processing the exponent one bit at a time to combine many powers of + * 2 of 10. Then combine the exponent with the fraction. + */ if (exp < 0) { expSign = true; @@ -2591,7 +2590,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { return INT64_MIN; } } else { - ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); } } integer *= 10; @@ -3168,7 +3167,7 @@ bool String::is_subsequence_of(const String &p_string) const { return _base_is_subsequence_of(p_string, false); } -bool String::is_subsequence_ofi(const String &p_string) const { +bool String::is_subsequence_ofn(const String &p_string) const { return _base_is_subsequence_of(p_string, true); } @@ -3452,51 +3451,52 @@ String String::replacen(const String &p_key, const String &p_with) const { String String::repeat(int p_count) const { ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number."); - String new_string; - const char32_t *src = this->get_data(); - - new_string.resize(length() * p_count + 1); - new_string[length() * p_count] = 0; - - for (int i = 0; i < p_count; i++) { - for (int j = 0; j < length(); j++) { - new_string[i * length() + j] = src[j]; - } - } - + int len = length(); + String new_string = *this; + new_string.resize(p_count * len + 1); + + char32_t *dst = new_string.ptrw(); + int offset = 1; + int stride = 1; + while (offset < p_count) { + memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t)); + offset += stride; + stride = MIN(stride * 2, p_count - offset); + } + dst[p_count * len] = _null; return new_string; } -String String::left(int p_pos) const { - if (p_pos < 0) { - p_pos = length() + p_pos; +String String::left(int p_len) const { + if (p_len < 0) { + p_len = length() + p_len; } - if (p_pos <= 0) { + if (p_len <= 0) { return ""; } - if (p_pos >= length()) { + if (p_len >= length()) { return *this; } - return substr(0, p_pos); + return substr(0, p_len); } -String String::right(int p_pos) const { - if (p_pos < 0) { - p_pos = length() + p_pos; +String String::right(int p_len) const { + if (p_len < 0) { + p_len = length() + p_len; } - if (p_pos <= 0) { + if (p_len <= 0) { return ""; } - if (p_pos >= length()) { + if (p_len >= length()) { return *this; } - return substr(length() - p_pos); + return substr(length() - p_len); } char32_t String::unicode_at(int p_idx) const { @@ -3504,6 +3504,27 @@ char32_t String::unicode_at(int p_idx) const { return operator[](p_idx); } +String String::indent(const String &p_prefix) const { + String new_string; + int line_start = 0; + + for (int i = 0; i < length(); i++) { + const char32_t c = operator[](i); + if (c == '\n') { + if (i == line_start) { + new_string += c; // Leave empty lines empty. + } else { + new_string += p_prefix + substr(line_start, i - line_start + 1); + } + line_start = i + 1; + } + } + if (line_start != length()) { + new_string += p_prefix + substr(line_start); + } + return new_string; +} + String String::dedent() const { String new_string; String indent; @@ -3625,6 +3646,10 @@ String String::rstrip(const String &p_chars) const { return substr(0, end + 1); } +bool String::is_network_share_path() const { + return begins_with("//") || begins_with("\\\\"); +} + String String::simplify_path() const { String s = *this; String drive; @@ -3637,6 +3662,9 @@ String String::simplify_path() const { } else if (s.begins_with("user://")) { drive = "user://"; s = s.substr(7, s.length()); + } else if (is_network_share_path()) { + drive = s.substr(0, 2); + s = s.substr(2, s.length() - 2); } else if (s.begins_with("/") || s.begins_with("\\")) { drive = s.substr(0, 1); s = s.substr(1, s.length() - 1); @@ -3665,15 +3693,15 @@ String String::simplify_path() const { for (int i = 0; i < dirs.size(); i++) { String d = dirs[i]; if (d == ".") { - dirs.remove(i); + dirs.remove_at(i); i--; } else if (d == "..") { if (i == 0) { - dirs.remove(i); + dirs.remove_at(i); i--; } else { - dirs.remove(i); - dirs.remove(i - 1); + dirs.remove_at(i); + dirs.remove_at(i - 1); i -= 2; } } @@ -3735,6 +3763,31 @@ bool String::is_absolute_path() const { } } +static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) { + if (p_index == 0 && is_digit(p_char)) { + return false; // No start with number plz. + } + return is_ascii_identifier_char(p_char); +} + +String String::validate_identifier() const { + if (is_empty()) { + return "_"; // Empty string is not a valid identifier; + } + + String result = *this; + int len = result.length(); + char32_t *buffer = result.ptrw(); + + for (int i = 0; i < len; i++) { + if (!_is_valid_identifier_bit(i, buffer[i])) { + buffer[i] = '_'; + } + } + + return result; +} + bool String::is_valid_identifier() const { int len = length(); @@ -3745,15 +3798,7 @@ bool String::is_valid_identifier() const { const char32_t *str = &operator[](0); for (int i = 0; i < len; i++) { - if (i == 0) { - if (is_digit(str[0])) { - return false; // no start with number plz - } - } - - bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_'; - - if (!valid_char) { + if (!_is_valid_identifier_bit(i, str[i])) { return false; } } @@ -3775,18 +3820,15 @@ String String::uri_encode() const { const CharString temp = utf8(); String res; for (int i = 0; i < temp.length(); ++i) { - char ord = temp[i]; - if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) { + uint8_t ord = temp[i]; + if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) { res += ord; } else { - char h_Val[3]; -#if defined(__GNUC__) || defined(_MSC_VER) - snprintf(h_Val, 3, "%02hhX", ord); -#else - sprintf(h_Val, "%02hhX", ord); -#endif - res += "%"; - res += h_Val; + char p[4] = { '%', 0, 0, 0 }; + static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + p[1] = hex[ord >> 4]; + p[2] = hex[ord & 0xF]; + res += p; } } return res; @@ -3798,9 +3840,9 @@ String String::uri_decode() const { for (int i = 0; i < src.length(); ++i) { if (src[i] == '%' && i + 2 < src.length()) { char ord1 = src[i + 1]; - if (is_digit(ord1) || is_upper_case(ord1)) { + if (is_digit(ord1) || is_ascii_upper_case(ord1)) { char ord2 = src[i + 2]; - if (is_digit(ord2) || is_upper_case(ord2)) { + if (is_digit(ord2) || is_ascii_upper_case(ord2)) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); i += 2; @@ -3927,7 +3969,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch for (int i = 2; i < p_src_len; i++) { eat = i + 1; char32_t ct = p_src[i]; - if (ct == ';' || ct < '0' || ct > '9') { + if (ct == ';' || !is_digit(ct)) { break; } } @@ -4057,7 +4099,7 @@ String String::pad_zeros(int p_digits) const { int begin = 0; - while (begin < end && (s[begin] < '0' || s[begin] > '9')) { + while (begin < end && !is_digit(s[begin])) { begin++; } @@ -4102,7 +4144,7 @@ bool String::is_valid_int() const { } for (int i = from; i < len; i++) { - if (operator[](i) < '0' || operator[](i) > '9') { + if (!is_digit(operator[](i))) { return false; // no start with number plz } } @@ -4204,15 +4246,11 @@ String String::path_to(const String &p_path) const { dst += "/"; } - String base; - if (src.begins_with("res://") && dst.begins_with("res://")) { - base = "res:/"; src = src.replace("res://", "/"); dst = dst.replace("res://", "/"); } else if (src.begins_with("user://") && dst.begins_with("user://")) { - base = "user:/"; src = src.replace("user://", "/"); dst = dst.replace("user://", "/"); @@ -4227,7 +4265,6 @@ String String::path_to(const String &p_path) const { return p_path; //impossible to do this } - base = src_begin; src = src.substr(src_begin.length(), src.length()); dst = dst.substr(dst_begin.length(), dst.length()); } @@ -4280,7 +4317,7 @@ bool String::is_valid_filename() const { return false; } - if (stripped == String()) { + if (stripped.is_empty()) { return false; } @@ -4338,13 +4375,13 @@ bool String::is_relative_path() const { String String::get_base_dir() const { int end = 0; - // url scheme style base + // URL scheme style base. int basepos = find("://"); if (basepos != -1) { end = basepos + 3; } - // windows top level directory base + // Windows top level directory base. if (end == 0) { basepos = find(":/"); if (basepos == -1) { @@ -4355,7 +4392,24 @@ String String::get_base_dir() const { } } - // unix root directory base + // Windows UNC network share path. + if (end == 0) { + if (is_network_share_path()) { + basepos = find("/", 2); + if (basepos == -1) { + basepos = find("\\", 2); + } + int servpos = find("/", basepos + 1); + if (servpos == -1) { + servpos = find("\\", basepos + 1); + } + if (servpos != -1) { + end = servpos + 1; + } + } + } + + // Unix root directory base. if (end == 0) { if (begins_with("/")) { end = 1; @@ -4412,7 +4466,7 @@ String String::property_name_encode() const { // as well as '"', '=' or ' ' (32) const char32_t *cstr = get_data(); for (int i = 0; cstr[i]; i++) { - if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) { + if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) { return "\"" + c_escape_multiline() + "\""; } } @@ -4421,7 +4475,7 @@ String String::property_name_encode() const { } // Changes made to the set of invalid characters must also be reflected in the String documentation. -const String String::invalid_node_name_characters = ". : @ / \""; +const String String::invalid_node_name_characters = ". : @ / \" " UNIQUE_NODE_PREFIX; String String::validate_node_name() const { Vector<String> chars = String::invalid_node_name_characters.split(" "); @@ -4495,7 +4549,7 @@ String String::sprintf(const Array &values, bool *error) const { int min_chars = 0; int min_decimals = 0; bool in_decimals = false; - bool pad_with_zeroes = false; + bool pad_with_zeros = false; bool left_justified = false; bool show_sign = false; @@ -4548,7 +4602,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeroes ? String("0") : String(" "); + String pad_char = pad_with_zeros ? String("0") : String(" "); if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -4556,10 +4610,13 @@ String String::sprintf(const Array &values, bool *error) const { } // Sign. - if (show_sign && value >= 0) { - str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+"); - } else if (value < 0) { - str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-"); + if (show_sign || value < 0) { + String sign_char = value < 0 ? "-" : "+"; + if (left_justified) { + str = str.insert(0, sign_char); + } else { + str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char); + } } formatted += str; @@ -4588,13 +4645,9 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeroes ? String("0") : String(" "); + String pad_char = pad_with_zeros ? String("0") : String(" "); if (left_justified) { - if (pad_with_zeroes) { - return "left justification cannot be used with zeros as the padding"; - } else { - str = str.rpad(pad_chars_count, pad_char); - } + str = str.rpad(pad_chars_count, pad_char); } else { str = str.lpad(pad_chars_count, pad_char); } @@ -4605,7 +4658,7 @@ String String::sprintf(const Array &values, bool *error) const { if (left_justified) { str = str.insert(0, sign_char); } else { - str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char); + str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char); } } @@ -4694,7 +4747,11 @@ String String::sprintf(const Array &values, bool *error) const { min_decimals += n; } else { if (c == '0' && min_chars == 0) { - pad_with_zeroes = true; + if (left_justified) { + WARN_PRINT("'0' flag ignored with '-' flag in string format"); + } else { + pad_with_zeros = true; + } } else { min_chars *= 10; min_chars += n; @@ -4743,7 +4800,7 @@ String String::sprintf(const Array &values, bool *error) const { // Back to defaults: min_chars = 0; min_decimals = 6; - pad_with_zeroes = false; + pad_with_zeros = false; left_justified = false; show_sign = false; in_decimals = false; @@ -4844,6 +4901,17 @@ Vector<uint8_t> String::to_utf32_buffer() const { } #ifdef TOOLS_ENABLED +/** + * "Tools TRanslate". Performs string replacement for internationalization + * within the editor. A translation context can optionally be specified to + * disambiguate between identical source strings in translations. When + * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`. + * If a string mentions a quantity (and may therefore need a dynamic plural form), + * use `TTRN()` instead of `TTR()`. + * + * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder). + * For translations that can be supplied by exported projects, use `RTR()` instead. + */ String TTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { return TranslationServer::get_singleton()->tool_translate(p_text, p_context); @@ -4852,6 +4920,18 @@ String TTR(const String &p_text, const String &p_context) { return p_text; } +/** + * "Tools TRanslate for N items". Performs string replacement for + * internationalization within the editor. A translation context can optionally + * be specified to disambiguate between identical source strings in + * translations. Use `TTR()` if the string doesn't need dynamic plural form. + * When placeholders are desired, use + * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`. + * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`. + * + * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder). + * For translations that can be supplied by exported projects, use `RTRN()` instead. + */ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); @@ -4864,37 +4944,62 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St return p_text_plural; } +/** + * "Docs TRanslate". Used for the editor class reference documentation, + * handling descriptions extracted from the XML. + * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch, + * to allow dehardcoding it in the XML and doing proper substitutions everywhere. + */ String DTR(const String &p_text, const String &p_context) { // Comes straight from the XML, so remove indentation and any trailing whitespace. const String text = p_text.dedent().strip_edges(); if (TranslationServer::get_singleton()) { - return TranslationServer::get_singleton()->doc_translate(text, p_context); + return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL); } - return text; + return text.replace("$DOCS_URL", VERSION_DOCS_URL); } +/** + * "Docs TRanslate for N items". Used for the editor class reference documentation + * (with support for plurals), handling descriptions extracted from the XML. + * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch, + * to allow dehardcoding it in the XML and doing proper substitutions everywhere. + */ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { const String text = p_text.dedent().strip_edges(); const String text_plural = p_text_plural.dedent().strip_edges(); if (TranslationServer::get_singleton()) { - return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context); + return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL); } // Return message based on English plural rule if translation is not possible. if (p_n == 1) { - return text; + return text.replace("$DOCS_URL", VERSION_DOCS_URL); } - return text_plural; + return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL); } #endif +/** + * "Run-time TRanslate". Performs string replacement for internationalization + * within a running project. The translation string must be supplied by the + * project, as Godot does not provide built-in translations for `RTR()` strings + * to keep binary size low. A translation context can optionally be specified to + * disambiguate between identical source strings in translations. When + * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`. + * If a string mentions a quantity (and may therefore need a dynamic plural form), + * use `RTRN()` instead of `RTR()`. + * + * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/` + * folder). For editor translations, use `TTR()` instead. + */ String RTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context); - if (rtr == String() || rtr == p_text) { + if (rtr.is_empty() || rtr == p_text) { return TranslationServer::get_singleton()->translate(p_text, p_context); } else { return rtr; @@ -4904,10 +5009,24 @@ String RTR(const String &p_text, const String &p_context) { return p_text; } +/** + * "Run-time TRanslate for N items". Performs string replacement for + * internationalization within a running project. The translation string must be + * supplied by the project, as Godot does not provide built-in translations for + * `RTRN()` strings to keep binary size low. A translation context can + * optionally be specified to disambiguate between identical source strings in + * translations. Use `RTR()` if the string doesn't need dynamic plural form. + * When placeholders are desired, use + * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`. + * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`. + * + * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/` + * folder). For editor translations, use `TTRN()` instead. + */ String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); - if (rtr == String() || rtr == p_text || rtr == p_text_plural) { + if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) { return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context); } else { return rtr; diff --git a/core/string/ustring.h b/core/string/ustring.h index 1d80ccf58d..6c3169f136 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -28,10 +28,12 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ +// Note: _GODOT suffix added to avoid conflict with ICU header with the same guard. + #ifndef USTRING_GODOT_H #define USTRING_GODOT_H -// Note: Renamed to avoid conflict with ICU header with the same name. +#include "core/string/char_utils.h" #include "core/templates/cowdata.h" #include "core/templates/vector.h" #include "core/typedefs.h" @@ -108,13 +110,10 @@ public: _FORCE_INLINE_ Char16String() {} _FORCE_INLINE_ Char16String(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); } - _FORCE_INLINE_ Char16String &operator=(const Char16String &p_str) { - _cowdata._ref(p_str._cowdata); - return *this; - } + _FORCE_INLINE_ void operator=(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); } _FORCE_INLINE_ Char16String(const char16_t *p_cstr) { copy_from(p_cstr); } - Char16String &operator=(const char16_t *p_cstr); + void operator=(const char16_t *p_cstr); bool operator<(const Char16String &p_right) const; Char16String &operator+=(char16_t p_char); int length() const { return size() ? size() - 1 : 0; } @@ -152,13 +151,10 @@ public: _FORCE_INLINE_ CharString() {} _FORCE_INLINE_ CharString(const CharString &p_str) { _cowdata._ref(p_str._cowdata); } - _FORCE_INLINE_ CharString &operator=(const CharString &p_str) { - _cowdata._ref(p_str._cowdata); - return *this; - } + _FORCE_INLINE_ void operator=(const CharString &p_str) { _cowdata._ref(p_str._cowdata); } _FORCE_INLINE_ CharString(const char *p_cstr) { copy_from(p_cstr); } - CharString &operator=(const char *p_cstr); + void operator=(const char *p_cstr); bool operator<(const CharString &p_right) const; CharString &operator+=(char p_char); int length() const { return size() ? size() - 1 : 0; } @@ -209,7 +205,7 @@ public: _FORCE_INLINE_ char32_t *ptrw() { return _cowdata.ptrw(); } _FORCE_INLINE_ const char32_t *ptr() const { return _cowdata.ptr(); } - void remove(int p_index) { _cowdata.remove(p_index); } + void remove_at(int p_index) { _cowdata.remove_at(p_index); } _FORCE_INLINE_ void clear() { resize(0); } @@ -230,6 +226,7 @@ public: bool operator==(const String &p_str) const; bool operator!=(const String &p_str) const; String operator+(const String &p_str) const; + String operator+(char32_t p_char) const; String &operator+=(const String &); String &operator+=(char32_t p_char); @@ -275,6 +272,9 @@ public: bool is_valid_string() const; + /* debug, error messages */ + void print_unicode_error(const String &p_message, bool p_critical = false) const; + /* complex helpers */ String substr(int p_from, int p_chars = -1) const; int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed @@ -291,7 +291,7 @@ public: bool ends_with(const String &p_string) const; bool is_enclosed_in(const String &p_string) const; bool is_subsequence_of(const String &p_string) const; - bool is_subsequence_ofi(const String &p_string) const; + bool is_subsequence_ofn(const String &p_string) const; bool is_quoted() const; Vector<String> bigrams() const; float similarity(const String &p_string) const; @@ -360,8 +360,9 @@ public: int count(const String &p_string, int p_from = 0, int p_to = 0) const; int countn(const String &p_string, int p_from = 0, int p_to = 0) const; - String left(int p_pos) const; - String right(int p_pos) const; + String left(int p_len) const; + String right(int p_len) const; + String indent(const String &p_prefix) const; String dedent() const; String strip_edges(bool left = true, bool right = true) const; String strip_escapes() const; @@ -376,11 +377,11 @@ public: CharString ascii(bool p_allow_extended = false) const; CharString utf8() const; - bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error + Error parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false); static String utf8(const char *p_utf8, int p_len = -1); Char16String utf16() const; - bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error + Error parse_utf16(const char16_t *p_utf16, int p_len = -1); static String utf16(const char16_t *p_utf16, int p_len = -1); static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */ @@ -399,6 +400,8 @@ public: Vector<uint8_t> sha256_buffer() const; _FORCE_INLINE_ bool is_empty() const { return length() == 0; } + _FORCE_INLINE_ bool contains(const char *p_str) const { return find(p_str) != -1; } + _FORCE_INLINE_ bool contains(const String &p_str) const { return find(p_str) != -1; } // path functions bool is_absolute_path() const; @@ -410,6 +413,7 @@ public: String get_file() const; static String humanize_size(uint64_t p_size); String simplify_path() const; + bool is_network_share_path() const; String xml_escape(bool p_escape_quotes = false) const; String xml_unescape() const; @@ -427,6 +431,7 @@ public: // node functions static const String invalid_node_name_characters; String validate_node_name() const; + String validate_identifier() const; bool is_valid_identifier() const; bool is_valid_int() const; @@ -442,11 +447,7 @@ public: _FORCE_INLINE_ String() {} _FORCE_INLINE_ String(const String &p_str) { _cowdata._ref(p_str._cowdata); } - - String &operator=(const String &p_str) { - _cowdata._ref(p_str._cowdata); - return *this; - } + _FORCE_INLINE_ void operator=(const String &p_str) { _cowdata._ref(p_str._cowdata); } Vector<uint8_t> to_ascii_buffer() const; Vector<uint8_t> to_utf8_buffer() const; @@ -527,19 +528,24 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St #define TTRGET(m_value) TTR(m_value) #else -#define TTR(m_value) String() -#define TTRN(m_value) String() -#define DTR(m_value) String() -#define DTRN(m_value) String() #define TTRC(m_value) (m_value) #define TTRGET(m_value) (m_value) #endif +// Use this to mark property names for editor translation. +// Often for dynamic properties defined in _get_property_list(). +// Property names defined directly inside EDITOR_DEF, GLOBAL_DEF, and ADD_PROPERTY macros don't need this. +#define PNAME(m_value) (m_value) + +// Similar to PNAME, but to mark groups, i.e. properties with PROPERTY_USAGE_GROUP. +// Groups defined directly inside ADD_GROUP macros don't need this. +// The arguments are the same as ADD_GROUP. m_prefix is only used for extraction. +#define GNAME(m_value, m_prefix) (m_value) + // Runtime translate for the public node API. String RTR(const String &p_text, const String &p_context = ""); String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = ""); -bool is_symbol(char32_t c); bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end); _FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) { |