diff options
Diffstat (limited to 'core/string')
-rw-r--r-- | core/string/char_utils.h | 92 | ||||
-rw-r--r-- | core/string/locales.h | 1197 | ||||
-rw-r--r-- | core/string/node_path.cpp | 6 | ||||
-rw-r--r-- | core/string/node_path.h | 4 | ||||
-rw-r--r-- | core/string/optimized_translation.cpp | 4 | ||||
-rw-r--r-- | core/string/optimized_translation.h | 6 | ||||
-rw-r--r-- | core/string/print_string.cpp | 4 | ||||
-rw-r--r-- | core/string/print_string.h | 4 | ||||
-rw-r--r-- | core/string/string_buffer.h | 4 | ||||
-rw-r--r-- | core/string/string_builder.cpp | 6 | ||||
-rw-r--r-- | core/string/string_builder.h | 4 | ||||
-rw-r--r-- | core/string/string_name.cpp | 39 | ||||
-rw-r--r-- | core/string/string_name.h | 16 | ||||
-rw-r--r-- | core/string/translation.cpp | 1241 | ||||
-rw-r--r-- | core/string/translation.h | 40 | ||||
-rw-r--r-- | core/string/translation_po.cpp | 4 | ||||
-rw-r--r-- | core/string/translation_po.h | 4 | ||||
-rw-r--r-- | core/string/ucaps.h | 4 | ||||
-rw-r--r-- | core/string/ustring.cpp | 345 | ||||
-rw-r--r-- | core/string/ustring.h | 32 |
20 files changed, 1841 insertions, 1215 deletions
diff --git a/core/string/char_utils.h b/core/string/char_utils.h new file mode 100644 index 0000000000..0afd058f01 --- /dev/null +++ b/core/string/char_utils.h @@ -0,0 +1,92 @@ +/*************************************************************************/ +/* char_utils.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CHAR_UTILS_H +#define CHAR_UTILS_H + +#include "core/typedefs.h" + +static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { + return (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { + return (c >= 'a' && c <= 'z'); +} + +static _FORCE_INLINE_ bool is_digit(char32_t c) { + return (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { + return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); +} + +static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { + return (c == '0' || c == '1'); +} + +static _FORCE_INLINE_ bool is_ascii_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +} + +static _FORCE_INLINE_ bool is_symbol(char32_t c) { + return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); +} + +static _FORCE_INLINE_ bool is_control(char32_t p_char) { + return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f); +} + +static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { + return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085); +} + +static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { + return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029); +} + +static _FORCE_INLINE_ bool is_punct(char32_t p_char) { + return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f); +} + +static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { + return (p_char == '_'); +} + +#endif // CHAR_UTILS_H diff --git a/core/string/locales.h b/core/string/locales.h new file mode 100644 index 0000000000..32d6608ec2 --- /dev/null +++ b/core/string/locales.h @@ -0,0 +1,1197 @@ +/*************************************************************************/ +/* locales.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef LOCALES_H +#define LOCALES_H + +// Windows has some weird locale identifiers which do not honor the ISO 639-1 +// standardized nomenclature. Whenever those don't conflict with existing ISO +// identifiers, we override them. +// +// Reference: +// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx + +static const char *locale_renames[][2] = { + { "in", "id" }, // Indonesian + { "iw", "he" }, // Hebrew + { "no", "nb" }, // Norwegian Bokmål + { "C", "en" }, // Locale is not set, fallback to English. + { nullptr, nullptr } +}; + +// Additional script information to preferred scripts. +// Language code, script code, default country, supported countries. +// Reference: +// - https://lh.2xlibre.net/locales/ +// - https://www.localeplanet.com/icu/index.html +// - https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f + +static const char *locale_scripts[][4] = { + { "az", "Latn", "", "AZ" }, + { "az", "Arab", "", "IR" }, + { "bs", "Latn", "", "BA" }, + { "ff", "Latn", "", "BF,CM,GH,GM,GN,GW,LR,MR,NE,NG,SL,SN" }, + { "pa", "Arab", "PK", "PK" }, + { "pa", "Guru", "IN", "IN" }, + { "sd", "Arab", "PK", "PK" }, + { "sd", "Deva", "IN", "IN" }, + { "shi", "Tfng", "", "MA" }, + { "sr", "Cyrl", "", "BA,RS,XK" }, + { "sr", "Latn", "", "ME" }, + { "uz", "Latn", "", "UZ" }, + { "uz", "Arab", "AF", "AF" }, + { "vai", "Vaii", "", "LR" }, + { "yue", "Hans", "CN", "CN" }, + { "yue", "Hant", "HK", "HK" }, + { "zh", "Hans", "CN", "CN,SG" }, + { "zh", "Hant", "TW", "HK,MO,TW" }, + { nullptr, nullptr, nullptr, nullptr } +}; + +// Additional mapping for outdated, temporary or exceptionally reserved country codes. +// Reference: +// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 +// - https://www.iso.org/obp/ui/#search/code/ + +static const char *country_renames[][2] = { + { "BU", "MM" }, // Burma, name changed to Myanmar. + { "KV", "XK" }, // Kosovo (temporary FIPS code to European Commission code), no official ISO code assigned. + { "TP", "TL" }, // East Timor, name changed to Timor-Leste. + { "UK", "GB" }, // United Kingdom, exceptionally reserved code. + { nullptr, nullptr } +}; + +// Country code, country name. +// Reference: +// - https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 +// - https://www.iso.org/obp/ui/#search/code/ + +static const char *country_names[][2] = { + { "AC", "Ascension Island" }, // Exceptionally reserved. + { "AD", "Andorra" }, + { "AE", "United Arab Emirates" }, + { "AF", "Afghanistan" }, + { "AG", "Antigua and Barbuda" }, + { "AI", "Anguilla" }, + { "AL", "Albania" }, + { "AM", "Armenia" }, + { "AN", "Netherlands Antilles" }, // Transitionally reserved, divided into BQ, CW and SX. + { "AO", "Angola" }, + { "AQ", "Antarctica" }, + { "AR", "Argentina" }, + { "AS", "American Samoa" }, + { "AT", "Austria" }, + { "AU", "Australia" }, + { "AW", "Aruba" }, + { "AX", "Åland Islands" }, + { "AZ", "Azerbaijan" }, + { "BA", "Bosnia and Herzegovina" }, + { "BB", "Barbados" }, + { "BD", "Bangladesh" }, + { "BE", "Belgium" }, + { "BF", "Burkina Faso" }, + { "BG", "Bulgaria" }, + { "BH", "Bahrain" }, + { "BI", "Burundi" }, + { "BJ", "Benin" }, + { "BL", "St. Barthélemy" }, + { "BM", "Bermuda" }, + { "BN", "Brunei" }, + { "BO", "Bolivia" }, + { "BQ", "Caribbean Netherlands" }, + { "BR", "Brazil" }, + { "BS", "Bahamas" }, + { "BT", "Bhutan" }, + { "BV", "Bouvet Island" }, + { "BW", "Botswana" }, + { "BY", "Belarus" }, + { "BZ", "Belize" }, + { "CA", "Canada" }, + { "CC", "Cocos (Keeling) Islands" }, + { "CD", "Congo - Kinshasa" }, + { "CF", "Central African Republic" }, + { "CG", "Congo - Brazzaville" }, + { "CH", "Switzerland" }, + { "CI", "Côte d'Ivoire" }, + { "CK", "Cook Islands" }, + { "CL", "Chile" }, + { "CM", "Cameroon" }, + { "CN", "China" }, + { "CO", "Colombia" }, + { "CP", "Clipperton Island" }, // Exceptionally reserved. + { "CR", "Costa Rica" }, + { "CQ", "Island of Sark" }, // Exceptionally reserved. + { "CU", "Cuba" }, + { "CV", "Cabo Verde" }, + { "CW", "Curaçao" }, + { "CX", "Christmas Island" }, + { "CY", "Cyprus" }, + { "CZ", "Czechia" }, + { "DE", "Germany" }, + { "DG", "Diego Garcia" }, // Exceptionally reserved. + { "DJ", "Djibouti" }, + { "DK", "Denmark" }, + { "DM", "Dominica" }, + { "DO", "Dominican Republic" }, + { "DZ", "Algeria" }, + { "EA", "Ceuta and Melilla" }, // Exceptionally reserved. + { "EC", "Ecuador" }, + { "EE", "Estonia" }, + { "EG", "Egypt" }, + { "EH", "Western Sahara" }, + { "ER", "Eritrea" }, + { "ES", "Spain" }, + { "ET", "Ethiopia" }, + { "EU", "European Union" }, // Exceptionally reserved. + { "EZ", "Eurozone" }, // Exceptionally reserved. + { "FI", "Finland" }, + { "FJ", "Fiji" }, + { "FK", "Falkland Islands" }, + { "FM", "Micronesia" }, + { "FO", "Faroe Islands" }, + { "FR", "France" }, + { "FX", "France, Metropolitan" }, // Exceptionally reserved. + { "GA", "Gabon" }, + { "GB", "United Kingdom" }, + { "GD", "Grenada" }, + { "GE", "Georgia" }, + { "GF", "French Guiana" }, + { "GG", "Guernsey" }, + { "GH", "Ghana" }, + { "GI", "Gibraltar" }, + { "GL", "Greenland" }, + { "GM", "Gambia" }, + { "GN", "Guinea" }, + { "GP", "Guadeloupe" }, + { "GQ", "Equatorial Guinea" }, + { "GR", "Greece" }, + { "GS", "South Georgia and South Sandwich Islands" }, + { "GT", "Guatemala" }, + { "GU", "Guam" }, + { "GW", "Guinea-Bissau" }, + { "GY", "Guyana" }, + { "HK", "Hong Kong" }, + { "HM", "Heard Island and McDonald Islands" }, + { "HN", "Honduras" }, + { "HR", "Croatia" }, + { "HT", "Haiti" }, + { "HU", "Hungary" }, + { "IC", "Canary Islands" }, // Exceptionally reserved. + { "ID", "Indonesia" }, + { "IE", "Ireland" }, + { "IL", "Israel" }, + { "IM", "Isle of Man" }, + { "IN", "India" }, + { "IO", "British Indian Ocean Territory" }, + { "IQ", "Iraq" }, + { "IR", "Iran" }, + { "IS", "Iceland" }, + { "IT", "Italy" }, + { "JE", "Jersey" }, + { "JM", "Jamaica" }, + { "JO", "Jordan" }, + { "JP", "Japan" }, + { "KE", "Kenya" }, + { "KG", "Kyrgyzstan" }, + { "KH", "Cambodia" }, + { "KI", "Kiribati" }, + { "KM", "Comoros" }, + { "KN", "St. Kitts and Nevis" }, + { "KP", "North Korea" }, + { "KR", "South Korea" }, + { "KW", "Kuwait" }, + { "KY", "Cayman Islands" }, + { "KZ", "Kazakhstan" }, + { "LA", "Laos" }, + { "LB", "Lebanon" }, + { "LC", "St. Lucia" }, + { "LI", "Liechtenstein" }, + { "LK", "Sri Lanka" }, + { "LR", "Liberia" }, + { "LS", "Lesotho" }, + { "LT", "Lithuania" }, + { "LU", "Luxembourg" }, + { "LV", "Latvia" }, + { "LY", "Libya" }, + { "MA", "Morocco" }, + { "MC", "Monaco" }, + { "MD", "Moldova" }, + { "ME", "Montenegro" }, + { "MF", "St. Martin" }, + { "MG", "Madagascar" }, + { "MH", "Marshall Islands" }, + { "MK", "North Macedonia" }, + { "ML", "Mali" }, + { "MM", "Myanmar" }, + { "MN", "Mongolia" }, + { "MO", "Macao" }, + { "MP", "Northern Mariana Islands" }, + { "MQ", "Martinique" }, + { "MR", "Mauritania" }, + { "MS", "Montserrat" }, + { "MT", "Malta" }, + { "MU", "Mauritius" }, + { "MV", "Maldives" }, + { "MW", "Malawi" }, + { "MX", "Mexico" }, + { "MY", "Malaysia" }, + { "MZ", "Mozambique" }, + { "NA", "Namibia" }, + { "NC", "New Caledonia" }, + { "NE", "Niger" }, + { "NF", "Norfolk Island" }, + { "NG", "Nigeria" }, + { "NI", "Nicaragua" }, + { "NL", "Netherlands" }, + { "NO", "Norway" }, + { "NP", "Nepal" }, + { "NR", "Nauru" }, + { "NU", "Niue" }, + { "NZ", "New Zealand" }, + { "OM", "Oman" }, + { "PA", "Panama" }, + { "PE", "Peru" }, + { "PF", "French Polynesia" }, + { "PG", "Papua New Guinea" }, + { "PH", "Philippines" }, + { "PK", "Pakistan" }, + { "PL", "Poland" }, + { "PM", "St. Pierre and Miquelon" }, + { "PN", "Pitcairn Islands" }, + { "PR", "Puerto Rico" }, + { "PS", "Palestine" }, + { "PT", "Portugal" }, + { "PW", "Palau" }, + { "PY", "Paraguay" }, + { "QA", "Qatar" }, + { "RE", "Réunion" }, + { "RO", "Romania" }, + { "RS", "Serbia" }, + { "RU", "Russia" }, + { "RW", "Rwanda" }, + { "SA", "Saudi Arabia" }, + { "SB", "Solomon Islands" }, + { "SC", "Seychelles" }, + { "SD", "Sudan" }, + { "SE", "Sweden" }, + { "SG", "Singapore" }, + { "SH", "St. Helena, Ascension and Tristan da Cunha" }, + { "SI", "Slovenia" }, + { "SJ", "Svalbard and Jan Mayen" }, + { "SK", "Slovakia" }, + { "SL", "Sierra Leone" }, + { "SM", "San Marino" }, + { "SN", "Senegal" }, + { "SO", "Somalia" }, + { "SR", "Suriname" }, + { "SS", "South Sudan" }, + { "ST", "Sao Tome and Principe" }, + { "SV", "El Salvador" }, + { "SX", "Sint Maarten" }, + { "SY", "Syria" }, + { "SZ", "Eswatini" }, + { "TA", "Tristan da Cunha" }, // Exceptionally reserved. + { "TC", "Turks and Caicos Islands" }, + { "TD", "Chad" }, + { "TF", "French Southern Territories" }, + { "TG", "Togo" }, + { "TH", "Thailand" }, + { "TJ", "Tajikistan" }, + { "TK", "Tokelau" }, + { "TL", "Timor-Leste" }, + { "TM", "Turkmenistan" }, + { "TN", "Tunisia" }, + { "TO", "Tonga" }, + { "TR", "Turkey" }, + { "TT", "Trinidad and Tobago" }, + { "TV", "Tuvalu" }, + { "TW", "Taiwan" }, + { "TZ", "Tanzania" }, + { "UA", "Ukraine" }, + { "UG", "Uganda" }, + { "UM", "U.S. Outlying Islands" }, + { "US", "United States of America" }, + { "UY", "Uruguay" }, + { "UZ", "Uzbekistan" }, + { "VA", "Holy See" }, + { "VC", "St. Vincent and the Grenadines" }, + { "VE", "Venezuela" }, + { "VG", "British Virgin Islands" }, + { "VI", "U.S. Virgin Islands" }, + { "VN", "Viet Nam" }, + { "VU", "Vanuatu" }, + { "WF", "Wallis and Futuna" }, + { "WS", "Samoa" }, + { "XK", "Kosovo" }, // Temporary code, no official ISO code assigned. + { "YE", "Yemen" }, + { "YT", "Mayotte" }, + { "ZA", "South Africa" }, + { "ZM", "Zambia" }, + { "ZW", "Zimbabwe" }, + { nullptr, nullptr } +}; + +// Languages code, language name. +// Reference: +// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes +// - https://www.localeplanet.com/icu/index.html +// - https://lh.2xlibre.net/locales/ + +static const char *language_list[][2] = { + { "aa", "Afar" }, + { "ab", "Abkhazian" }, + { "ace", "Achinese" }, + { "ach", "Acoli" }, + { "ada", "Adangme" }, + { "ady", "Adyghe" }, + { "ae", "Avestan" }, + { "aeb", "Tunisian Arabic" }, + { "af", "Afrikaans" }, + { "afh", "Afrihili" }, + { "agq", "Aghem" }, + { "ain", "Ainu" }, + { "agr", "Aguaruna" }, + { "ak", "Akan" }, + { "akk", "Akkadian" }, + { "akz", "Alabama" }, + { "ale", "Aleut" }, + { "aln", "Gheg Albanian" }, + { "alt", "Southern Altai" }, + { "am", "Amharic" }, + { "an", "Aragonese" }, + { "ang", "Old English" }, + { "anp", "Angika" }, + { "ar", "Arabic" }, + { "arc", "Aramaic" }, + { "arn", "Mapudungun" }, + { "aro", "Araona" }, + { "arp", "Arapaho" }, + { "arq", "Algerian Arabic" }, + { "ars", "Najdi Arabic" }, + { "arw", "Arawak" }, + { "ary", "Moroccan Arabic" }, + { "arz", "Egyptian Arabic" }, + { "as", "Assamese" }, + { "asa", "Asu" }, + { "ase", "American Sign Language" }, + { "ast", "Asturian" }, + { "av", "Avaric" }, + { "avk", "Kotava" }, + { "awa", "Awadhi" }, + { "ayc", "Southern Aymara" }, + { "ay", "Aymara" }, + { "az", "Azerbaijani" }, + { "ba", "Bashkir" }, + { "bal", "Baluchi" }, + { "ban", "Balinese" }, + { "bar", "Bavarian" }, + { "bas", "Bassa" }, + { "bax", "Bamun" }, + { "bbc", "Batak Toba" }, + { "bbj", "Ghomala" }, + { "be", "Belarusian" }, + { "bej", "Beja" }, + { "bem", "Bemba" }, + { "ber", "Berber" }, + { "bew", "Betawi" }, + { "bez", "Bena" }, + { "bfd", "Bafut" }, + { "bfq", "Badaga" }, + { "bg", "Bulgarian" }, + { "bhb", "Bhili" }, + { "bgn", "Western Balochi" }, + { "bho", "Bhojpuri" }, + { "bi", "Bislama" }, + { "bik", "Bikol" }, + { "bin", "Bini" }, + { "bjn", "Banjar" }, + { "bkm", "Kom" }, + { "bla", "Siksika" }, + { "bm", "Bambara" }, + { "bn", "Bengali" }, + { "bo", "Tibetan" }, + { "bpy", "Bishnupriya" }, + { "bqi", "Bakhtiari" }, + { "br", "Breton" }, + { "brh", "Brahui" }, + { "brx", "Bodo" }, + { "bs", "Bosnian" }, + { "bss", "Akoose" }, + { "bua", "Buriat" }, + { "bug", "Buginese" }, + { "bum", "Bulu" }, + { "byn", "Bilin" }, + { "byv", "Medumba" }, + { "ca", "Catalan" }, + { "cad", "Caddo" }, + { "car", "Carib" }, + { "cay", "Cayuga" }, + { "cch", "Atsam" }, + { "ccp", "Chakma" }, + { "ce", "Chechen" }, + { "ceb", "Cebuano" }, + { "cgg", "Chiga" }, + { "ch", "Chamorro" }, + { "chb", "Chibcha" }, + { "chg", "Chagatai" }, + { "chk", "Chuukese" }, + { "chm", "Mari" }, + { "chn", "Chinook Jargon" }, + { "cho", "Choctaw" }, + { "chp", "Chipewyan" }, + { "chr", "Cherokee" }, + { "chy", "Cheyenne" }, + { "cic", "Chickasaw" }, + { "ckb", "Central Kurdish" }, + { "csb", "Kashubian" }, + { "cmn", "Mandarin Chinese" }, + { "co", "Corsican" }, + { "cop", "Coptic" }, + { "cps", "Capiznon" }, + { "cr", "Cree" }, + { "crh", "Crimean Tatar" }, + { "crs", "Seselwa Creole French" }, + { "cs", "Czech" }, + { "csb", "Kashubian" }, + { "cu", "Church Slavic" }, + { "cv", "Chuvash" }, + { "cy", "Welsh" }, + { "da", "Danish" }, + { "dak", "Dakota" }, + { "dar", "Dargwa" }, + { "dav", "Taita" }, + { "de", "German" }, + { "del", "Delaware" }, + { "den", "Slave" }, + { "dgr", "Dogrib" }, + { "din", "Dinka" }, + { "dje", "Zarma" }, + { "doi", "Dogri" }, + { "dsb", "Lower Sorbian" }, + { "dtp", "Central Dusun" }, + { "dua", "Duala" }, + { "dum", "Middle Dutch" }, + { "dv", "Dhivehi" }, + { "dyo", "Jola-Fonyi" }, + { "dyu", "Dyula" }, + { "dz", "Dzongkha" }, + { "dzg", "Dazaga" }, + { "ebu", "Embu" }, + { "ee", "Ewe" }, + { "efi", "Efik" }, + { "egl", "Emilian" }, + { "egy", "Ancient Egyptian" }, + { "eka", "Ekajuk" }, + { "el", "Greek" }, + { "elx", "Elamite" }, + { "en", "English" }, + { "enm", "Middle English" }, + { "eo", "Esperanto" }, + { "es", "Spanish" }, + { "esu", "Central Yupik" }, + { "et", "Estonian" }, + { "eu", "Basque" }, + { "ewo", "Ewondo" }, + { "ext", "Extremaduran" }, + { "fa", "Persian" }, + { "fan", "Fang" }, + { "fat", "Fanti" }, + { "ff", "Fulah" }, + { "fi", "Finnish" }, + { "fil", "Filipino" }, + { "fit", "Tornedalen Finnish" }, + { "fj", "Fijian" }, + { "fo", "Faroese" }, + { "fon", "Fon" }, + { "fr", "French" }, + { "frc", "Cajun French" }, + { "frm", "Middle French" }, + { "fro", "Old French" }, + { "frp", "Arpitan" }, + { "frr", "Northern Frisian" }, + { "frs", "Eastern Frisian" }, + { "fur", "Friulian" }, + { "fy", "Western Frisian" }, + { "ga", "Irish" }, + { "gaa", "Ga" }, + { "gag", "Gagauz" }, + { "gan", "Gan Chinese" }, + { "gay", "Gayo" }, + { "gba", "Gbaya" }, + { "gbz", "Zoroastrian Dari" }, + { "gd", "Scottish Gaelic" }, + { "gez", "Geez" }, + { "gil", "Gilbertese" }, + { "gl", "Galician" }, + { "glk", "Gilaki" }, + { "gmh", "Middle High German" }, + { "gn", "Guarani" }, + { "goh", "Old High German" }, + { "gom", "Goan Konkani" }, + { "gon", "Gondi" }, + { "gor", "Gorontalo" }, + { "got", "Gothic" }, + { "grb", "Grebo" }, + { "grc", "Ancient Greek" }, + { "gsw", "Swiss German" }, + { "gu", "Gujarati" }, + { "guc", "Wayuu" }, + { "gur", "Frafra" }, + { "guz", "Gusii" }, + { "gv", "Manx" }, + { "gwi", "Gwichʼin" }, + { "ha", "Hausa" }, + { "hai", "Haida" }, + { "hak", "Hakka Chinese" }, + { "haw", "Hawaiian" }, + { "he", "Hebrew" }, + { "hi", "Hindi" }, + { "hif", "Fiji Hindi" }, + { "hil", "Hiligaynon" }, + { "hit", "Hittite" }, + { "hmn", "Hmong" }, + { "ho", "Hiri Motu" }, + { "hne", "Chhattisgarhi" }, + { "hr", "Croatian" }, + { "hsb", "Upper Sorbian" }, + { "hsn", "Xiang Chinese" }, + { "ht", "Haitian" }, + { "hu", "Hungarian" }, + { "hup", "Hupa" }, + { "hus", "Huastec" }, + { "hy", "Armenian" }, + { "hz", "Herero" }, + { "ia", "Interlingua" }, + { "iba", "Iban" }, + { "ibb", "Ibibio" }, + { "id", "Indonesian" }, + { "ie", "Interlingue" }, + { "ig", "Igbo" }, + { "ii", "Sichuan Yi" }, + { "ik", "Inupiaq" }, + { "ilo", "Iloko" }, + { "inh", "Ingush" }, + { "io", "Ido" }, + { "is", "Icelandic" }, + { "it", "Italian" }, + { "iu", "Inuktitut" }, + { "izh", "Ingrian" }, + { "ja", "Japanese" }, + { "jam", "Jamaican Creole English" }, + { "jbo", "Lojban" }, + { "jgo", "Ngomba" }, + { "jmc", "Machame" }, + { "jpr", "Judeo-Persian" }, + { "jrb", "Judeo-Arabic" }, + { "jut", "Jutish" }, + { "jv", "Javanese" }, + { "ka", "Georgian" }, + { "kaa", "Kara-Kalpak" }, + { "kab", "Kabyle" }, + { "kac", "Kachin" }, + { "kaj", "Jju" }, + { "kam", "Kamba" }, + { "kaw", "Kawi" }, + { "kbd", "Kabardian" }, + { "kbl", "Kanembu" }, + { "kcg", "Tyap" }, + { "kde", "Makonde" }, + { "kea", "Kabuverdianu" }, + { "ken", "Kenyang" }, + { "kfo", "Koro" }, + { "kg", "Kongo" }, + { "kgp", "Kaingang" }, + { "kha", "Khasi" }, + { "kho", "Khotanese" }, + { "khq", "Koyra Chiini" }, + { "khw", "Khowar" }, + { "ki", "Kikuyu" }, + { "kiu", "Kirmanjki" }, + { "kj", "Kuanyama" }, + { "kk", "Kazakh" }, + { "kkj", "Kako" }, + { "kl", "Kalaallisut" }, + { "kln", "Kalenjin" }, + { "km", "Central Khmer" }, + { "kmb", "Kimbundu" }, + { "kn", "Kannada" }, + { "ko", "Korean" }, + { "koi", "Komi-Permyak" }, + { "kok", "Konkani" }, + { "kos", "Kosraean" }, + { "kpe", "Kpelle" }, + { "kr", "Kanuri" }, + { "krc", "Karachay-Balkar" }, + { "kri", "Krio" }, + { "krj", "Kinaray-a" }, + { "krl", "Karelian" }, + { "kru", "Kurukh" }, + { "ks", "Kashmiri" }, + { "ksb", "Shambala" }, + { "ksf", "Bafia" }, + { "ksh", "Colognian" }, + { "ku", "Kurdish" }, + { "kum", "Kumyk" }, + { "kut", "Kutenai" }, + { "kv", "Komi" }, + { "kw", "Cornish" }, + { "ky", "Kirghiz" }, + { "lag", "Langi" }, + { "la", "Latin" }, + { "lad", "Ladino" }, + { "lag", "Langi" }, + { "lah", "Lahnda" }, + { "lam", "Lamba" }, + { "lb", "Luxembourgish" }, + { "lez", "Lezghian" }, + { "lfn", "Lingua Franca Nova" }, + { "lg", "Ganda" }, + { "li", "Limburgan" }, + { "lij", "Ligurian" }, + { "liv", "Livonian" }, + { "lkt", "Lakota" }, + { "lmo", "Lombard" }, + { "ln", "Lingala" }, + { "lo", "Lao" }, + { "lol", "Mongo" }, + { "lou", "Louisiana Creole" }, + { "loz", "Lozi" }, + { "lrc", "Northern Luri" }, + { "lt", "Lithuanian" }, + { "ltg", "Latgalian" }, + { "lu", "Luba-Katanga" }, + { "lua", "Luba-Lulua" }, + { "lui", "Luiseno" }, + { "lun", "Lunda" }, + { "luo", "Luo" }, + { "lus", "Mizo" }, + { "luy", "Luyia" }, + { "lv", "Latvian" }, + { "lzh", "Literary Chinese" }, + { "lzz", "Laz" }, + { "mad", "Madurese" }, + { "maf", "Mafa" }, + { "mag", "Magahi" }, + { "mai", "Maithili" }, + { "mak", "Makasar" }, + { "man", "Mandingo" }, + { "mas", "Masai" }, + { "mde", "Maba" }, + { "mdf", "Moksha" }, + { "mdr", "Mandar" }, + { "men", "Mende" }, + { "mer", "Meru" }, + { "mfe", "Morisyen" }, + { "mg", "Malagasy" }, + { "mga", "Middle Irish" }, + { "mgh", "Makhuwa-Meetto" }, + { "mgo", "Metaʼ" }, + { "mh", "Marshallese" }, + { "mhr", "Eastern Mari" }, + { "mi", "Māori" }, + { "mic", "Mi'kmaq" }, + { "min", "Minangkabau" }, + { "miq", "Mískito" }, + { "mjw", "Karbi" }, + { "mk", "Macedonian" }, + { "ml", "Malayalam" }, + { "mn", "Mongolian" }, + { "mnc", "Manchu" }, + { "mni", "Manipuri" }, + { "mnw", "Mon" }, + { "mos", "Mossi" }, + { "moh", "Mohawk" }, + { "mr", "Marathi" }, + { "mrj", "Western Mari" }, + { "ms", "Malay" }, + { "mt", "Maltese" }, + { "mua", "Mundang" }, + { "mus", "Muscogee" }, + { "mwl", "Mirandese" }, + { "mwr", "Marwari" }, + { "mwv", "Mentawai" }, + { "my", "Burmese" }, + { "mye", "Myene" }, + { "myv", "Erzya" }, + { "mzn", "Mazanderani" }, + { "na", "Nauru" }, + { "nah", "Nahuatl" }, + { "nan", "Min Nan Chinese" }, + { "nap", "Neapolitan" }, + { "naq", "Nama" }, + { "nan", "Min Nan Chinese" }, + { "nb", "Norwegian Bokmål" }, + { "nd", "North Ndebele" }, + { "nds", "Low German" }, + { "ne", "Nepali" }, + { "new", "Newari" }, + { "nhn", "Central Nahuatl" }, + { "ng", "Ndonga" }, + { "nia", "Nias" }, + { "niu", "Niuean" }, + { "njo", "Ao Naga" }, + { "nl", "Dutch" }, + { "nmg", "Kwasio" }, + { "nn", "Norwegian Nynorsk" }, + { "nnh", "Ngiemboon" }, + { "nog", "Nogai" }, + { "non", "Old Norse" }, + { "nov", "Novial" }, + { "nqo", "N'ko" }, + { "nr", "South Ndebele" }, + { "nso", "Pedi" }, + { "nus", "Nuer" }, + { "nv", "Navajo" }, + { "nwc", "Classical Newari" }, + { "ny", "Nyanja" }, + { "nym", "Nyamwezi" }, + { "nyn", "Nyankole" }, + { "nyo", "Nyoro" }, + { "nzi", "Nzima" }, + { "oc", "Occitan" }, + { "oj", "Ojibwa" }, + { "om", "Oromo" }, + { "or", "Odia" }, + { "os", "Ossetic" }, + { "osa", "Osage" }, + { "ota", "Ottoman Turkish" }, + { "pa", "Panjabi" }, + { "pag", "Pangasinan" }, + { "pal", "Pahlavi" }, + { "pam", "Pampanga" }, + { "pap", "Papiamento" }, + { "pau", "Palauan" }, + { "pcd", "Picard" }, + { "pcm", "Nigerian Pidgin" }, + { "pdc", "Pennsylvania German" }, + { "pdt", "Plautdietsch" }, + { "peo", "Old Persian" }, + { "pfl", "Palatine German" }, + { "phn", "Phoenician" }, + { "pi", "Pali" }, + { "pl", "Polish" }, + { "pms", "Piedmontese" }, + { "pnt", "Pontic" }, + { "pon", "Pohnpeian" }, + { "pr", "Pirate" }, + { "prg", "Prussian" }, + { "pro", "Old Provençal" }, + { "prs", "Dari" }, + { "ps", "Pushto" }, + { "pt", "Portuguese" }, + { "qu", "Quechua" }, + { "quc", "K'iche" }, + { "qug", "Chimborazo Highland Quichua" }, + { "quy", "Ayacucho Quechua" }, + { "quz", "Cusco Quechua" }, + { "raj", "Rajasthani" }, + { "rap", "Rapanui" }, + { "rar", "Rarotongan" }, + { "rgn", "Romagnol" }, + { "rif", "Riffian" }, + { "rm", "Romansh" }, + { "rn", "Rundi" }, + { "ro", "Romanian" }, + { "rof", "Rombo" }, + { "rom", "Romany" }, + { "rtm", "Rotuman" }, + { "ru", "Russian" }, + { "rue", "Rusyn" }, + { "rug", "Roviana" }, + { "rup", "Aromanian" }, + { "rw", "Kinyarwanda" }, + { "rwk", "Rwa" }, + { "sa", "Sanskrit" }, + { "sad", "Sandawe" }, + { "sah", "Sakha" }, + { "sam", "Samaritan Aramaic" }, + { "saq", "Samburu" }, + { "sas", "Sasak" }, + { "sat", "Santali" }, + { "saz", "Saurashtra" }, + { "sba", "Ngambay" }, + { "sbp", "Sangu" }, + { "sc", "Sardinian" }, + { "scn", "Sicilian" }, + { "sco", "Scots" }, + { "sd", "Sindhi" }, + { "sdc", "Sassarese Sardinian" }, + { "sdh", "Southern Kurdish" }, + { "se", "Northern Sami" }, + { "see", "Seneca" }, + { "seh", "Sena" }, + { "sei", "Seri" }, + { "sel", "Selkup" }, + { "ses", "Koyraboro Senni" }, + { "sg", "Sango" }, + { "sga", "Old Irish" }, + { "sgs", "Samogitian" }, + { "sh", "Serbo-Croatian" }, + { "shi", "Tachelhit" }, + { "shn", "Shan" }, + { "shs", "Shuswap" }, + { "shu", "Chadian Arabic" }, + { "si", "Sinhala" }, + { "sid", "Sidamo" }, + { "sk", "Slovak" }, + { "sl", "Slovenian" }, + { "sli", "Lower Silesian" }, + { "sly", "Selayar" }, + { "sm", "Samoan" }, + { "sma", "Southern Sami" }, + { "smj", "Lule Sami" }, + { "smn", "Inari Sami" }, + { "sms", "Skolt Sami" }, + { "sn", "Shona" }, + { "snk", "Soninke" }, + { "so", "Somali" }, + { "sog", "Sogdien" }, + { "son", "Songhai" }, + { "sq", "Albanian" }, + { "sr", "Serbian" }, + { "srn", "Sranan Tongo" }, + { "srr", "Serer" }, + { "ss", "Swati" }, + { "ssy", "Saho" }, + { "st", "Southern Sotho" }, + { "stq", "Saterland Frisian" }, + { "su", "Sundanese" }, + { "suk", "Sukuma" }, + { "sus", "Susu" }, + { "sux", "Sumerian" }, + { "sv", "Swedish" }, + { "sw", "Swahili" }, + { "swb", "Comorian" }, + { "swc", "Congo Swahili" }, + { "syc", "Classical Syriac" }, + { "syr", "Syriac" }, + { "szl", "Silesian" }, + { "ta", "Tamil" }, + { "tcy", "Tulu" }, + { "te", "Telugu" }, + { "tem", "Timne" }, + { "teo", "Teso" }, + { "ter", "Tereno" }, + { "tet", "Tetum" }, + { "tg", "Tajik" }, + { "th", "Thai" }, + { "the", "Chitwania Tharu" }, + { "ti", "Tigrinya" }, + { "tig", "Tigre" }, + { "tiv", "Tiv" }, + { "tk", "Turkmen" }, + { "tkl", "Tokelau" }, + { "tkr", "Tsakhur" }, + { "tl", "Tagalog" }, + { "tlh", "Klingon" }, + { "tli", "Tlingit" }, + { "tly", "Talysh" }, + { "tmh", "Tamashek" }, + { "tn", "Tswana" }, + { "to", "Tongan" }, + { "tog", "Nyasa Tonga" }, + { "tpi", "Tok Pisin" }, + { "tr", "Turkish" }, + { "tru", "Turoyo" }, + { "trv", "Taroko" }, + { "ts", "Tsonga" }, + { "tsd", "Tsakonian" }, + { "tsi", "Tsimshian" }, + { "tt", "Tatar" }, + { "ttt", "Muslim Tat" }, + { "tum", "Tumbuka" }, + { "tvl", "Tuvalu" }, + { "tw", "Twi" }, + { "twq", "Tasawaq" }, + { "ty", "Tahitian" }, + { "tyv", "Tuvinian" }, + { "tzm", "Central Atlas Tamazight" }, + { "udm", "Udmurt" }, + { "ug", "Uyghur" }, + { "uga", "Ugaritic" }, + { "uk", "Ukrainian" }, + { "umb", "Umbundu" }, + { "unm", "Unami" }, + { "ur", "Urdu" }, + { "uz", "Uzbek" }, + { "vai", "Vai" }, + { "ve", "Venda" }, + { "vec", "Venetian" }, + { "vep", "Veps" }, + { "vi", "Vietnamese" }, + { "vls", "West Flemish" }, + { "vmf", "Main-Franconian" }, + { "vo", "Volapük" }, + { "vot", "Votic" }, + { "vro", "Võro" }, + { "vun", "Vunjo" }, + { "wa", "Walloon" }, + { "wae", "Walser" }, + { "wal", "Wolaytta" }, + { "war", "Waray" }, + { "was", "Washo" }, + { "wbp", "Warlpiri" }, + { "wo", "Wolof" }, + { "wuu", "Wu Chinese" }, + { "xal", "Kalmyk" }, + { "xh", "Xhosa" }, + { "xmf", "Mingrelian" }, + { "xog", "Soga" }, + { "yao", "Yao" }, + { "yap", "Yapese" }, + { "yav", "Yangben" }, + { "ybb", "Yemba" }, + { "yi", "Yiddish" }, + { "yo", "Yoruba" }, + { "yrl", "Nheengatu" }, + { "yue", "Yue Chinese" }, + { "yuw", "Papua New Guinea" }, + { "za", "Zhuang" }, + { "zap", "Zapotec" }, + { "zbl", "Blissymbols" }, + { "zea", "Zeelandic" }, + { "zen", "Zenaga" }, + { "zgh", "Standard Moroccan Tamazight" }, + { "zh", "Chinese" }, + { "zu", "Zulu" }, + { "zun", "Zuni" }, + { "zza", "Zaza" }, + { nullptr, nullptr } +}; + +// Additional regional variants. +// Variant name, supported languages. + +static const char *locale_variants[][2] = { + { "valencia", "ca" }, + { "iqtelif", "tt" }, + { "saaho", "aa" }, + { "tradnl", "es" }, + { nullptr, nullptr }, +}; + +// Script names and codes (excludes typographic variants, special codes, reserved codes and aliases for combined scripts). +// Reference: +// - https://en.wikipedia.org/wiki/ISO_15924 + +static const char *script_list[][2] = { + { "Adlam", "Adlm" }, + { "Afaka", "Afak" }, + { "Caucasian Albanian", "Aghb" }, + { "Ahom", "Ahom" }, + { "Arabic", "Arab" }, + { "Imperial Aramaic", "Armi" }, + { "Armenian", "Armn" }, + { "Avestan", "Avst" }, + { "Balinese", "Bali" }, + { "Bamum", "Bamu" }, + { "Bassa Vah", "Bass" }, + { "Batak", "Batk" }, + { "Bengali", "Beng" }, + { "Bhaiksuki", "Bhks" }, + { "Blissymbols", "Blis" }, + { "Bopomofo", "Bopo" }, + { "Brahmi", "Brah" }, + { "Braille", "Brai" }, + { "Buginese", "Bugi" }, + { "Buhid", "Buhd" }, + { "Chakma", "Cakm" }, + { "Unified Canadian Aboriginal", "Cans" }, + { "Carian", "Cari" }, + { "Cham", "Cham" }, + { "Cherokee", "Cher" }, + { "Chorasmian", "Chrs" }, + { "Cirth", "Cirt" }, + { "Coptic", "Copt" }, + { "Cypro-Minoan", "Cpmn" }, + { "Cypriot", "Cprt" }, + { "Cyrillic", "Cyrl" }, + { "Devanagari", "Deva" }, + { "Dives Akuru", "Diak" }, + { "Dogra", "Dogr" }, + { "Deseret", "Dsrt" }, + { "Duployan", "Dupl" }, + { "Egyptian demotic", "Egyd" }, + { "Egyptian hieratic", "Egyh" }, + { "Egyptian hieroglyphs", "Egyp" }, + { "Elbasan", "Elba" }, + { "Elymaic", "Elym" }, + { "Ethiopic", "Ethi" }, + { "Khutsuri", "Geok" }, + { "Georgian", "Geor" }, + { "Glagolitic", "Glag" }, + { "Gunjala Gondi", "Gong" }, + { "Masaram Gondi", "Gonm" }, + { "Gothic", "Goth" }, + { "Grantha", "Gran" }, + { "Greek", "Grek" }, + { "Gujarati", "Gujr" }, + { "Gurmukhi", "Guru" }, + { "Hangul", "Hang" }, + { "Han", "Hani" }, + { "Hanunoo", "Hano" }, + { "Simplified", "Hans" }, + { "Traditional", "Hant" }, + { "Hatran", "Hatr" }, + { "Hebrew", "Hebr" }, + { "Hiragana", "Hira" }, + { "Anatolian Hieroglyphs", "Hluw" }, + { "Pahawh Hmong", "Hmng" }, + { "Nyiakeng Puachue Hmong", "Hmnp" }, + { "Old Hungarian", "Hung" }, + { "Indus", "Inds" }, + { "Old Italic", "Ital" }, + { "Javanese", "Java" }, + { "Jurchen", "Jurc" }, + { "Kayah Li", "Kali" }, + { "Katakana", "Kana" }, + { "Kharoshthi", "Khar" }, + { "Khmer", "Khmr" }, + { "Khojki", "Khoj" }, + { "Khitan large script", "Kitl" }, + { "Khitan small script", "Kits" }, + { "Kannada", "Knda" }, + { "Kpelle", "Kpel" }, + { "Kaithi", "Kthi" }, + { "Tai Tham", "Lana" }, + { "Lao", "Laoo" }, + { "Latin", "Latn" }, + { "Leke", "Leke" }, + { "Lepcha", "Lepc" }, + { "Limbu", "Limb" }, + { "Linear A", "Lina" }, + { "Linear B", "Linb" }, + { "Lisu", "Lisu" }, + { "Loma", "Loma" }, + { "Lycian", "Lyci" }, + { "Lydian", "Lydi" }, + { "Mahajani", "Mahj" }, + { "Makasar", "Maka" }, + { "Mandaic", "Mand" }, + { "Manichaean", "Mani" }, + { "Marchen", "Marc" }, + { "Mayan Hieroglyphs", "Maya" }, + { "Medefaidrin", "Medf" }, + { "Mende Kikakui", "Mend" }, + { "Meroitic Cursive", "Merc" }, + { "Meroitic Hieroglyphs", "Mero" }, + { "Malayalam", "Mlym" }, + { "Modi", "Modi" }, + { "Mongolian", "Mong" }, + { "Moon", "Moon" }, + { "Mro", "Mroo" }, + { "Meitei Mayek", "Mtei" }, + { "Multani", "Mult" }, + { "Myanmar (Burmese)", "Mymr" }, + { "Nandinagari", "Nand" }, + { "Old North Arabian", "Narb" }, + { "Nabataean", "Nbat" }, + { "Newa", "Newa" }, + { "Naxi Dongba", "Nkdb" }, + { "Nakhi Geba", "Nkgb" }, + { "N'ko", "Nkoo" }, + { "Nüshu", "Nshu" }, + { "Ogham", "Ogam" }, + { "Ol Chiki", "Olck" }, + { "Old Turkic", "Orkh" }, + { "Oriya", "Orya" }, + { "Osage", "Osge" }, + { "Osmanya", "Osma" }, + { "Old Uyghur", "Ougr" }, + { "Palmyrene", "Palm" }, + { "Pau Cin Hau", "Pauc" }, + { "Proto-Cuneiform", "Pcun" }, + { "Proto-Elamite", "Pelm" }, + { "Old Permic", "Perm" }, + { "Phags-pa", "Phag" }, + { "Inscriptional Pahlavi", "Phli" }, + { "Psalter Pahlavi", "Phlp" }, + { "Book Pahlavi", "Phlv" }, + { "Phoenician", "Phnx" }, + { "Klingon", "Piqd" }, + { "Miao", "Plrd" }, + { "Inscriptional Parthian", "Prti" }, + { "Proto-Sinaitic", "Psin" }, + { "Ranjana", "Ranj" }, + { "Rejang", "Rjng" }, + { "Hanifi Rohingya", "Rohg" }, + { "Rongorongo", "Roro" }, + { "Runic", "Runr" }, + { "Samaritan", "Samr" }, + { "Sarati", "Sara" }, + { "Old South Arabian", "Sarb" }, + { "Saurashtra", "Saur" }, + { "SignWriting", "Sgnw" }, + { "Shavian", "Shaw" }, + { "Sharada", "Shrd" }, + { "Shuishu", "Shui" }, + { "Siddham", "Sidd" }, + { "Khudawadi", "Sind" }, + { "Sinhala", "Sinh" }, + { "Sogdian", "Sogd" }, + { "Old Sogdian", "Sogo" }, + { "Sora Sompeng", "Sora" }, + { "Soyombo", "Soyo" }, + { "Sundanese", "Sund" }, + { "Syloti Nagri", "Sylo" }, + { "Syriac", "Syrc" }, + { "Tagbanwa", "Tagb" }, + { "Takri", "Takr" }, + { "Tai Le", "Tale" }, + { "New Tai Lue", "Talu" }, + { "Tamil", "Taml" }, + { "Tangut", "Tang" }, + { "Tai Viet", "Tavt" }, + { "Telugu", "Telu" }, + { "Tengwar", "Teng" }, + { "Tifinagh", "Tfng" }, + { "Tagalog", "Tglg" }, + { "Thaana", "Thaa" }, + { "Thai", "Thai" }, + { "Tibetan", "Tibt" }, + { "Tirhuta", "Tirh" }, + { "Tangsa", "Tnsa" }, + { "Toto", "Toto" }, + { "Ugaritic", "Ugar" }, + { "Vai", "Vaii" }, + { "Visible Speech", "Visp" }, + { "Vithkuqi", "Vith" }, + { "Warang Citi", "Wara" }, + { "Wancho", "Wcho" }, + { "Woleai", "Wole" }, + { "Old Persian", "Xpeo" }, + { "Cuneiform", "Xsux" }, + { "Yezidi", "Yezi" }, + { "Yi", "Yiii" }, + { "Zanabazar Square", "Zanb" }, + { nullptr, nullptr } +}; + +#endif // LOCALES_H diff --git a/core/string/node_path.cpp b/core/string/node_path.cpp index 7ab85ac9d0..238897c2b1 100644 --- a/core/string/node_path.cpp +++ b/core/string/node_path.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -368,7 +368,7 @@ NodePath::NodePath(const String &p_path) { for (int i = from; i <= path.length(); i++) { if (path[i] == ':' || path[i] == 0) { String str = path.substr(from, i - from); - if (str == "") { + if (str.is_empty()) { if (path[i] == 0) { continue; // Allow end-of-path : } diff --git a/core/string/node_path.h b/core/string/node_path.h index a277ab26fa..53976bd524 100644 --- a/core/string/node_path.h +++ b/core/string/node_path.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp index f8be564740..07b58f2418 100644 --- a/core/string/optimized_translation.cpp +++ b/core/string/optimized_translation.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/optimized_translation.h b/core/string/optimized_translation.h index bccf932383..f3dbfe8f5c 100644 --- a/core/string/optimized_translation.h +++ b/core/string/optimized_translation.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -38,7 +38,7 @@ class OptimizedTranslation : public Translation { //this translation uses a sort of modified perfect hash algorithm //it requires hashing strings twice and then does a binary search, - //so it's slower, but at the same time it has an extreemly high chance + //so it's slower, but at the same time it has an extremely high chance //of catching untranslated strings //load/store friendly types diff --git a/core/string/print_string.cpp b/core/string/print_string.cpp index adc218f597..97e119bcf6 100644 --- a/core/string/print_string.cpp +++ b/core/string/print_string.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/print_string.h b/core/string/print_string.h index 3cd170b68e..669d2ea316 100644 --- a/core/string/print_string.h +++ b/core/string/print_string.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/string_buffer.h b/core/string/string_buffer.h index 33897c3674..424952f786 100644 --- a/core/string/string_buffer.h +++ b/core/string/string_buffer.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/string_builder.cpp b/core/string/string_builder.cpp index 834c87c845..7359ff59e1 100644 --- a/core/string/string_builder.cpp +++ b/core/string/string_builder.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -33,7 +33,7 @@ #include <string.h> StringBuilder &StringBuilder::append(const String &p_string) { - if (p_string == String()) { + if (p_string.is_empty()) { return *this; } diff --git a/core/string/string_builder.h b/core/string/string_builder.h index 30ce2a06f7..897efa95ef 100644 --- a/core/string/string_builder.h +++ b/core/string/string_builder.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/string_name.cpp b/core/string/string_name.cpp index 9024f60dae..2e941b8037 100644 --- a/core/string/string_name.cpp +++ b/core/string/string_name.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -73,23 +73,38 @@ void StringName::cleanup() { d = d->next; } } - print_line("\nStringName Reference Ranking:\n"); + + print_line("\nStringName reference ranking (from most to least referenced):\n"); + data.sort_custom<DebugSortReferences>(); - for (int i = 0; i < MIN(100, data.size()); i++) { + int unreferenced_stringnames = 0; + int rarely_referenced_stringnames = 0; + for (int i = 0; i < data.size(); i++) { print_line(itos(i + 1) + ": " + data[i]->get_name() + " - " + itos(data[i]->debug_references)); + if (data[i]->debug_references == 0) { + unreferenced_stringnames += 1; + } else if (data[i]->debug_references < 5) { + rarely_referenced_stringnames += 1; + } } + + print_line(vformat("\nOut of %d StringNames, %d StringNames were never referenced during this run (0 times) (%.2f%%).", data.size(), unreferenced_stringnames, unreferenced_stringnames / float(data.size()) * 100)); + print_line(vformat("Out of %d StringNames, %d StringNames were rarely referenced during this run (1-4 times) (%.2f%%).", data.size(), rarely_referenced_stringnames, rarely_referenced_stringnames / float(data.size()) * 100)); } #endif int lost_strings = 0; for (int i = 0; i < STRING_TABLE_LEN; i++) { while (_table[i]) { _Data *d = _table[i]; - lost_strings++; - if (d->static_count.get() != d->refcount.get() && OS::get_singleton()->is_stdout_verbose()) { - if (d->cname) { - print_line("Orphan StringName: " + String(d->cname)); - } else { - print_line("Orphan StringName: " + String(d->name)); + if (d->static_count.get() != d->refcount.get()) { + lost_strings++; + + if (OS::get_singleton()->is_stdout_verbose()) { + if (d->cname) { + print_line("Orphan StringName: " + String(d->cname)); + } else { + print_line("Orphan StringName: " + String(d->name)); + } } } @@ -310,7 +325,7 @@ StringName::StringName(const String &p_name, bool p_static) { ERR_FAIL_COND(!configured); - if (p_name == String()) { + if (p_name.is_empty()) { return; } @@ -434,7 +449,7 @@ StringName StringName::search(const char32_t *p_name) { } StringName StringName::search(const String &p_name) { - ERR_FAIL_COND_V(p_name == "", StringName()); + ERR_FAIL_COND_V(p_name.is_empty(), StringName()); MutexLock lock(mutex); diff --git a/core/string/string_name.h b/core/string/string_name.h index ce7988744b..6f08d32981 100644 --- a/core/string/string_name.h +++ b/core/string/string_name.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -181,6 +181,18 @@ bool operator!=(const char *p_name, const StringName &p_string_name); StringName _scs_create(const char *p_chr, bool p_static = false); +/* + * The SNAME macro is used to speed up StringName creation, as it allows caching it after the first usage in a very efficient way. + * It should NOT be used everywhere, but instead in places where high performance is required and the creation of a StringName + * can be costly. Places where it should be used are: + * - Control::get_theme_*(<name> and Window::get_theme_*(<name> functions. + * - emit_signal(<name>,..) function + * - call_deferred(<name>,..) function + * - Comparisons to a StringName in overridden _set and _get methods. + * + * Use in places that can be called hundreds of times per frame (or more) is recommended, but this situation is very rare. If in doubt, do not use. + */ + #define SNAME(m_arg) ([]() -> const StringName & { static StringName sname = _scs_create(m_arg, true); return sname; })() #endif // STRING_NAME_H diff --git a/core/string/translation.cpp b/core/string/translation.cpp index b98aad9b58..c41828de05 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -33,793 +33,12 @@ #include "core/config/project_settings.h" #include "core/io/resource_loader.h" #include "core/os/os.h" +#include "core/string/locales.h" #ifdef TOOLS_ENABLED #include "main/main.h" #endif -// ISO 639-1 language codes (and a couple of three-letter ISO 639-2 codes), -// with the addition of glibc locales with their regional identifiers. -// This list must match the language names (in English) of locale_names. -// -// References: -// - https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes -// - https://lh.2xlibre.net/locales/ -// - https://iso639-3.sil.org/ - -static const char *locale_list[] = { - "aa", // Afar - "aa_DJ", // Afar (Djibouti) - "aa_ER", // Afar (Eritrea) - "aa_ET", // Afar (Ethiopia) - "af", // Afrikaans - "af_ZA", // Afrikaans (South Africa) - "agr_PE", // Aguaruna (Peru) - "ak_GH", // Akan (Ghana) - "am_ET", // Amharic (Ethiopia) - "an_ES", // Aragonese (Spain) - "anp_IN", // Angika (India) - "ar", // Arabic - "ar_AE", // Arabic (United Arab Emirates) - "ar_BH", // Arabic (Bahrain) - "ar_DZ", // Arabic (Algeria) - "ar_EG", // Arabic (Egypt) - "ar_IN", // Arabic (India) - "ar_IQ", // Arabic (Iraq) - "ar_JO", // Arabic (Jordan) - "ar_KW", // Arabic (Kuwait) - "ar_LB", // Arabic (Lebanon) - "ar_LY", // Arabic (Libya) - "ar_MA", // Arabic (Morocco) - "ar_OM", // Arabic (Oman) - "ar_QA", // Arabic (Qatar) - "ar_SA", // Arabic (Saudi Arabia) - "ar_SD", // Arabic (Sudan) - "ar_SS", // Arabic (South Soudan) - "ar_SY", // Arabic (Syria) - "ar_TN", // Arabic (Tunisia) - "ar_YE", // Arabic (Yemen) - "as_IN", // Assamese (India) - "ast_ES", // Asturian (Spain) - "ayc_PE", // Southern Aymara (Peru) - "ay_PE", // Aymara (Peru) - "az", // Azerbaijani - "az_AZ", // Azerbaijani (Azerbaijan) - "be", // Belarusian - "be_BY", // Belarusian (Belarus) - "bem_ZM", // Bemba (Zambia) - "ber_DZ", // Berber languages (Algeria) - "ber_MA", // Berber languages (Morocco) - "bg", // Bulgarian - "bg_BG", // Bulgarian (Bulgaria) - "bhb_IN", // Bhili (India) - "bho_IN", // Bhojpuri (India) - "bi_TV", // Bislama (Tuvalu) - "bn", // Bengali - "bn_BD", // Bengali (Bangladesh) - "bn_IN", // Bengali (India) - "bo", // Tibetan - "bo_CN", // Tibetan (China) - "bo_IN", // Tibetan (India) - "br", // Breton - "br_FR", // Breton (France) - "brx_IN", // Bodo (India) - "bs_BA", // Bosnian (Bosnia and Herzegovina) - "byn_ER", // Bilin (Eritrea) - "ca", // Catalan - "ca_AD", // Catalan (Andorra) - "ca_ES", // Catalan (Spain) - "ca_FR", // Catalan (France) - "ca_IT", // Catalan (Italy) - "ce_RU", // Chechen (Russia) - "chr_US", // Cherokee (United States) - "cmn_TW", // Mandarin Chinese (Taiwan) - "crh_UA", // Crimean Tatar (Ukraine) - "csb_PL", // Kashubian (Poland) - "cs", // Czech - "cs_CZ", // Czech (Czech Republic) - "cv_RU", // Chuvash (Russia) - "cy_GB", // Welsh (United Kingdom) - "da", // Danish - "da_DK", // Danish (Denmark) - "de", // German - "de_AT", // German (Austria) - "de_BE", // German (Belgium) - "de_CH", // German (Switzerland) - "de_DE", // German (Germany) - "de_IT", // German (Italy) - "de_LU", // German (Luxembourg) - "doi_IN", // Dogri (India) - "dv_MV", // Dhivehi (Maldives) - "dz_BT", // Dzongkha (Bhutan) - "el", // Greek - "el_CY", // Greek (Cyprus) - "el_GR", // Greek (Greece) - "en", // English - "en_AG", // English (Antigua and Barbuda) - "en_AU", // English (Australia) - "en_BW", // English (Botswana) - "en_CA", // English (Canada) - "en_DK", // English (Denmark) - "en_GB", // English (United Kingdom) - "en_HK", // English (Hong Kong) - "en_IE", // English (Ireland) - "en_IL", // English (Israel) - "en_IN", // English (India) - "en_NG", // English (Nigeria) - "en_NZ", // English (New Zealand) - "en_PH", // English (Philippines) - "en_SG", // English (Singapore) - "en_US", // English (United States) - "en_ZA", // English (South Africa) - "en_ZM", // English (Zambia) - "en_ZW", // English (Zimbabwe) - "eo", // Esperanto - "es", // Spanish - "es_AR", // Spanish (Argentina) - "es_BO", // Spanish (Bolivia) - "es_CL", // Spanish (Chile) - "es_CO", // Spanish (Colombia) - "es_CR", // Spanish (Costa Rica) - "es_CU", // Spanish (Cuba) - "es_DO", // Spanish (Dominican Republic) - "es_EC", // Spanish (Ecuador) - "es_ES", // Spanish (Spain) - "es_GT", // Spanish (Guatemala) - "es_HN", // Spanish (Honduras) - "es_MX", // Spanish (Mexico) - "es_NI", // Spanish (Nicaragua) - "es_PA", // Spanish (Panama) - "es_PE", // Spanish (Peru) - "es_PR", // Spanish (Puerto Rico) - "es_PY", // Spanish (Paraguay) - "es_SV", // Spanish (El Salvador) - "es_US", // Spanish (United States) - "es_UY", // Spanish (Uruguay) - "es_VE", // Spanish (Venezuela) - "et", // Estonian - "et_EE", // Estonian (Estonia) - "eu", // Basque - "eu_ES", // Basque (Spain) - "fa", // Persian - "fa_IR", // Persian (Iran) - "ff_SN", // Fulah (Senegal) - "fi", // Finnish - "fi_FI", // Finnish (Finland) - "fil", // Filipino - "fil_PH", // Filipino (Philippines) - "fo_FO", // Faroese (Faroe Islands) - "fr", // French - "fr_BE", // French (Belgium) - "fr_CA", // French (Canada) - "fr_CH", // French (Switzerland) - "fr_FR", // French (France) - "fr_LU", // French (Luxembourg) - "fur_IT", // Friulian (Italy) - "fy_DE", // Western Frisian (Germany) - "fy_NL", // Western Frisian (Netherlands) - "ga", // Irish - "ga_IE", // Irish (Ireland) - "gd_GB", // Scottish Gaelic (United Kingdom) - "gez_ER", // Geez (Eritrea) - "gez_ET", // Geez (Ethiopia) - "gl", // Galician - "gl_ES", // Galician (Spain) - "gu_IN", // Gujarati (India) - "gv_GB", // Manx (United Kingdom) - "hak_TW", // Hakka Chinese (Taiwan) - "ha_NG", // Hausa (Nigeria) - "he", // Hebrew - "he_IL", // Hebrew (Israel) - "hi", // Hindi - "hi_IN", // Hindi (India) - "hne_IN", // Chhattisgarhi (India) - "hr", // Croatian - "hr_HR", // Croatian (Croatia) - "hsb_DE", // Upper Sorbian (Germany) - "ht_HT", // Haitian (Haiti) - "hu", // Hungarian - "hu_HU", // Hungarian (Hungary) - "hus_MX", // Huastec (Mexico) - "hy_AM", // Armenian (Armenia) - "ia_FR", // Interlingua (France) - "id", // Indonesian - "id_ID", // Indonesian (Indonesia) - "ig_NG", // Igbo (Nigeria) - "ik_CA", // Inupiaq (Canada) - "is", // Icelandic - "is_IS", // Icelandic (Iceland) - "it", // Italian - "it_CH", // Italian (Switzerland) - "it_IT", // Italian (Italy) - "iu_CA", // Inuktitut (Canada) - "ja", // Japanese - "ja_JP", // Japanese (Japan) - "kab_DZ", // Kabyle (Algeria) - "ka", // Georgian - "ka_GE", // Georgian (Georgia) - "kk_KZ", // Kazakh (Kazakhstan) - "kl_GL", // Kalaallisut (Greenland) - "km", // Central Khmer - "km_KH", // Central Khmer (Cambodia) - "kn_IN", // Kannada (India) - "kok_IN", // Konkani (India) - "ko", // Korean - "ko_KR", // Korean (South Korea) - "ks_IN", // Kashmiri (India) - "ku", // Kurdish - "ku_TR", // Kurdish (Turkey) - "kw_GB", // Cornish (United Kingdom) - "ky_KG", // Kirghiz (Kyrgyzstan) - "lb_LU", // Luxembourgish (Luxembourg) - "lg_UG", // Ganda (Uganda) - "li_BE", // Limburgan (Belgium) - "li_NL", // Limburgan (Netherlands) - "lij_IT", // Ligurian (Italy) - "ln_CD", // Lingala (Congo) - "lo_LA", // Lao (Laos) - "lt", // Lithuanian - "lt_LT", // Lithuanian (Lithuania) - "lv", // Latvian - "lv_LV", // Latvian (Latvia) - "lzh_TW", // Literary Chinese (Taiwan) - "mag_IN", // Magahi (India) - "mai_IN", // Maithili (India) - "mg_MG", // Malagasy (Madagascar) - "mh_MH", // Marshallese (Marshall Islands) - "mhr_RU", // Eastern Mari (Russia) - "mi", // Māori - "mi_NZ", // Māori (New Zealand) - "miq_NI", // Mískito (Nicaragua) - "mk", // Macedonian - "mk_MK", // Macedonian (Macedonia) - "ml", // Malayalam - "ml_IN", // Malayalam (India) - "mni_IN", // Manipuri (India) - "mn_MN", // Mongolian (Mongolia) - "mr", // Marathi - "mr_IN", // Marathi (India) - "ms", // Malay - "ms_MY", // Malay (Malaysia) - "mt", // Maltese - "mt_MT", // Maltese (Malta) - "my_MM", // Burmese (Myanmar) - "myv_RU", // Erzya (Russia) - "nah_MX", // Nahuatl languages (Mexico) - "nan_TW", // Min Nan Chinese (Taiwan) - "nb", // Norwegian Bokmål - "nb_NO", // Norwegian Bokmål (Norway) - "nds_DE", // Low German (Germany) - "nds_NL", // Low German (Netherlands) - "ne_NP", // Nepali (Nepal) - "nhn_MX", // Central Nahuatl (Mexico) - "niu_NU", // Niuean (Niue) - "niu_NZ", // Niuean (New Zealand) - "nl", // Dutch - "nl_AW", // Dutch (Aruba) - "nl_BE", // Dutch (Belgium) - "nl_NL", // Dutch (Netherlands) - "nn", // Norwegian Nynorsk - "nn_NO", // Norwegian Nynorsk (Norway) - "nr_ZA", // South Ndebele (South Africa) - "nso_ZA", // Pedi (South Africa) - "oc_FR", // Occitan (France) - "om", // Oromo - "om_ET", // Oromo (Ethiopia) - "om_KE", // Oromo (Kenya) - "or", // Oriya - "or_IN", // Oriya (India) - "os_RU", // Ossetian (Russia) - "pa_IN", // Panjabi (India) - "pap", // Papiamento - "pap_AN", // Papiamento (Netherlands Antilles) - "pap_AW", // Papiamento (Aruba) - "pap_CW", // Papiamento (Curaçao) - "pa_PK", // Panjabi (Pakistan) - "pl", // Polish - "pl_PL", // Polish (Poland) - "pr", // Pirate - "ps_AF", // Pushto (Afghanistan) - "pt", // Portuguese - "pt_BR", // Portuguese (Brazil) - "pt_PT", // Portuguese (Portugal) - "quy_PE", // Ayacucho Quechua (Peru) - "quz_PE", // Cusco Quechua (Peru) - "raj_IN", // Rajasthani (India) - "ro", // Romanian - "ro_RO", // Romanian (Romania) - "ru", // Russian - "ru_RU", // Russian (Russia) - "ru_UA", // Russian (Ukraine) - "rw_RW", // Kinyarwanda (Rwanda) - "sa_IN", // Sanskrit (India) - "sat_IN", // Santali (India) - "sc_IT", // Sardinian (Italy) - "sco", // Scots - "sd_IN", // Sindhi (India) - "se_NO", // Northern Sami (Norway) - "sgs_LT", // Samogitian (Lithuania) - "shs_CA", // Shuswap (Canada) - "sid_ET", // Sidamo (Ethiopia) - "si", // Sinhala - "si_LK", // Sinhala (Sri Lanka) - "sk", // Slovak - "sk_SK", // Slovak (Slovakia) - "sl", // Slovenian - "sl_SI", // Slovenian (Slovenia) - "so", // Somali - "so_DJ", // Somali (Djibouti) - "so_ET", // Somali (Ethiopia) - "so_KE", // Somali (Kenya) - "so_SO", // Somali (Somalia) - "son_ML", // Songhai languages (Mali) - "sq", // Albanian - "sq_AL", // Albanian (Albania) - "sq_KV", // Albanian (Kosovo) - "sq_MK", // Albanian (Macedonia) - "sr", // Serbian - "sr_Cyrl", // Serbian (Cyrillic) - "sr_Latn", // Serbian (Latin) - "sr_ME", // Serbian (Montenegro) - "sr_RS", // Serbian (Serbia) - "ss_ZA", // Swati (South Africa) - "st_ZA", // Southern Sotho (South Africa) - "sv", // Swedish - "sv_FI", // Swedish (Finland) - "sv_SE", // Swedish (Sweden) - "sw_KE", // Swahili (Kenya) - "sw_TZ", // Swahili (Tanzania) - "szl_PL", // Silesian (Poland) - "ta", // Tamil - "ta_IN", // Tamil (India) - "ta_LK", // Tamil (Sri Lanka) - "tcy_IN", // Tulu (India) - "te", // Telugu - "te_IN", // Telugu (India) - "tg_TJ", // Tajik (Tajikistan) - "the_NP", // Chitwania Tharu (Nepal) - "th", // Thai - "th_TH", // Thai (Thailand) - "ti", // Tigrinya - "ti_ER", // Tigrinya (Eritrea) - "ti_ET", // Tigrinya (Ethiopia) - "tig_ER", // Tigre (Eritrea) - "tk_TM", // Turkmen (Turkmenistan) - "tl_PH", // Tagalog (Philippines) - "tn_ZA", // Tswana (South Africa) - "tr", // Turkish - "tr_CY", // Turkish (Cyprus) - "tr_TR", // Turkish (Turkey) - "ts_ZA", // Tsonga (South Africa) - "tt", // Tatar - "tt_RU", // Tatar (Russia) - "tzm", // Central Atlas Tamazight - "tzm_MA", // Central Atlas Tamazight (Marrocos) - "ug_CN", // Uighur (China) - "uk", // Ukrainian - "uk_UA", // Ukrainian (Ukraine) - "unm_US", // Unami (United States) - "ur", // Urdu - "ur_IN", // Urdu (India) - "ur_PK", // Urdu (Pakistan) - "uz", // Uzbek - "uz_UZ", // Uzbek (Uzbekistan) - "ve_ZA", // Venda (South Africa) - "vi", // Vietnamese - "vi_VN", // Vietnamese (Vietnam) - "wa_BE", // Walloon (Belgium) - "wae_CH", // Walser (Switzerland) - "wal_ET", // Wolaytta (Ethiopia) - "wo_SN", // Wolof (Senegal) - "xh_ZA", // Xhosa (South Africa) - "yi_US", // Yiddish (United States) - "yo_NG", // Yoruba (Nigeria) - "yue_HK", // Yue Chinese (Hong Kong) - "zh", // Chinese - "zh_CN", // Chinese (China) - "zh_HK", // Chinese (Hong Kong) - "zh_SG", // Chinese (Singapore) - "zh_TW", // Chinese (Taiwan) - "zu_ZA", // Zulu (South Africa) - nullptr -}; - -static const char *locale_names[] = { - "Afar", - "Afar (Djibouti)", - "Afar (Eritrea)", - "Afar (Ethiopia)", - "Afrikaans", - "Afrikaans (South Africa)", - "Aguaruna (Peru)", - "Akan (Ghana)", - "Amharic (Ethiopia)", - "Aragonese (Spain)", - "Angika (India)", - "Arabic", - "Arabic (United Arab Emirates)", - "Arabic (Bahrain)", - "Arabic (Algeria)", - "Arabic (Egypt)", - "Arabic (India)", - "Arabic (Iraq)", - "Arabic (Jordan)", - "Arabic (Kuwait)", - "Arabic (Lebanon)", - "Arabic (Libya)", - "Arabic (Morocco)", - "Arabic (Oman)", - "Arabic (Qatar)", - "Arabic (Saudi Arabia)", - "Arabic (Sudan)", - "Arabic (South Soudan)", - "Arabic (Syria)", - "Arabic (Tunisia)", - "Arabic (Yemen)", - "Assamese (India)", - "Asturian (Spain)", - "Southern Aymara (Peru)", - "Aymara (Peru)", - "Azerbaijani", - "Azerbaijani (Azerbaijan)", - "Belarusian", - "Belarusian (Belarus)", - "Bemba (Zambia)", - "Berber languages (Algeria)", - "Berber languages (Morocco)", - "Bulgarian", - "Bulgarian (Bulgaria)", - "Bhili (India)", - "Bhojpuri (India)", - "Bislama (Tuvalu)", - "Bengali", - "Bengali (Bangladesh)", - "Bengali (India)", - "Tibetan", - "Tibetan (China)", - "Tibetan (India)", - "Breton", - "Breton (France)", - "Bodo (India)", - "Bosnian (Bosnia and Herzegovina)", - "Bilin (Eritrea)", - "Catalan", - "Catalan (Andorra)", - "Catalan (Spain)", - "Catalan (France)", - "Catalan (Italy)", - "Chechen (Russia)", - "Cherokee (United States)", - "Mandarin Chinese (Taiwan)", - "Crimean Tatar (Ukraine)", - "Kashubian (Poland)", - "Czech", - "Czech (Czech Republic)", - "Chuvash (Russia)", - "Welsh (United Kingdom)", - "Danish", - "Danish (Denmark)", - "German", - "German (Austria)", - "German (Belgium)", - "German (Switzerland)", - "German (Germany)", - "German (Italy)", - "German (Luxembourg)", - "Dogri (India)", - "Dhivehi (Maldives)", - "Dzongkha (Bhutan)", - "Greek", - "Greek (Cyprus)", - "Greek (Greece)", - "English", - "English (Antigua and Barbuda)", - "English (Australia)", - "English (Botswana)", - "English (Canada)", - "English (Denmark)", - "English (United Kingdom)", - "English (Hong Kong)", - "English (Ireland)", - "English (Israel)", - "English (India)", - "English (Nigeria)", - "English (New Zealand)", - "English (Philippines)", - "English (Singapore)", - "English (United States)", - "English (South Africa)", - "English (Zambia)", - "English (Zimbabwe)", - "Esperanto", - "Spanish", - "Spanish (Argentina)", - "Spanish (Bolivia)", - "Spanish (Chile)", - "Spanish (Colombia)", - "Spanish (Costa Rica)", - "Spanish (Cuba)", - "Spanish (Dominican Republic)", - "Spanish (Ecuador)", - "Spanish (Spain)", - "Spanish (Guatemala)", - "Spanish (Honduras)", - "Spanish (Mexico)", - "Spanish (Nicaragua)", - "Spanish (Panama)", - "Spanish (Peru)", - "Spanish (Puerto Rico)", - "Spanish (Paraguay)", - "Spanish (El Salvador)", - "Spanish (United States)", - "Spanish (Uruguay)", - "Spanish (Venezuela)", - "Estonian", - "Estonian (Estonia)", - "Basque", - "Basque (Spain)", - "Persian", - "Persian (Iran)", - "Fulah (Senegal)", - "Finnish", - "Finnish (Finland)", - "Filipino", - "Filipino (Philippines)", - "Faroese (Faroe Islands)", - "French", - "French (Belgium)", - "French (Canada)", - "French (Switzerland)", - "French (France)", - "French (Luxembourg)", - "Friulian (Italy)", - "Western Frisian (Germany)", - "Western Frisian (Netherlands)", - "Irish", - "Irish (Ireland)", - "Scottish Gaelic (United Kingdom)", - "Geez (Eritrea)", - "Geez (Ethiopia)", - "Galician", - "Galician (Spain)", - "Gujarati (India)", - "Manx (United Kingdom)", - "Hakka Chinese (Taiwan)", - "Hausa (Nigeria)", - "Hebrew", - "Hebrew (Israel)", - "Hindi", - "Hindi (India)", - "Chhattisgarhi (India)", - "Croatian", - "Croatian (Croatia)", - "Upper Sorbian (Germany)", - "Haitian (Haiti)", - "Hungarian", - "Hungarian (Hungary)", - "Huastec (Mexico)", - "Armenian (Armenia)", - "Interlingua (France)", - "Indonesian", - "Indonesian (Indonesia)", - "Igbo (Nigeria)", - "Inupiaq (Canada)", - "Icelandic", - "Icelandic (Iceland)", - "Italian", - "Italian (Switzerland)", - "Italian (Italy)", - "Inuktitut (Canada)", - "Japanese", - "Japanese (Japan)", - "Kabyle (Algeria)", - "Georgian", - "Georgian (Georgia)", - "Kazakh (Kazakhstan)", - "Kalaallisut (Greenland)", - "Central Khmer", - "Central Khmer (Cambodia)", - "Kannada (India)", - "Konkani (India)", - "Korean", - "Korean (South Korea)", - "Kashmiri (India)", - "Kurdish", - "Kurdish (Turkey)", - "Cornish (United Kingdom)", - "Kirghiz (Kyrgyzstan)", - "Luxembourgish (Luxembourg)", - "Ganda (Uganda)", - "Limburgan (Belgium)", - "Limburgan (Netherlands)", - "Ligurian (Italy)", - "Lingala (Congo)", - "Lao (Laos)", - "Lithuanian", - "Lithuanian (Lithuania)", - "Latvian", - "Latvian (Latvia)", - "Literary Chinese (Taiwan)", - "Magahi (India)", - "Maithili (India)", - "Malagasy (Madagascar)", - "Marshallese (Marshall Islands)", - "Eastern Mari (Russia)", - "Māori", - "Māori (New Zealand)", - "Mískito (Nicaragua)", - "Macedonian", - "Macedonian (Macedonia)", - "Malayalam", - "Malayalam (India)", - "Manipuri (India)", - "Mongolian (Mongolia)", - "Marathi", - "Marathi (India)", - "Malay", - "Malay (Malaysia)", - "Maltese", - "Maltese (Malta)", - "Burmese (Myanmar)", - "Erzya (Russia)", - "Nahuatl languages (Mexico)", - "Min Nan Chinese (Taiwan)", - "Norwegian Bokmål", - "Norwegian Bokmål (Norway)", - "Low German (Germany)", - "Low German (Netherlands)", - "Nepali (Nepal)", - "Central Nahuatl (Mexico)", - "Niuean (Niue)", - "Niuean (New Zealand)", - "Dutch", - "Dutch (Aruba)", - "Dutch (Belgium)", - "Dutch (Netherlands)", - "Norwegian Nynorsk", - "Norwegian Nynorsk (Norway)", - "South Ndebele (South Africa)", - "Pedi (South Africa)", - "Occitan (France)", - "Oromo", - "Oromo (Ethiopia)", - "Oromo (Kenya)", - "Oriya", - "Oriya (India)", - "Ossetian (Russia)", - "Panjabi (India)", - "Papiamento", - "Papiamento (Netherlands Antilles)", - "Papiamento (Aruba)", - "Papiamento (Curaçao)", - "Panjabi (Pakistan)", - "Polish", - "Polish (Poland)", - "Pirate", - "Pushto (Afghanistan)", - "Portuguese", - "Portuguese (Brazil)", - "Portuguese (Portugal)", - "Ayacucho Quechua (Peru)", - "Cusco Quechua (Peru)", - "Rajasthani (India)", - "Romanian", - "Romanian (Romania)", - "Russian", - "Russian (Russia)", - "Russian (Ukraine)", - "Kinyarwanda (Rwanda)", - "Sanskrit (India)", - "Santali (India)", - "Sardinian (Italy)", - "Scots (Scotland)", - "Sindhi (India)", - "Northern Sami (Norway)", - "Samogitian (Lithuania)", - "Shuswap (Canada)", - "Sidamo (Ethiopia)", - "Sinhala", - "Sinhala (Sri Lanka)", - "Slovak", - "Slovak (Slovakia)", - "Slovenian", - "Slovenian (Slovenia)", - "Somali", - "Somali (Djibouti)", - "Somali (Ethiopia)", - "Somali (Kenya)", - "Somali (Somalia)", - "Songhai languages (Mali)", - "Albanian", - "Albanian (Albania)", - "Albanian (Kosovo)", - "Albanian (Macedonia)", - "Serbian", - "Serbian (Cyrillic)", - "Serbian (Latin)", - "Serbian (Montenegro)", - "Serbian (Serbia)", - "Swati (South Africa)", - "Southern Sotho (South Africa)", - "Swedish", - "Swedish (Finland)", - "Swedish (Sweden)", - "Swahili (Kenya)", - "Swahili (Tanzania)", - "Silesian (Poland)", - "Tamil", - "Tamil (India)", - "Tamil (Sri Lanka)", - "Tulu (India)", - "Telugu", - "Telugu (India)", - "Tajik (Tajikistan)", - "Chitwania Tharu (Nepal)", - "Thai", - "Thai (Thailand)", - "Tigrinya", - "Tigrinya (Eritrea)", - "Tigrinya (Ethiopia)", - "Tigre (Eritrea)", - "Turkmen (Turkmenistan)", - "Tagalog (Philippines)", - "Tswana (South Africa)", - "Turkish", - "Turkish (Cyprus)", - "Turkish (Turkey)", - "Tsonga (South Africa)", - "Tatar", - "Tatar (Russia)", - "Central Atlas Tamazight", - "Central Atlas Tamazight (Marrocos)", - "Uighur (China)", - "Ukrainian", - "Ukrainian (Ukraine)", - "Unami (United States)", - "Urdu", - "Urdu (India)", - "Urdu (Pakistan)", - "Uzbek", - "Uzbek (Uzbekistan)", - "Venda (South Africa)", - "Vietnamese", - "Vietnamese (Vietnam)", - "Walloon (Belgium)", - "Walser (Switzerland)", - "Wolaytta (Ethiopia)", - "Wolof (Senegal)", - "Xhosa (South Africa)", - "Yiddish (United States)", - "Yoruba (Nigeria)", - "Yue Chinese (Hong Kong)", - "Chinese", - "Chinese (China)", - "Chinese (Hong Kong)", - "Chinese (Singapore)", - "Chinese (Taiwan)", - "Zulu (South Africa)", - nullptr -}; - -// Windows has some weird locale identifiers which do not honor the ISO 639-1 -// standardized nomenclature. Whenever those don't conflict with existing ISO -// identifiers, we override them. -// -// Reference: -// - https://msdn.microsoft.com/en-us/library/windows/desktop/ms693062(v=vs.85).aspx - -static const char *locale_renames[][2] = { - { "in", "id" }, // Indonesian - { "iw", "he" }, // Hebrew - { "no", "nb" }, // Norwegian Bokmål - { "C", "en" }, // "C" is the simple/default/untranslated Computer locale. - // ASCII-only, English, no currency symbols. Godot treats this as "en". - // See https://unix.stackexchange.com/a/87763/164141 "The C locale is"... - { nullptr, nullptr } -}; - -/////////////////////////////////////////////// - Dictionary Translation::_get_messages() const { Dictionary d; for (const KeyValue<StringName, StringName> &E : translation_map) { @@ -849,17 +68,7 @@ void Translation::_set_messages(const Dictionary &p_messages) { } void Translation::set_locale(const String &p_locale) { - String univ_locale = TranslationServer::standardize_locale(p_locale); - - if (!TranslationServer::is_locale_valid(univ_locale)) { - String trimmed_locale = TranslationServer::get_language_code(univ_locale); - - ERR_FAIL_COND_MSG(!TranslationServer::is_locale_valid(trimmed_locale), "Invalid locale: " + trimmed_locale + "."); - - locale = trimmed_locale; - } else { - locale = univ_locale; - } + locale = TranslationServer::get_singleton()->standardize_locale(p_locale); if (OS::get_singleton()->get_main_loop() && TranslationServer::get_singleton()->get_loaded_locales().has(this)) { OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); @@ -1004,121 +213,306 @@ static _character_accent_pair _character_to_accented[] = { { 'z', U"ź" }, }; -bool TranslationServer::is_locale_valid(const String &p_locale) { - const char **ptr = locale_list; +Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info; - while (*ptr) { - if (*ptr == p_locale) { - return true; +Map<String, String> TranslationServer::language_map; +Map<String, String> TranslationServer::script_map; +Map<String, String> TranslationServer::locale_rename_map; +Map<String, String> TranslationServer::country_name_map; +Map<String, String> TranslationServer::variant_map; +Map<String, String> TranslationServer::country_rename_map; + +void TranslationServer::init_locale_info() { + // Init locale info. + language_map.clear(); + int idx = 0; + while (language_list[idx][0] != nullptr) { + language_map[language_list[idx][0]] = String::utf8(language_list[idx][1]); + idx++; + } + + // Init locale-script map. + locale_script_info.clear(); + idx = 0; + while (locale_scripts[idx][0] != nullptr) { + LocaleScriptInfo info; + info.name = locale_scripts[idx][0]; + info.script = locale_scripts[idx][1]; + info.default_country = locale_scripts[idx][2]; + Vector<String> supported_countries = String(locale_scripts[idx][3]).split(",", false); + for (int i = 0; i < supported_countries.size(); i++) { + info.supported_countries.insert(supported_countries[i]); } - ptr++; + locale_script_info.push_back(info); + idx++; } - return false; -} + // Init supported script list. + script_map.clear(); + idx = 0; + while (script_list[idx][0] != nullptr) { + script_map[script_list[idx][1]] = String::utf8(script_list[idx][0]); + idx++; + } -String TranslationServer::standardize_locale(const String &p_locale) { - // Replaces '-' with '_' for macOS Sierra-style locales - String univ_locale = p_locale.replace("-", "_"); + // Init regional variant map. + variant_map.clear(); + idx = 0; + while (locale_variants[idx][0] != nullptr) { + variant_map[locale_variants[idx][0]] = locale_variants[idx][1]; + idx++; + } - // Handles known non-ISO locale names used e.g. on Windows - int idx = 0; + // Init locale renames. + locale_rename_map.clear(); + idx = 0; while (locale_renames[idx][0] != nullptr) { - if (locale_renames[idx][0] == univ_locale) { - univ_locale = locale_renames[idx][1]; - break; + if (!String(locale_renames[idx][1]).is_empty()) { + locale_rename_map[locale_renames[idx][0]] = locale_renames[idx][1]; } idx++; } - return univ_locale; + // Init country names. + country_name_map.clear(); + idx = 0; + while (country_names[idx][0] != nullptr) { + country_name_map[String(country_names[idx][0])] = String::utf8(country_names[idx][1]); + idx++; + } + + // Init country renames. + country_rename_map.clear(); + idx = 0; + while (country_renames[idx][0] != nullptr) { + if (!String(country_renames[idx][1]).is_empty()) { + country_rename_map[country_renames[idx][0]] = country_renames[idx][1]; + } + idx++; + } } -String TranslationServer::get_language_code(const String &p_locale) { - ERR_FAIL_COND_V_MSG(p_locale.length() < 2, p_locale, "Invalid locale '" + p_locale + "'."); - // Most language codes are two letters, but some are three, - // so we have to look for a regional code separator ('_' or '-') - // to extract the left part. - // For example we get 'nah_MX' as input and should return 'nah'. - int split = p_locale.find("_"); - if (split == -1) { - split = p_locale.find("-"); +String TranslationServer::standardize_locale(const String &p_locale) const { + // Replaces '-' with '_' for macOS style locales. + String univ_locale = p_locale.replace("-", "_"); + + // Extract locale elements. + String lang, script, country, variant; + Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_"); + lang = locale_elements[0]; + if (locale_elements.size() >= 2) { + if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) { + script = locale_elements[1]; + } + if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) { + country = locale_elements[1]; + } } - if (split == -1) { // No separator, so the locale is already only a language code. - return p_locale; + if (locale_elements.size() >= 3) { + if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) { + country = locale_elements[2]; + } else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) { + variant = locale_elements[2].to_lower(); + } + } + if (locale_elements.size() >= 4) { + if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang) { + variant = locale_elements[3].to_lower(); + } } - return p_locale.left(split); -} -void TranslationServer::set_locale(const String &p_locale) { - String univ_locale = standardize_locale(p_locale); + // Try extract script and variant from the extra part. + Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";"); + for (int i = 0; i < script_extra.size(); i++) { + if (script_extra[i].to_lower() == "cyrillic") { + script = "Cyrl"; + break; + } else if (script_extra[i].to_lower() == "latin") { + script = "Latn"; + break; + } else if (script_extra[i].to_lower() == "devanagari") { + script = "Deva"; + break; + } else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang) { + variant = script_extra[i].to_lower(); + } + } - if (!is_locale_valid(univ_locale)) { - String trimmed_locale = get_language_code(univ_locale); - print_verbose(vformat("Unsupported locale '%s', falling back to '%s'.", p_locale, trimmed_locale)); + // Handles known non-ISO language names used e.g. on Windows. + if (locale_rename_map.has(lang)) { + lang = locale_rename_map[lang]; + } - if (!is_locale_valid(trimmed_locale)) { - ERR_PRINT(vformat("Unsupported locale '%s', falling back to 'en'.", trimmed_locale)); - locale = "en"; - } else { - locale = trimmed_locale; + // Handle country renames. + if (country_rename_map.has(country)) { + country = country_rename_map[country]; + } + + // Remove unsupported script codes. + if (!script_map.has(script)) { + script = ""; + } + + // Add script code base on language and country codes for some ambiguous cases. + if (script.is_empty()) { + for (int i = 0; i < locale_script_info.size(); i++) { + const LocaleScriptInfo &info = locale_script_info[i]; + if (info.name == lang) { + if (country.is_empty() || info.supported_countries.has(country)) { + script = info.script; + break; + } + } } + } + if (!script.is_empty() && country.is_empty()) { + // Add conntry code based on script for some ambiguous cases. + for (int i = 0; i < locale_script_info.size(); i++) { + const LocaleScriptInfo &info = locale_script_info[i]; + if (info.name == lang && info.script == script) { + country = info.default_country; + break; + } + } + } + + // Combine results. + String locale = lang; + if (!script.is_empty()) { + locale = locale + "_" + script; + } + if (!country.is_empty()) { + locale = locale + "_" + country; + } + if (!variant.is_empty()) { + locale = locale + "_" + variant; + } + return locale; +} + +int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const { + String locale_a = standardize_locale(p_locale_a); + String locale_b = standardize_locale(p_locale_b); + + if (locale_a == locale_b) { + // Exact match. + return 10; + } + + Vector<String> locale_a_elements = locale_a.split("_"); + Vector<String> locale_b_elements = locale_b.split("_"); + if (locale_a_elements[0] == locale_b_elements[0]) { + // Matching language, both locales have extra parts. + // Return number of matching elements. + int matching_elements = 1; + for (int i = 1; i < locale_a_elements.size(); i++) { + for (int j = 1; j < locale_b_elements.size(); j++) { + if (locale_a_elements[i] == locale_b_elements[j]) { + matching_elements++; + } + } + } + return matching_elements; } else { - locale = univ_locale; + // No match. + return 0; } +} - if (OS::get_singleton()->get_main_loop()) { - OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); +String TranslationServer::get_locale_name(const String &p_locale) const { + String locale = standardize_locale(p_locale); + + String lang, script, country; + Vector<String> locale_elements = locale.split("_"); + lang = locale_elements[0]; + if (locale_elements.size() >= 2) { + if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) { + script = locale_elements[1]; + } + if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) { + country = locale_elements[1]; + } + } + if (locale_elements.size() >= 3) { + if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) { + country = locale_elements[2]; + } } - ResourceLoader::reload_translation_remaps(); + String name = language_map[lang]; + if (!script.is_empty()) { + name = name + " (" + script_map[script] + ")"; + } + if (!country.is_empty()) { + name = name + ", " + country_name_map[country]; + } + return name; } -String TranslationServer::get_locale() const { - return locale; +Vector<String> TranslationServer::get_all_languages() const { + Vector<String> languages; + + for (const Map<String, String>::Element *E = language_map.front(); E; E = E->next()) { + languages.push_back(E->key()); + } + + return languages; } -String TranslationServer::get_locale_name(const String &p_locale) const { - if (!locale_name_map.has(p_locale)) { - return String(); +String TranslationServer::get_language_name(const String &p_language) const { + return language_map[p_language]; +} + +Vector<String> TranslationServer::get_all_scripts() const { + Vector<String> scripts; + + for (const Map<String, String>::Element *E = script_map.front(); E; E = E->next()) { + scripts.push_back(E->key()); } - return locale_name_map[p_locale]; + + return scripts; } -Array TranslationServer::get_loaded_locales() const { - Array locales; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); - ERR_FAIL_COND_V(t.is_null(), Array()); - String l = t->get_locale(); +String TranslationServer::get_script_name(const String &p_script) const { + return script_map[p_script]; +} - locales.push_back(l); +Vector<String> TranslationServer::get_all_countries() const { + Vector<String> countries; + + for (const Map<String, String>::Element *E = country_name_map.front(); E; E = E->next()) { + countries.push_back(E->key()); } - return locales; + return countries; } -Vector<String> TranslationServer::get_all_locales() { - Vector<String> locales; +String TranslationServer::get_country_name(const String &p_country) const { + return country_name_map[p_country]; +} - const char **ptr = locale_list; +void TranslationServer::set_locale(const String &p_locale) { + locale = standardize_locale(p_locale); - while (*ptr) { - locales.push_back(*ptr); - ptr++; + if (OS::get_singleton()->get_main_loop()) { + OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); } - return locales; + ResourceLoader::reload_translation_remaps(); } -Vector<String> TranslationServer::get_all_locale_names() { - Vector<String> locales; +String TranslationServer::get_locale() const { + return locale; +} - const char **ptr = locale_names; +Array TranslationServer::get_loaded_locales() const { + Array locales; + for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { + const Ref<Translation> &t = E->get(); + ERR_FAIL_COND_V(t.is_null(), Array()); + String l = t->get_locale(); - while (*ptr) { - locales.push_back(String::utf8(*ptr)); - ptr++; + locales.push_back(l); } return locales; @@ -1134,23 +528,20 @@ void TranslationServer::remove_translation(const Ref<Translation> &p_translation Ref<Translation> TranslationServer::get_translation_object(const String &p_locale) { Ref<Translation> res; - String lang = get_language_code(p_locale); - bool near_match_found = false; + int best_score = 0; for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { const Ref<Translation> &t = E->get(); ERR_FAIL_COND_V(t.is_null(), nullptr); String l = t->get_locale(); - // Exact match. - if (l == p_locale) { - return t; - } - - // If near match found, keep that match, but keep looking to try to look for perfect match. - if (get_language_code(l) == lang && !near_match_found) { + int score = compare_locales(p_locale, l); + if (score > 0 && score >= best_score) { res = t; - near_match_found = true; + best_score = score; + if (score == 10) { + break; // Exact match, skip the rest. + } } } return res; @@ -1167,8 +558,6 @@ StringName TranslationServer::translate(const StringName &p_message, const Strin return p_message; } - ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid."); - StringName res = _get_message_from_translations(p_message, p_context, locale, false); if (!res && fallback.length() >= 2) { @@ -1190,8 +579,6 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons return p_message_plural; } - ERR_FAIL_COND_V_MSG(locale.length() < 2, p_message, "Could not translate message as configured locale '" + locale + "' is invalid."); - StringName res = _get_message_from_translations(p_message, p_context, locale, true, p_message_plural, p_n); if (!res && fallback.length() >= 2) { @@ -1209,51 +596,30 @@ StringName TranslationServer::translate_plural(const StringName &p_message, cons } StringName TranslationServer::_get_message_from_translations(const StringName &p_message, const StringName &p_context, const String &p_locale, bool plural, const String &p_message_plural, int p_n) const { - // Locale can be of the form 'll_CC', i.e. language code and regional code, - // e.g. 'en_US', 'en_GB', etc. It might also be simply 'll', e.g. 'en'. - // To find the relevant translation, we look for those with locale starting - // with the language code, and then if any is an exact match for the long - // form. If not found, we fall back to a near match (another locale with - // same language code). - - // Note: ResourceLoader::_path_remap reproduces this locale near matching - // logic, so be sure to propagate changes there when changing things here. - StringName res; - String lang = get_language_code(p_locale); - bool near_match = false; + int best_score = 0; for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { const Ref<Translation> &t = E->get(); ERR_FAIL_COND_V(t.is_null(), p_message); String l = t->get_locale(); - bool exact_match = (l == p_locale); - if (!exact_match) { - if (near_match) { - continue; // Only near-match once, but keep looking for exact matches. + int score = compare_locales(p_locale, l); + if (score > 0 && score >= best_score) { + StringName r; + if (!plural) { + r = t->get_message(p_message, p_context); + } else { + r = t->get_plural_message(p_message, p_message_plural, p_n, p_context); } - if (get_language_code(l) != lang) { - continue; // Language code does not match. + if (!r) { + continue; + } + res = r; + best_score = score; + if (score == 10) { + break; // Exact match, skip the rest. } - } - - StringName r; - if (!plural) { - r = t->get_message(p_message, p_context); - } else { - r = t->get_plural_message(p_message, p_message_plural, p_n, p_context); - } - - if (!r) { - continue; - } - res = r; - - if (exact_match) { - break; - } else { - near_match = true; } } @@ -1287,7 +653,7 @@ bool TranslationServer::_load_translations(const String &p_from) { void TranslationServer::setup() { String test = GLOBAL_DEF("internationalization/locale/test", ""); test = test.strip_edges(); - if (test != "") { + if (!test.is_empty()) { set_locale(test); } else { set_locale(OS::get_singleton()->get_locale()); @@ -1305,18 +671,7 @@ void TranslationServer::setup() { pseudolocalization_skip_placeholders_enabled = GLOBAL_DEF("internationalization/pseudolocalization/skip_placeholders", true); #ifdef TOOLS_ENABLED - { - String options = ""; - int idx = 0; - while (locale_list[idx]) { - if (idx > 0) { - options += ","; - } - options += locale_list[idx]; - idx++; - } - ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_ENUM, options)); - } + ProjectSettings::get_singleton()->set_custom_property_info("internationalization/locale/fallback", PropertyInfo(Variant::STRING, "internationalization/locale/fallback", PROPERTY_HINT_LOCALE_ID, "")); #endif } @@ -1330,8 +685,12 @@ Ref<Translation> TranslationServer::get_tool_translation() const { String TranslationServer::get_tool_locale() { #ifdef TOOLS_ENABLED - if (TranslationServer::get_singleton()->get_tool_translation().is_valid() && (Engine::get_singleton()->is_editor_hint() || Main::is_project_manager())) { - return tool_translation->get_locale(); + if (Engine::get_singleton()->is_editor_hint() || Engine::get_singleton()->is_project_manager_hint()) { + if (TranslationServer::get_singleton()->get_tool_translation().is_valid()) { + return tool_translation->get_locale(); + } else { + return "en"; + } } else { #else { @@ -1548,7 +907,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const { } const char32_t *TranslationServer::get_accented_version(char32_t p_character) const { - if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) { + if (!is_ascii_char(p_character)) { return nullptr; } @@ -1570,6 +929,19 @@ bool TranslationServer::is_placeholder(String &p_message, int p_index) const { void TranslationServer::_bind_methods() { ClassDB::bind_method(D_METHOD("set_locale", "locale"), &TranslationServer::set_locale); ClassDB::bind_method(D_METHOD("get_locale"), &TranslationServer::get_locale); + ClassDB::bind_method(D_METHOD("get_tool_locale"), &TranslationServer::get_tool_locale); + + ClassDB::bind_method(D_METHOD("compare_locales", "locale_a", "locale_b"), &TranslationServer::compare_locales); + ClassDB::bind_method(D_METHOD("standardize_locale", "locale"), &TranslationServer::standardize_locale); + + ClassDB::bind_method(D_METHOD("get_all_languages"), &TranslationServer::get_all_languages); + ClassDB::bind_method(D_METHOD("get_language_name", "language"), &TranslationServer::get_language_name); + + ClassDB::bind_method(D_METHOD("get_all_scripts"), &TranslationServer::get_all_scripts); + ClassDB::bind_method(D_METHOD("get_script_name", "script"), &TranslationServer::get_script_name); + + ClassDB::bind_method(D_METHOD("get_all_countries"), &TranslationServer::get_all_countries); + ClassDB::bind_method(D_METHOD("get_country_name", "country"), &TranslationServer::get_country_name); ClassDB::bind_method(D_METHOD("get_locale_name", "locale"), &TranslationServer::get_locale_name); @@ -1603,8 +975,5 @@ void TranslationServer::load_translations() { TranslationServer::TranslationServer() { singleton = this; - - for (int i = 0; locale_list[i]; ++i) { - locale_name_map.insert(locale_list[i], String::utf8(locale_names[i])); - } + init_locale_info(); } diff --git a/core/string/translation.h b/core/string/translation.h index 6aec0bb8ea..947ca4c6d8 100644 --- a/core/string/translation.h +++ b/core/string/translation.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -78,8 +78,6 @@ class TranslationServer : public Object { Ref<Translation> tool_translation; Ref<Translation> doc_translation; - Map<String, String> locale_name_map; - bool enabled = true; bool pseudolocalization_enabled = false; @@ -109,6 +107,23 @@ class TranslationServer : public Object { static void _bind_methods(); + struct LocaleScriptInfo { + String name; + String script; + String default_country; + Set<String> supported_countries; + }; + static Vector<LocaleScriptInfo> locale_script_info; + + static Map<String, String> language_map; + static Map<String, String> script_map; + static Map<String, String> locale_rename_map; + static Map<String, String> country_name_map; + static Map<String, String> country_rename_map; + static Map<String, String> variant_map; + + void init_locale_info(); + public: _FORCE_INLINE_ static TranslationServer *get_singleton() { return singleton; } @@ -119,6 +134,15 @@ public: String get_locale() const; Ref<Translation> get_translation_object(const String &p_locale); + Vector<String> get_all_languages() const; + String get_language_name(const String &p_language) const; + + Vector<String> get_all_scripts() const; + String get_script_name(const String &p_script) const; + + Vector<String> get_all_countries() const; + String get_country_name(const String &p_country) const; + String get_locale_name(const String &p_locale) const; Array get_loaded_locales() const; @@ -136,11 +160,9 @@ public: void set_editor_pseudolocalization(bool p_enabled); void reload_pseudolocalization(); - static Vector<String> get_all_locales(); - static Vector<String> get_all_locale_names(); - static bool is_locale_valid(const String &p_locale); - static String standardize_locale(const String &p_locale); - static String get_language_code(const String &p_locale); + String standardize_locale(const String &p_locale) const; + + int compare_locales(const String &p_locale_a, const String &p_locale_b) const; String get_tool_locale(); void set_tool_translation(const Ref<Translation> &p_translation); diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp index 1da00aa54b..1c991ee12d 100644 --- a/core/string/translation_po.cpp +++ b/core/string/translation_po.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/translation_po.h b/core/string/translation_po.h index 0e1d03d6ca..7d63af2246 100644 --- a/core/string/translation_po.h +++ b/core/string/translation_po.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/ucaps.h b/core/string/ucaps.h index b785ac7879..357d36e703 100644 --- a/core/string/ucaps.h +++ b/core/string/ucaps.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 320aae2ba4..759c121f29 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -54,34 +54,14 @@ static const int MAX_DECIMALS = 32; -static _FORCE_INLINE_ bool is_digit(char32_t c) { - return (c >= '0' && c <= '9'); -} - -static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { - return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); -} - -static _FORCE_INLINE_ bool is_upper_case(char32_t c) { - return (c >= 'A' && c <= 'Z'); -} - -static _FORCE_INLINE_ bool is_lower_case(char32_t c) { - return (c >= 'a' && c <= 'z'); -} - static _FORCE_INLINE_ char32_t lower_case(char32_t c) { - return (is_upper_case(c) ? (c + ('a' - 'A')) : c); + return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c); } const char CharString::_null = 0; const char16_t Char16String::_null = 0; const char32_t String::_null = 0; -bool is_symbol(char32_t c) { - return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); -} - bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) { const String &s = p_s; int beg = CLAMP(p_col, 0, s.length()); @@ -123,16 +103,18 @@ bool Char16String::operator<(const Char16String &p_right) const { } Char16String &Char16String::operator+=(char16_t p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); + const int lhs_len = length(); + resize(lhs_len + 2); + + char16_t *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; return *this; } -Char16String &Char16String::operator=(const char16_t *p_cstr) { +void Char16String::operator=(const char16_t *p_cstr) { copy_from(p_cstr); - return *this; } const char16_t *Char16String::get_data() const { @@ -179,16 +161,18 @@ bool CharString::operator<(const CharString &p_right) const { } CharString &CharString::operator+=(char p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); + const int lhs_len = length(); + resize(lhs_len + 2); + + char *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; return *this; } -CharString &CharString::operator=(const char *p_cstr) { +void CharString::operator=(const char *p_cstr) { copy_from(p_cstr); - return *this; } const char *CharString::get_data() const { @@ -326,11 +310,7 @@ void String::copy_from(const char *p_cstr) { return; } - int len = 0; - const char *ptr = p_cstr; - while (*(ptr++) != 0) { - len++; - } + const size_t len = strlen(p_cstr); if (len == 0) { resize(0); @@ -341,7 +321,7 @@ void String::copy_from(const char *p_cstr) { char32_t *dst = this->ptrw(); - for (int i = 0; i < len + 1; i++) { + for (size_t i = 0; i <= len; i++) { dst[i] = p_cstr[i]; } } @@ -396,13 +376,14 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) { void String::copy_from(const char32_t &p_char) { resize(2); + char32_t *dst = ptrw(); if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - set(0, 0xfffd); + dst[0] = 0xfffd; } else { - set(0, p_char); + dst[0] = p_char; } - set(1, 0); + dst[1] = 0; } void String::copy_from(const char32_t *p_cstr) { @@ -451,9 +432,8 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) { // p_length <= p_char strlen void String::copy_from_unchecked(const char32_t *p_char, const int p_length) { resize(p_length + 1); - set(p_length, 0); - char32_t *dst = ptrw(); + dst[p_length] = 0; for (int i = 0; i < p_length; i++) { if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) { @@ -506,27 +486,23 @@ String operator+(char32_t p_chr, const String &p_str) { } String &String::operator+=(const String &p_str) { - if (is_empty()) { + const int lhs_len = length(); + if (lhs_len == 0) { *this = p_str; return *this; } - if (p_str.is_empty()) { + const int rhs_len = p_str.length(); + if (rhs_len == 0) { return *this; } - int from = length(); - - resize(length() + p_str.size()); + resize(lhs_len + rhs_len + 1); const char32_t *src = p_str.get_data(); - char32_t *dst = ptrw(); + char32_t *dst = ptrw() + lhs_len; - set(length(), 0); - - for (int i = 0; i < p_str.length(); i++) { - dst[from + i] = src[i]; - } + memcpy(dst, src, (rhs_len + 1) * sizeof(char32_t)); return *this; } @@ -536,22 +512,15 @@ String &String::operator+=(const char *p_str) { return *this; } - int src_len = 0; - const char *ptr = p_str; - while (*(ptr++) != 0) { - src_len++; - } - - int from = length(); + const int lhs_len = length(); + const size_t rhs_len = strlen(p_str); - resize(from + src_len + 1); + resize(lhs_len + rhs_len + 1); - char32_t *dst = ptrw(); + char32_t *dst = ptrw() + lhs_len; - set(length(), 0); - - for (int i = 0; i < src_len; i++) { - dst[from + i] = p_str[i]; + for (size_t i = 0; i <= rhs_len; i++) { + dst[i] = p_str[i]; } return *this; @@ -574,14 +543,16 @@ String &String::operator+=(const char32_t *p_str) { } String &String::operator+=(char32_t p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); + const int lhs_len = length(); + resize(lhs_len + 2); + char32_t *dst = ptrw(); if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - set(length() - 1, 0xfffd); + dst[lhs_len] = 0xfffd; } else { - set(length() - 1, p_char); + dst[lhs_len] = p_char; } + dst[lhs_len + 1] = 0; return *this; } @@ -976,21 +947,21 @@ String String::camelcase_to_underscore(bool lowercase) const { int start_index = 0; for (int i = 1; i < this->size(); i++) { - bool is_upper = is_upper_case(cstr[i]); + bool is_upper = is_ascii_upper_case(cstr[i]); bool is_number = is_digit(cstr[i]); bool are_next_2_lower = false; bool is_next_lower = false; bool is_next_number = false; - bool was_precedent_upper = is_upper_case(cstr[i - 1]); + bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]); bool was_precedent_number = is_digit(cstr[i - 1]); if (i + 2 < this->size()) { - are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]); + are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]); } if (i + 1 < this->size()) { - is_next_lower = is_lower_case(cstr[i + 1]); + is_next_lower = is_ascii_lower_case(cstr[i + 1]); is_next_number = is_digit(cstr[i + 1]); } @@ -1529,115 +1500,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { } String String::num_real(double p_num, bool p_trailing) { - if (Math::is_nan(p_num)) { - return "nan"; - } - - if (Math::is_inf(p_num)) { - if (signbit(p_num)) { - return "-inf"; + if (p_num == (double)(int64_t)p_num) { + if (p_trailing) { + return num_int64((int64_t)p_num) + ".0"; } else { - return "inf"; + return num_int64((int64_t)p_num); } } - - String s; - String sd; - - // Integer part. - - bool neg = p_num < 0; - p_num = ABS(p_num); - int64_t intn = (int64_t)p_num; - - // Decimal part. - - if (intn != p_num) { - double dec = p_num - (double)intn; - - int digit = 0; - #ifdef REAL_T_IS_DOUBLE - int decimals = 14; - double tolerance = 1e-14; + int decimals = 14; #else - int decimals = 6; - double tolerance = 1e-6; + int decimals = 6; #endif - // We want to align the digits to the above sane default, so we only - // need to subtract log10 for numbers with a positive power of ten. - if (p_num > 10) { - decimals -= (int)floor(log10(p_num)); - } - - if (decimals > MAX_DECIMALS) { - decimals = MAX_DECIMALS; - } - - // In case the value ends up ending in "99999", we want to add a - // tiny bit to the value we're checking when deciding when to stop, - // so we multiply by slightly above 1 (1 + 1e-7 or 1e-15). - double check_multiplier = 1 + tolerance / 10; - - int64_t dec_int = 0; - int64_t dec_max = 0; - - while (true) { - dec *= 10.0; - dec_int = dec_int * 10 + (int64_t)dec % 10; - dec_max = dec_max * 10 + 9; - digit++; - - if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) { - break; - } - - if (digit == decimals) { - break; - } - } - - dec *= 10; - int last = (int64_t)dec % 10; - - if (last > 5) { - if (dec_int == dec_max) { - dec_int = 0; - intn++; - } else { - dec_int++; - } - } - - String decimal; - for (int i = 0; i < digit; i++) { - char num[2] = { 0, 0 }; - num[0] = '0' + dec_int % 10; - decimal = num + decimal; - dec_int /= 10; - } - sd = '.' + decimal; - } else if (p_trailing) { - sd = ".0"; - } else { - sd = ""; - } - - if (intn == 0) { - s = "0"; - } else { - while (intn) { - char32_t num = '0' + (intn % 10); - intn /= 10; - s = num + s; - } + // We want to align the digits to the above sane default, so we only + // need to subtract log10 for numbers with a positive power of ten. + if (p_num > 10) { + decimals -= (int)floor(log10(p_num)); } - - s = s + sd; - if (neg) { - s = "-" + s; - } - return s; + return num(p_num, decimals); } String String::num_scientific(double p_num) { @@ -1725,7 +1605,7 @@ String String::utf8(const char *p_utf8, int p_len) { } bool String::parse_utf8(const char *p_utf8, int p_len) { -#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); +#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); if (!p_utf8) { return true; @@ -1766,12 +1646,12 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } else if ((c & 0xf8) == 0xf0) { skip = 3; } else { - _UNICERROR("invalid skip at " + num_int64(cstr_size)); + UNICERROR("invalid skip at " + num_int64(cstr_size)); return true; //invalid utf8 } if (skip == 1 && (c & 0x1e) == 0) { - _UNICERROR("overlong rejected at " + num_int64(cstr_size)); + UNICERROR("overlong rejected at " + num_int64(cstr_size)); return true; //reject overlong } @@ -1786,7 +1666,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } if (skip) { - _UNICERROR("no space left"); + UNICERROR("no space left"); return true; //not enough space } } @@ -1813,17 +1693,17 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } else if ((*p_utf8 & 0xf8) == 0xf0) { len = 4; } else { - _UNICERROR("invalid len"); + UNICERROR("invalid len"); return true; //invalid UTF8 } if (len > cstr_size) { - _UNICERROR("no space left"); + UNICERROR("no space left"); return true; //not enough space } if (len == 2 && (*p_utf8 & 0x1E) == 0) { - _UNICERROR("no space left"); + UNICERROR("no space left"); return true; //reject overlong } @@ -1838,18 +1718,18 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { for (int i = 1; i < len; i++) { if ((p_utf8[i] & 0xc0) != 0x80) { - _UNICERROR("invalid utf8"); + UNICERROR("invalid utf8"); return true; //invalid utf8 } if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) { - _UNICERROR("invalid utf8 overlong"); + UNICERROR("invalid utf8 overlong"); return true; //no overlong } unichar = (unichar << 6) | (p_utf8[i] & 0x3f); } } if (unichar >= 0xd800 && unichar <= 0xdfff) { - _UNICERROR("invalid code point"); + UNICERROR("invalid code point"); return CharString(); } @@ -1859,7 +1739,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } return false; -#undef _UNICERROR +#undef UNICERROR } CharString String::utf8() const { @@ -1933,7 +1813,7 @@ String String::utf16(const char16_t *p_utf16, int p_len) { } bool String::parse_utf16(const char16_t *p_utf16, int p_len) { -#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?"); +#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?"); if (!p_utf16) { return true; @@ -1973,7 +1853,7 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { if ((c & 0xfffffc00) == 0xd800) { skip = 1; // lead surrogate } else if ((c & 0xfffffc00) == 0xdc00) { - _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); + UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); return true; // invalid UTF16 } else { skip = 0; @@ -1983,7 +1863,7 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate --skip; } else { - _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); + UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); return true; // invalid UTF16 } } @@ -1993,7 +1873,7 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { } if (skip) { - _UNICERROR("no space left"); + UNICERROR("no space left"); return true; // not enough space } } @@ -2018,7 +1898,7 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { } if (len > cstr_size) { - _UNICERROR("no space left"); + UNICERROR("no space left"); return true; //not enough space } @@ -2036,7 +1916,7 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { } return false; -#undef _UNICERROR +#undef UNICERROR } Char16String String::utf16() const { @@ -2305,7 +2185,7 @@ bool String::is_numeric() const { return false; } dot = true; - } else if (c < '0' || c > '9') { + } else if (!is_digit(c)) { return false; } } @@ -3173,7 +3053,7 @@ bool String::is_subsequence_of(const String &p_string) const { return _base_is_subsequence_of(p_string, false); } -bool String::is_subsequence_ofi(const String &p_string) const { +bool String::is_subsequence_ofn(const String &p_string) const { return _base_is_subsequence_of(p_string, true); } @@ -3509,6 +3389,27 @@ char32_t String::unicode_at(int p_idx) const { return operator[](p_idx); } +String String::indent(const String &p_prefix) const { + String new_string; + int line_start = 0; + + for (int i = 0; i < length(); i++) { + const char32_t c = operator[](i); + if (c == '\n') { + if (i == line_start) { + new_string += c; // Leave empty lines empty. + } else { + new_string += p_prefix + substr(line_start, i - line_start + 1); + } + line_start = i + 1; + } + } + if (line_start != length()) { + new_string += p_prefix + substr(line_start); + } + return new_string; +} + String String::dedent() const { String new_string; String indent; @@ -3630,6 +3531,10 @@ String String::rstrip(const String &p_chars) const { return substr(0, end + 1); } +bool String::is_network_share_path() const { + return begins_with("//") || begins_with("\\\\"); +} + String String::simplify_path() const { String s = *this; String drive; @@ -3642,6 +3547,9 @@ String String::simplify_path() const { } else if (s.begins_with("user://")) { drive = "user://"; s = s.substr(7, s.length()); + } else if (is_network_share_path()) { + drive = s.substr(0, 2); + s = s.substr(2, s.length() - 2); } else if (s.begins_with("/") || s.begins_with("\\")) { drive = s.substr(0, 1); s = s.substr(1, s.length() - 1); @@ -3756,7 +3664,7 @@ bool String::is_valid_identifier() const { } } - bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_'; + bool valid_char = is_ascii_identifier_char(str[i]); if (!valid_char) { return false; @@ -3781,7 +3689,7 @@ String String::uri_encode() const { String res; for (int i = 0; i < temp.length(); ++i) { char ord = temp[i]; - if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) { + if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) { res += ord; } else { char h_Val[3]; @@ -3803,9 +3711,9 @@ String String::uri_decode() const { for (int i = 0; i < src.length(); ++i) { if (src[i] == '%' && i + 2 < src.length()) { char ord1 = src[i + 1]; - if (is_digit(ord1) || is_upper_case(ord1)) { + if (is_digit(ord1) || is_ascii_upper_case(ord1)) { char ord2 = src[i + 2]; - if (is_digit(ord2) || is_upper_case(ord2)) { + if (is_digit(ord2) || is_ascii_upper_case(ord2)) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); i += 2; @@ -3932,7 +3840,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch for (int i = 2; i < p_src_len; i++) { eat = i + 1; char32_t ct = p_src[i]; - if (ct == ';' || ct < '0' || ct > '9') { + if (ct == ';' || !is_digit(ct)) { break; } } @@ -4062,7 +3970,7 @@ String String::pad_zeros(int p_digits) const { int begin = 0; - while (begin < end && (s[begin] < '0' || s[begin] > '9')) { + while (begin < end && !is_digit(s[begin])) { begin++; } @@ -4107,7 +4015,7 @@ bool String::is_valid_int() const { } for (int i = from; i < len; i++) { - if (operator[](i) < '0' || operator[](i) > '9') { + if (!is_digit(operator[](i))) { return false; // no start with number plz } } @@ -4285,7 +4193,7 @@ bool String::is_valid_filename() const { return false; } - if (stripped == String()) { + if (stripped.is_empty()) { return false; } @@ -4343,13 +4251,13 @@ bool String::is_relative_path() const { String String::get_base_dir() const { int end = 0; - // url scheme style base + // URL scheme style base. int basepos = find("://"); if (basepos != -1) { end = basepos + 3; } - // windows top level directory base + // Windows top level directory base. if (end == 0) { basepos = find(":/"); if (basepos == -1) { @@ -4360,7 +4268,24 @@ String String::get_base_dir() const { } } - // unix root directory base + // Windows UNC network share path. + if (end == 0) { + if (is_network_share_path()) { + basepos = find("/", 2); + if (basepos == -1) { + basepos = find("\\", 2); + } + int servpos = find("/", basepos + 1); + if (servpos == -1) { + servpos = find("\\", basepos + 1); + } + if (servpos != -1) { + end = servpos + 1; + } + } + } + + // Unix root directory base. if (end == 0) { if (begins_with("/")) { end = 1; @@ -4904,7 +4829,7 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St String RTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context); - if (rtr == String() || rtr == p_text) { + if (rtr.is_empty() || rtr == p_text) { return TranslationServer::get_singleton()->translate(p_text, p_context); } else { return rtr; @@ -4917,7 +4842,7 @@ String RTR(const String &p_text, const String &p_context) { String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); - if (rtr == String() || rtr == p_text || rtr == p_text_plural) { + if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) { return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context); } else { return rtr; diff --git a/core/string/ustring.h b/core/string/ustring.h index f25c36f66c..1d302b65a7 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -32,6 +32,7 @@ #define USTRING_GODOT_H // Note: Renamed to avoid conflict with ICU header with the same name. +#include "core/string/char_utils.h" #include "core/templates/cowdata.h" #include "core/templates/vector.h" #include "core/typedefs.h" @@ -108,13 +109,10 @@ public: _FORCE_INLINE_ Char16String() {} _FORCE_INLINE_ Char16String(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); } - _FORCE_INLINE_ Char16String &operator=(const Char16String &p_str) { - _cowdata._ref(p_str._cowdata); - return *this; - } + _FORCE_INLINE_ void operator=(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); } _FORCE_INLINE_ Char16String(const char16_t *p_cstr) { copy_from(p_cstr); } - Char16String &operator=(const char16_t *p_cstr); + void operator=(const char16_t *p_cstr); bool operator<(const Char16String &p_right) const; Char16String &operator+=(char16_t p_char); int length() const { return size() ? size() - 1 : 0; } @@ -152,13 +150,10 @@ public: _FORCE_INLINE_ CharString() {} _FORCE_INLINE_ CharString(const CharString &p_str) { _cowdata._ref(p_str._cowdata); } - _FORCE_INLINE_ CharString &operator=(const CharString &p_str) { - _cowdata._ref(p_str._cowdata); - return *this; - } + _FORCE_INLINE_ void operator=(const CharString &p_str) { _cowdata._ref(p_str._cowdata); } _FORCE_INLINE_ CharString(const char *p_cstr) { copy_from(p_cstr); } - CharString &operator=(const char *p_cstr); + void operator=(const char *p_cstr); bool operator<(const CharString &p_right) const; CharString &operator+=(char p_char); int length() const { return size() ? size() - 1 : 0; } @@ -291,7 +286,7 @@ public: bool ends_with(const String &p_string) const; bool is_enclosed_in(const String &p_string) const; bool is_subsequence_of(const String &p_string) const; - bool is_subsequence_ofi(const String &p_string) const; + bool is_subsequence_ofn(const String &p_string) const; bool is_quoted() const; Vector<String> bigrams() const; float similarity(const String &p_string) const; @@ -362,6 +357,7 @@ public: String left(int p_pos) const; String right(int p_pos) const; + String indent(const String &p_prefix) const; String dedent() const; String strip_edges(bool left = true, bool right = true) const; String strip_escapes() const; @@ -399,6 +395,8 @@ public: Vector<uint8_t> sha256_buffer() const; _FORCE_INLINE_ bool is_empty() const { return length() == 0; } + _FORCE_INLINE_ bool contains(const char *p_str) const { return find(p_str) != -1; } + _FORCE_INLINE_ bool contains(const String &p_str) const { return find(p_str) != -1; } // path functions bool is_absolute_path() const; @@ -410,6 +408,7 @@ public: String get_file() const; static String humanize_size(uint64_t p_size); String simplify_path() const; + bool is_network_share_path() const; String xml_escape(bool p_escape_quotes = false) const; String xml_unescape() const; @@ -442,11 +441,7 @@ public: _FORCE_INLINE_ String() {} _FORCE_INLINE_ String(const String &p_str) { _cowdata._ref(p_str._cowdata); } - - String &operator=(const String &p_str) { - _cowdata._ref(p_str._cowdata); - return *this; - } + _FORCE_INLINE_ void operator=(const String &p_str) { _cowdata._ref(p_str._cowdata); } Vector<uint8_t> to_ascii_buffer() const; Vector<uint8_t> to_utf8_buffer() const; @@ -539,7 +534,6 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St String RTR(const String &p_text, const String &p_context = ""); String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = ""); -bool is_symbol(char32_t c); bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end); _FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) { |