diff options
Diffstat (limited to 'core/string')
-rw-r--r-- | core/string/node_path.cpp | 15 | ||||
-rw-r--r-- | core/string/node_path.h | 2 | ||||
-rw-r--r-- | core/string/optimized_translation.cpp | 10 | ||||
-rw-r--r-- | core/string/print_string.cpp | 95 | ||||
-rw-r--r-- | core/string/print_string.h | 12 | ||||
-rw-r--r-- | core/string/translation.cpp | 40 | ||||
-rw-r--r-- | core/string/translation.h | 18 | ||||
-rw-r--r-- | core/string/translation_po.cpp | 40 | ||||
-rw-r--r-- | core/string/ustring.cpp | 575 | ||||
-rw-r--r-- | core/string/ustring.h | 25 |
10 files changed, 561 insertions, 271 deletions
diff --git a/core/string/node_path.cpp b/core/string/node_path.cpp index 238897c2b1..30fa434fad 100644 --- a/core/string/node_path.cpp +++ b/core/string/node_path.cpp @@ -199,6 +199,21 @@ Vector<StringName> NodePath::get_subnames() const { return Vector<StringName>(); } +StringName NodePath::get_concatenated_names() const { + ERR_FAIL_COND_V(!data, StringName()); + + if (!data->concatenated_path) { + int pc = data->path.size(); + String concatenated; + const StringName *sn = data->path.ptr(); + for (int i = 0; i < pc; i++) { + concatenated += i == 0 ? sn[i].operator String() : "/" + sn[i]; + } + data->concatenated_path = concatenated; + } + return data->concatenated_path; +} + StringName NodePath::get_concatenated_subnames() const { ERR_FAIL_COND_V(!data, StringName()); diff --git a/core/string/node_path.h b/core/string/node_path.h index 53976bd524..2bce33e21e 100644 --- a/core/string/node_path.h +++ b/core/string/node_path.h @@ -39,6 +39,7 @@ class NodePath { SafeRefCount refcount; Vector<StringName> path; Vector<StringName> subpath; + StringName concatenated_path; StringName concatenated_subpath; bool absolute; bool has_slashes; @@ -59,6 +60,7 @@ public: StringName get_subname(int p_idx) const; Vector<StringName> get_names() const; Vector<StringName> get_subnames() const; + StringName get_concatenated_names() const; StringName get_concatenated_subnames() const; NodePath rel_path_to(const NodePath &p_np) const; diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp index 07b58f2418..07302cc8c3 100644 --- a/core/string/optimized_translation.cpp +++ b/core/string/optimized_translation.cpp @@ -37,9 +37,9 @@ extern "C" { } struct CompressedString { - int orig_len; + int orig_len = 0; CharString compressed; - int offset; + int offset = 0; }; void OptimizedTranslation::generate(const Ref<Translation> &p_from) { @@ -53,7 +53,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) { int size = Math::larger_prime(keys.size()); Vector<Vector<Pair<int, CharString>>> buckets; - Vector<Map<uint32_t, int>> table; + Vector<HashMap<uint32_t, int>> table; Vector<uint32_t> hfunc_table; Vector<CompressedString> compressed; @@ -108,7 +108,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) { for (int i = 0; i < size; i++) { const Vector<Pair<int, CharString>> &b = buckets[i]; - Map<uint32_t, int> &t = table.write[i]; + HashMap<uint32_t, int> &t = table.write[i]; if (b.size() == 0) { continue; @@ -147,7 +147,7 @@ void OptimizedTranslation::generate(const Ref<Translation> &p_from) { int btindex = 0; for (int i = 0; i < size; i++) { - const Map<uint32_t, int> &t = table[i]; + const HashMap<uint32_t, int> &t = table[i]; if (t.size() == 0) { htw[i] = 0xFFFFFFFF; //nothing continue; diff --git a/core/string/print_string.cpp b/core/string/print_string.cpp index 919c9e08e3..f58486e0a5 100644 --- a/core/string/print_string.cpp +++ b/core/string/print_string.cpp @@ -79,7 +79,98 @@ void __print_line(String p_string) { _global_lock(); PrintHandlerList *l = print_handler_list; while (l) { - l->printfunc(l->userdata, p_string, false); + l->printfunc(l->userdata, p_string, false, false); + l = l->next; + } + + _global_unlock(); +} + +void __print_line_rich(String p_string) { + if (!_print_line_enabled) { + return; + } + + // Convert a subset of BBCode tags to ANSI escape codes for correct display in the terminal. + // Support of those ANSI escape codes varies across terminal emulators, + // especially for italic and strikethrough. + String p_string_ansi = p_string; + + p_string_ansi = p_string_ansi.replace("[b]", "\u001b[1m"); + p_string_ansi = p_string_ansi.replace("[/b]", "\u001b[22m"); + p_string_ansi = p_string_ansi.replace("[i]", "\u001b[3m"); + p_string_ansi = p_string_ansi.replace("[/i]", "\u001b[23m"); + p_string_ansi = p_string_ansi.replace("[u]", "\u001b[4m"); + p_string_ansi = p_string_ansi.replace("[/u]", "\u001b[24m"); + p_string_ansi = p_string_ansi.replace("[s]", "\u001b[9m"); + p_string_ansi = p_string_ansi.replace("[/s]", "\u001b[29m"); + + p_string_ansi = p_string_ansi.replace("[indent]", " "); + p_string_ansi = p_string_ansi.replace("[/indent]", ""); + p_string_ansi = p_string_ansi.replace("[code]", "\u001b[2m"); + p_string_ansi = p_string_ansi.replace("[/code]", "\u001b[22m"); + p_string_ansi = p_string_ansi.replace("[url]", ""); + p_string_ansi = p_string_ansi.replace("[/url]", ""); + p_string_ansi = p_string_ansi.replace("[center]", "\n\t\t\t"); + p_string_ansi = p_string_ansi.replace("[/center]", ""); + p_string_ansi = p_string_ansi.replace("[right]", "\n\t\t\t\t\t\t"); + p_string_ansi = p_string_ansi.replace("[/right]", ""); + + if (p_string_ansi.contains("[color")) { + p_string_ansi = p_string_ansi.replace("[color=black]", "\u001b[30m"); + p_string_ansi = p_string_ansi.replace("[color=red]", "\u001b[91m"); + p_string_ansi = p_string_ansi.replace("[color=green]", "\u001b[92m"); + p_string_ansi = p_string_ansi.replace("[color=lime]", "\u001b[92m"); + p_string_ansi = p_string_ansi.replace("[color=yellow]", "\u001b[93m"); + p_string_ansi = p_string_ansi.replace("[color=blue]", "\u001b[94m"); + p_string_ansi = p_string_ansi.replace("[color=magenta]", "\u001b[95m"); + p_string_ansi = p_string_ansi.replace("[color=pink]", "\u001b[38;5;218m"); + p_string_ansi = p_string_ansi.replace("[color=purple]", "\u001b[38;5;98m"); + p_string_ansi = p_string_ansi.replace("[color=cyan]", "\u001b[96m"); + p_string_ansi = p_string_ansi.replace("[color=white]", "\u001b[97m"); + p_string_ansi = p_string_ansi.replace("[color=orange]", "\u001b[38;5;208m"); + p_string_ansi = p_string_ansi.replace("[color=gray]", "\u001b[90m"); + p_string_ansi = p_string_ansi.replace("[/color]", "\u001b[39m"); + } + if (p_string_ansi.contains("[bgcolor")) { + p_string_ansi = p_string_ansi.replace("[bgcolor=black]", "\u001b[40m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=red]", "\u001b[101m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=green]", "\u001b[102m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=lime]", "\u001b[102m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=yellow]", "\u001b[103m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=blue]", "\u001b[104m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=magenta]", "\u001b[105m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=pink]", "\u001b[48;5;218m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=purple]", "\u001b[48;5;98m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=cyan]", "\u001b[106m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=white]", "\u001b[107m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=orange]", "\u001b[48;5;208m"); + p_string_ansi = p_string_ansi.replace("[bgcolor=gray]", "\u001b[100m"); + p_string_ansi = p_string_ansi.replace("[/bgcolor]", "\u001b[49m"); + } + if (p_string_ansi.contains("[fgcolor")) { + p_string_ansi = p_string_ansi.replace("[fgcolor=black]", "\u001b[30;40m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=red]", "\u001b[91;101m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=green]", "\u001b[92;102m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=lime]", "\u001b[92;102m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=yellow]", "\u001b[93;103m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=blue]", "\u001b[94;104m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=magenta]", "\u001b[95;105m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=pink]", "\u001b[38;5;218;48;5;218m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=purple]", "\u001b[38;5;98;48;5;98m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=cyan]", "\u001b[96;106m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=white]", "\u001b[97;107m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=orange]", "\u001b[38;5;208;48;5;208m"); + p_string_ansi = p_string_ansi.replace("[fgcolor=gray]", "\u001b[90;100m"); + p_string_ansi = p_string_ansi.replace("[/fgcolor]", "\u001b[39;49m"); + } + + OS::get_singleton()->print_rich("%s\n", p_string_ansi.utf8().get_data()); + + _global_lock(); + PrintHandlerList *l = print_handler_list; + while (l) { + l->printfunc(l->userdata, p_string, false, true); l = l->next; } @@ -96,7 +187,7 @@ void print_error(String p_string) { _global_lock(); PrintHandlerList *l = print_handler_list; while (l) { - l->printfunc(l->userdata, p_string, true); + l->printfunc(l->userdata, p_string, true, false); l = l->next; } diff --git a/core/string/print_string.h b/core/string/print_string.h index f7d0f25030..823e2c29e8 100644 --- a/core/string/print_string.h +++ b/core/string/print_string.h @@ -35,7 +35,7 @@ extern void (*_print_func)(String); -typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error); +typedef void (*PrintHandlerFunc)(void *, const String &p_string, bool p_error, bool p_rich); struct PrintHandlerList { PrintHandlerFunc printfunc = nullptr; @@ -59,6 +59,7 @@ void remove_print_handler(const PrintHandlerList *p_handler); extern bool _print_line_enabled; extern bool _print_error_enabled; extern void __print_line(String p_string); +extern void __print_line_rich(String p_string); extern void print_error(String p_string); extern void print_verbose(String p_string); @@ -66,9 +67,18 @@ inline void print_line(Variant v) { __print_line(stringify_variants(v)); } +inline void print_line_rich(Variant v) { + __print_line_rich(stringify_variants(v)); +} + template <typename... Args> void print_line(Variant p_var, Args... p_args) { __print_line(stringify_variants(p_var, p_args...)); } +template <typename... Args> +void print_line_rich(Variant p_var, Args... p_args) { + __print_line_rich(stringify_variants(p_var, p_args...)); +} + #endif // PRINT_STRING_H diff --git a/core/string/translation.cpp b/core/string/translation.cpp index d6d361b5f1..cba2f09022 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -95,12 +95,12 @@ StringName Translation::get_message(const StringName &p_src_text, const StringNa WARN_PRINT("Translation class doesn't handle context. Using context in get_message() on a Translation instance is probably a mistake. \nUse a derived Translation class that handles context, such as TranslationPO class"); } - const Map<StringName, StringName>::Element *E = translation_map.find(p_src_text); + HashMap<StringName, StringName>::ConstIterator E = translation_map.find(p_src_text); if (!E) { return StringName(); } - return E->get(); + return E->value; } StringName Translation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { @@ -215,12 +215,12 @@ static _character_accent_pair _character_to_accented[] = { Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info; -Map<String, String> TranslationServer::language_map; -Map<String, String> TranslationServer::script_map; -Map<String, String> TranslationServer::locale_rename_map; -Map<String, String> TranslationServer::country_name_map; -Map<String, String> TranslationServer::variant_map; -Map<String, String> TranslationServer::country_rename_map; +HashMap<String, String> TranslationServer::language_map; +HashMap<String, String> TranslationServer::script_map; +HashMap<String, String> TranslationServer::locale_rename_map; +HashMap<String, String> TranslationServer::country_name_map; +HashMap<String, String> TranslationServer::variant_map; +HashMap<String, String> TranslationServer::country_rename_map; void TranslationServer::init_locale_info() { // Init locale info. @@ -452,8 +452,8 @@ String TranslationServer::get_locale_name(const String &p_locale) const { Vector<String> TranslationServer::get_all_languages() const { Vector<String> languages; - for (const Map<String, String>::Element *E = language_map.front(); E; E = E->next()) { - languages.push_back(E->key()); + for (const KeyValue<String, String> &E : language_map) { + languages.push_back(E.key); } return languages; @@ -466,8 +466,8 @@ String TranslationServer::get_language_name(const String &p_language) const { Vector<String> TranslationServer::get_all_scripts() const { Vector<String> scripts; - for (const Map<String, String>::Element *E = script_map.front(); E; E = E->next()) { - scripts.push_back(E->key()); + for (const KeyValue<String, String> &E : script_map) { + scripts.push_back(E.key); } return scripts; @@ -480,8 +480,8 @@ String TranslationServer::get_script_name(const String &p_script) const { Vector<String> TranslationServer::get_all_countries() const { Vector<String> countries; - for (const Map<String, String>::Element *E = country_name_map.front(); E; E = E->next()) { - countries.push_back(E->key()); + for (const KeyValue<String, String> &E : country_name_map) { + countries.push_back(E.key); } return countries; @@ -507,8 +507,8 @@ String TranslationServer::get_locale() const { Array TranslationServer::get_loaded_locales() const { Array locales; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); + for (const Ref<Translation> &E : translations) { + const Ref<Translation> &t = E; ERR_FAIL_COND_V(t.is_null(), Array()); String l = t->get_locale(); @@ -530,8 +530,8 @@ Ref<Translation> TranslationServer::get_translation_object(const String &p_local Ref<Translation> res; int best_score = 0; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); + for (const Ref<Translation> &E : translations) { + const Ref<Translation> &t = E; ERR_FAIL_COND_V(t.is_null(), nullptr); String l = t->get_locale(); @@ -599,8 +599,8 @@ StringName TranslationServer::_get_message_from_translations(const StringName &p StringName res; int best_score = 0; - for (const Set<Ref<Translation>>::Element *E = translations.front(); E; E = E->next()) { - const Ref<Translation> &t = E->get(); + for (const Ref<Translation> &E : translations) { + const Ref<Translation> &t = E; ERR_FAIL_COND_V(t.is_null(), p_message); String l = t->get_locale(); diff --git a/core/string/translation.h b/core/string/translation.h index ded6ed5925..20c6ebd5a5 100644 --- a/core/string/translation.h +++ b/core/string/translation.h @@ -41,7 +41,7 @@ class Translation : public Resource { RES_BASE_EXTENSION("translation"); String locale = "en"; - Map<StringName, StringName> translation_map; + HashMap<StringName, StringName> translation_map; virtual Vector<String> _get_message_list() const; virtual Dictionary _get_messages() const; @@ -74,7 +74,7 @@ class TranslationServer : public Object { String locale = "en"; String fallback; - Set<Ref<Translation>> translations; + HashSet<Ref<Translation>> translations; Ref<Translation> tool_translation; Ref<Translation> doc_translation; @@ -111,16 +111,16 @@ class TranslationServer : public Object { String name; String script; String default_country; - Set<String> supported_countries; + HashSet<String> supported_countries; }; static Vector<LocaleScriptInfo> locale_script_info; - static Map<String, String> language_map; - static Map<String, String> script_map; - static Map<String, String> locale_rename_map; - static Map<String, String> country_name_map; - static Map<String, String> country_rename_map; - static Map<String, String> variant_map; + static HashMap<String, String> language_map; + static HashMap<String, String> script_map; + static HashMap<String, String> locale_rename_map; + static HashMap<String, String> country_name_map; + static HashMap<String, String> country_rename_map; + static HashMap<String, String> variant_map; void init_locale_info(); diff --git a/core/string/translation_po.cpp b/core/string/translation_po.cpp index 3f94e064ec..fa656b634d 100644 --- a/core/string/translation_po.cpp +++ b/core/string/translation_po.cpp @@ -70,21 +70,14 @@ Dictionary TranslationPO::_get_messages() const { Dictionary d; - List<StringName> context_l; - translation_map.get_key_list(&context_l); - for (const StringName &ctx : context_l) { - const HashMap<StringName, Vector<StringName>> &id_str_map = translation_map[ctx]; - + for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) { Dictionary d2; - List<StringName> id_l; - id_str_map.get_key_list(&id_l); - // Save list of id and strs associated with a context in a temporary dictionary. - for (List<StringName>::Element *E2 = id_l.front(); E2; E2 = E2->next()) { - StringName id = E2->get(); - d2[id] = id_str_map[id]; + + for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) { + d2[E2.key] = E2.value; } - d[ctx] = d2; + d[E.key] = d2; } return d; @@ -274,31 +267,24 @@ void TranslationPO::get_message_list(List<StringName> *r_messages) const { // OptimizedTranslation uses this function to get the list of msgid. // Return all the keys of translation_map under "" context. - List<StringName> context_l; - translation_map.get_key_list(&context_l); - - for (const StringName &E : context_l) { - if (String(E) != "") { + for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) { + if (E.key != StringName()) { continue; } - List<StringName> msgid_l; - translation_map[E].get_key_list(&msgid_l); - - for (List<StringName>::Element *E2 = msgid_l.front(); E2; E2 = E2->next()) { - r_messages->push_back(E2->get()); + for (const KeyValue<StringName, Vector<StringName>> &E2 : E.value) { + r_messages->push_back(E2.key); } } } int TranslationPO::get_message_count() const { - List<StringName> context_l; - translation_map.get_key_list(&context_l); - int count = 0; - for (const StringName &E : context_l) { - count += translation_map[E].size(); + + for (const KeyValue<StringName, HashMap<StringName, Vector<StringName>>> &E : translation_map) { + count += E.value.size(); } + return count; } diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index a2b1e4c428..beefe54faf 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -323,7 +323,13 @@ void String::copy_from(const char *p_cstr) { char32_t *dst = this->ptrw(); for (size_t i = 0; i <= len; i++) { - dst[i] = p_cstr[i]; + uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]); + if (c == 0 && i < len) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } } @@ -350,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) { char32_t *dst = this->ptrw(); for (int i = 0; i < len; i++) { - dst[i] = p_cstr[i]; + uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]); + if (c == 0) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } dst[len] = 0; } @@ -376,14 +388,21 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) { } void String::copy_from(const char32_t &p_char) { + if (p_char == 0) { + print_unicode_error("NUL character", true); + return; + } + if ((p_char & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); + } + if (p_char > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); + } + resize(2); + char32_t *dst = ptrw(); - if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - dst[0] = 0xfffd; - } else { - dst[0] = p_char; - } + dst[0] = p_char; dst[1] = 0; } @@ -437,12 +456,18 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) { dst[p_length] = 0; for (int i = 0; i < p_length; i++) { - if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + "."); - dst[i] = 0xfffd; - } else { - dst[i] = p_char[i]; + if (p_char[i] == 0) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + continue; } + if ((p_char[i] & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i])); + } + if (p_char[i] > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i])); + } + dst[i] = p_char[i]; } } @@ -481,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) { // wchar_t is 16-bit String tmp = String::utf16((const char16_t *)p_chr); #else - // wchar_t is 32-bi + // wchar_t is 32-bit String tmp = (const char32_t *)p_chr; #endif tmp += p_str; @@ -527,7 +552,13 @@ String &String::operator+=(const char *p_str) { char32_t *dst = ptrw() + lhs_len; for (size_t i = 0; i <= rhs_len; i++) { - dst[i] = p_str[i]; + uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]); + if (c == 0 && i < rhs_len) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } return *this; @@ -550,15 +581,21 @@ String &String::operator+=(const char32_t *p_str) { } String &String::operator+=(char32_t p_char) { + if (p_char == 0) { + print_unicode_error("NUL character", true); + return *this; + } + if ((p_char & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); + } + if (p_char > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); + } + const int lhs_len = length(); resize(lhs_len + 2); char32_t *dst = ptrw(); - if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - dst[lhs_len] = 0xfffd; - } else { - dst[lhs_len] = p_char; - } + dst[lhs_len] = p_char; dst[lhs_len + 1] = 0; return *this; @@ -1583,6 +1620,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { return ret; } +void String::print_unicode_error(const String &p_message, bool p_critical) const { + if (p_critical) { + print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message)); + } else { + print_error(vformat("Unicode parsing error: %s", p_message)); + } +} + CharString String::ascii(bool p_allow_extended) const { if (!length()) { return CharString(); @@ -1596,7 +1641,7 @@ CharString String::ascii(bool p_allow_extended) const { if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) { cs[i] = c; } else { - print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character."); + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c)); cs[i] = 0x20; } } @@ -1611,11 +1656,9 @@ String String::utf8(const char *p_utf8, int p_len) { return ret; } -bool String::parse_utf8(const char *p_utf8, int p_len) { -#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); - +Error String::parse_utf8(const char *p_utf8, int p_len) { if (!p_utf8) { - return true; + return ERR_INVALID_DATA; } String aux; @@ -1635,14 +1678,17 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } } + bool decode_error = false; + bool decode_failed = false; { const char *ptrtmp = p_utf8; const char *ptrtmp_limit = &p_utf8[p_len]; int skip = 0; + uint8_t c_start = 0; while (ptrtmp != ptrtmp_limit && *ptrtmp) { - if (skip == 0) { - uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp); + uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp); + if (skip == 0) { /* Determine the number of characters in sequence */ if ((c & 0x80) == 0) { skip = 0; @@ -1652,20 +1698,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { skip = 2; } else if ((c & 0xf8) == 0xf0) { skip = 3; + } else if ((c & 0xfc) == 0xf8) { + skip = 4; + } else if ((c & 0xfe) == 0xfc) { + skip = 5; } else { - UNICERROR("invalid skip at " + num_int64(cstr_size)); - return true; //invalid utf8 + skip = 0; + print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true); + decode_failed = true; } + c_start = c; if (skip == 1 && (c & 0x1e) == 0) { - UNICERROR("overlong rejected at " + num_int64(cstr_size)); - return true; //reject overlong + print_unicode_error(vformat("Overlong encoding (%x ...)", c)); + decode_error = true; } - str_size++; - } else { - --skip; + if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) { + print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c)); + decode_error = true; + } + if (c < 0x80 || c > 0xbf) { + print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true); + decode_failed = true; + skip = 0; + } else { + --skip; + } } cstr_size++; @@ -1673,80 +1733,91 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } if (skip) { - UNICERROR("no space left"); - return true; //not enough space + print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true); + decode_failed = true; } } if (str_size == 0) { clear(); - return false; + return OK; // empty string } resize(str_size + 1); char32_t *dst = ptrw(); dst[str_size] = 0; + int skip = 0; + uint32_t unichar = 0; while (cstr_size) { - int len = 0; - - /* Determine the number of characters in sequence */ - if ((*p_utf8 & 0x80) == 0) { - len = 1; - } else if ((*p_utf8 & 0xe0) == 0xc0) { - len = 2; - } else if ((*p_utf8 & 0xf0) == 0xe0) { - len = 3; - } else if ((*p_utf8 & 0xf8) == 0xf0) { - len = 4; - } else { - UNICERROR("invalid len"); - return true; //invalid UTF8 - } - - if (len > cstr_size) { - UNICERROR("no space left"); - return true; //not enough space - } - - if (len == 2 && (*p_utf8 & 0x1E) == 0) { - UNICERROR("no space left"); - return true; //reject overlong - } - - /* Convert the first character */ - - uint32_t unichar = 0; - - if (len == 1) { - unichar = *p_utf8; + uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8); + + if (skip == 0) { + /* Determine the number of characters in sequence */ + if ((c & 0x80) == 0) { + *(dst++) = c; + unichar = 0; + skip = 0; + } else if ((c & 0xe0) == 0xc0) { + unichar = (0xff >> 3) & c; + skip = 1; + } else if ((c & 0xf0) == 0xe0) { + unichar = (0xff >> 4) & c; + skip = 2; + } else if ((c & 0xf8) == 0xf0) { + unichar = (0xff >> 5) & c; + skip = 3; + } else if ((c & 0xfc) == 0xf8) { + unichar = (0xff >> 6) & c; + skip = 4; + } else if ((c & 0xfe) == 0xfc) { + unichar = (0xff >> 7) & c; + skip = 5; + } else { + *(dst++) = 0x20; + unichar = 0; + skip = 0; + } } else { - unichar = (0xff >> (len + 1)) & *p_utf8; - - for (int i = 1; i < len; i++) { - if ((p_utf8[i] & 0xc0) != 0x80) { - UNICERROR("invalid utf8"); - return true; //invalid utf8 - } - if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) { - UNICERROR("invalid utf8 overlong"); - return true; //no overlong + if (c < 0x80 || c > 0xbf) { + *(dst++) = 0x20; + skip = 0; + } else { + unichar = (unichar << 6) | (c & 0x3f); + --skip; + if (skip == 0) { + if (unichar == 0) { + print_unicode_error("NUL character", true); + decode_failed = true; + unichar = 0x20; + } + if ((unichar & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", unichar)); + decode_error = true; + } + if (unichar > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar)); + decode_error = true; + } + *(dst++) = unichar; } - unichar = (unichar << 6) | (p_utf8[i] & 0x3f); } } - if (unichar >= 0xd800 && unichar <= 0xdfff) { - UNICERROR("invalid code point"); - return CharString(); - } - *(dst++) = unichar; - cstr_size -= len; - p_utf8 += len; + cstr_size--; + p_utf8++; + } + if (skip) { + *(dst++) = 0x20; } - return false; -#undef UNICERROR + if (decode_failed) { + return ERR_INVALID_DATA; + } else if (decode_error) { + return ERR_PARSE_ERROR; + } else { + return OK; + } } CharString String::utf8() const { @@ -1765,15 +1836,17 @@ CharString String::utf8() const { fl += 2; } else if (c <= 0xffff) { // 16 bits fl += 3; - } else if (c <= 0x0010ffff) { // 21 bits + } else if (c <= 0x001fffff) { // 21 bits fl += 4; + } else if (c <= 0x03ffffff) { // 26 bits + fl += 5; + print_unicode_error(vformat("Invalid unicode codepoint (%x)", c)); + } else if (c <= 0x7fffffff) { // 31 bits + fl += 6; + print_unicode_error(vformat("Invalid unicode codepoint (%x)", c)); } else { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return CharString(); - } - if (c >= 0xd800 && c <= 0xdfff) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return CharString(); + fl += 1; + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true); } } @@ -1799,11 +1872,26 @@ CharString String::utf8() const { APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. - } else { // 21 bits + } else if (c <= 0x001fffff) { // 21 bits APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else if (c <= 0x03ffffff) { // 26 bits + APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else if (c <= 0x7fffffff) { // 31 bits + APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit. + APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else { + APPEND_CHAR(0x20); } } #undef APPEND_CHAR @@ -1819,11 +1907,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) { return ret; } -bool String::parse_utf16(const char16_t *p_utf16, int p_len) { -#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?"); - +Error String::parse_utf16(const char16_t *p_utf16, int p_len) { if (!p_utf16) { - return true; + return ERR_INVALID_DATA; } String aux; @@ -1850,80 +1936,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { } } + bool decode_error = false; { const char16_t *ptrtmp = p_utf16; const char16_t *ptrtmp_limit = &p_utf16[p_len]; - int skip = 0; + uint32_t c_prev = 0; + bool skip = false; while (ptrtmp != ptrtmp_limit && *ptrtmp) { uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp; - if (skip == 0) { - if ((c & 0xfffffc00) == 0xd800) { - skip = 1; // lead surrogate - } else if ((c & 0xfffffc00) == 0xdc00) { - UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); - return true; // invalid UTF16 - } else { - skip = 0; + + if ((c & 0xfffffc00) == 0xd800) { // lead surrogate + if (skip) { + print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c)); + decode_error = true; } - str_size++; - } else { - if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate - --skip; + skip = true; + } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + if (skip) { + str_size--; } else { - UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); - return true; // invalid UTF16 + print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c)); + decode_error = true; } + skip = false; + } else { + skip = false; } + c_prev = c; + str_size++; cstr_size++; ptrtmp++; } if (skip) { - UNICERROR("no space left"); - return true; // not enough space + print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev)); + decode_error = true; } } if (str_size == 0) { clear(); - return false; + return OK; // empty string } resize(str_size + 1); char32_t *dst = ptrw(); dst[str_size] = 0; + bool skip = false; + uint32_t c_prev = 0; while (cstr_size) { - int len = 0; uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16; - if ((c & 0xfffffc00) == 0xd800) { - len = 2; + if ((c & 0xfffffc00) == 0xd800) { // lead surrogate + if (skip) { + *(dst++) = c_prev; // unpaired, store as is + } + skip = true; + } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + if (skip) { + *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair + } else { + *(dst++) = c; // unpaired, store as is + } + skip = false; } else { - len = 1; + *(dst++) = c; + skip = false; } - if (len > cstr_size) { - UNICERROR("no space left"); - return true; //not enough space - } - - uint32_t unichar = 0; - if (len == 1) { - unichar = c; - } else { - uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1]; - unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000); - } + cstr_size--; + p_utf16++; + c_prev = c; + } - *(dst++) = unichar; - cstr_size -= len; - p_utf16 += len; + if (skip) { + *(dst++) = c_prev; } - return false; -#undef UNICERROR + if (decode_error) { + return ERR_PARSE_ERROR; + } else { + return OK; + } } Char16String String::utf16() const { @@ -1938,15 +2034,14 @@ Char16String String::utf16() const { uint32_t c = d[i]; if (c <= 0xffff) { // 16 bits. fl += 1; + if ((c & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", c)); + } } else if (c <= 0x10ffff) { // 32 bits. fl += 2; } else { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return Char16String(); - } - if (c >= 0xd800 && c <= 0xdfff) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return Char16String(); + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true); + fl += 1; } } @@ -1965,9 +2060,11 @@ Char16String String::utf16() const { if (c <= 0xffff) { // 16 bits. APPEND_CHAR(c); - } else { // 32 bits. + } else if (c <= 0x10ffff) { // 32 bits. APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate. APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate. + } else { + APPEND_CHAR(0x20); } } #undef APPEND_CHAR @@ -2039,7 +2136,7 @@ int64_t String::hex_to_int() const { } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0'); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); hex *= 16; hex += n; s++; @@ -2078,7 +2175,7 @@ int64_t String::bin_to_int() const { } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0'); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); binary *= 2; binary += n; s++; @@ -2101,7 +2198,7 @@ int64_t String::to_int() const { char32_t c = operator[](i); if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2130,7 +2227,7 @@ int64_t String::to_int(const char *p_str, int p_len) { char c = p_str[i]; if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2161,7 +2258,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) { wchar_t c = p_str[i]; if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2483,7 +2580,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { return INT64_MIN; } } else { - ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); } } integer *= 10; @@ -3359,36 +3456,36 @@ String String::repeat(int p_count) const { return new_string; } -String String::left(int p_pos) const { - if (p_pos < 0) { - p_pos = length() + p_pos; +String String::left(int p_len) const { + if (p_len < 0) { + p_len = length() + p_len; } - if (p_pos <= 0) { + if (p_len <= 0) { return ""; } - if (p_pos >= length()) { + if (p_len >= length()) { return *this; } - return substr(0, p_pos); + return substr(0, p_len); } -String String::right(int p_pos) const { - if (p_pos < 0) { - p_pos = length() + p_pos; +String String::right(int p_len) const { + if (p_len < 0) { + p_len = length() + p_len; } - if (p_pos <= 0) { + if (p_len <= 0) { return ""; } - if (p_pos >= length()) { + if (p_len >= length()) { return *this; } - return substr(length() - p_pos); + return substr(length() - p_len); } char32_t String::unicode_at(int p_idx) const { @@ -3655,6 +3752,31 @@ bool String::is_absolute_path() const { } } +static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) { + if (p_index == 0 && is_digit(p_char)) { + return false; // No start with number plz. + } + return is_ascii_identifier_char(p_char); +} + +String String::validate_identifier() const { + if (is_empty()) { + return "_"; // Empty string is not a valid identifier; + } + + String result = *this; + int len = result.length(); + char32_t *buffer = result.ptrw(); + + for (int i = 0; i < len; i++) { + if (!_is_valid_identifier_bit(i, buffer[i])) { + buffer[i] = '_'; + } + } + + return result; +} + bool String::is_valid_identifier() const { int len = length(); @@ -3665,15 +3787,7 @@ bool String::is_valid_identifier() const { const char32_t *str = &operator[](0); for (int i = 0; i < len; i++) { - if (i == 0) { - if (is_digit(str[0])) { - return false; // no start with number plz - } - } - - bool valid_char = is_ascii_identifier_char(str[i]); - - if (!valid_char) { + if (!_is_valid_identifier_bit(i, str[i])) { return false; } } @@ -3695,18 +3809,15 @@ String String::uri_encode() const { const CharString temp = utf8(); String res; for (int i = 0; i < temp.length(); ++i) { - char ord = temp[i]; + uint8_t ord = temp[i]; if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) { res += ord; } else { - char h_Val[3]; -#if defined(__GNUC__) || defined(_MSC_VER) - snprintf(h_Val, 3, "%02hhX", ord); -#else - sprintf(h_Val, "%02hhX", ord); -#endif - res += "%"; - res += h_Val; + char p[4] = { '%', 0, 0, 0 }; + static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + p[1] = hex[ord >> 4]; + p[2] = hex[ord & 0xF]; + res += p; } } return res; @@ -4427,7 +4538,7 @@ String String::sprintf(const Array &values, bool *error) const { int min_chars = 0; int min_decimals = 0; bool in_decimals = false; - bool pad_with_zeroes = false; + bool pad_with_zeros = false; bool left_justified = false; bool show_sign = false; @@ -4480,7 +4591,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeroes ? String("0") : String(" "); + String pad_char = pad_with_zeros ? String("0") : String(" "); if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -4488,10 +4599,13 @@ String String::sprintf(const Array &values, bool *error) const { } // Sign. - if (show_sign && value >= 0) { - str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+"); - } else if (value < 0) { - str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-"); + if (show_sign || value < 0) { + String sign_char = value < 0 ? "-" : "+"; + if (left_justified) { + str = str.insert(0, sign_char); + } else { + str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char); + } } formatted += str; @@ -4520,13 +4634,9 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeroes ? String("0") : String(" "); + String pad_char = pad_with_zeros ? String("0") : String(" "); if (left_justified) { - if (pad_with_zeroes) { - return "left justification cannot be used with zeros as the padding"; - } else { - str = str.rpad(pad_chars_count, pad_char); - } + str = str.rpad(pad_chars_count, pad_char); } else { str = str.lpad(pad_chars_count, pad_char); } @@ -4537,7 +4647,7 @@ String String::sprintf(const Array &values, bool *error) const { if (left_justified) { str = str.insert(0, sign_char); } else { - str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char); + str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char); } } @@ -4626,7 +4736,11 @@ String String::sprintf(const Array &values, bool *error) const { min_decimals += n; } else { if (c == '0' && min_chars == 0) { - pad_with_zeroes = true; + if (left_justified) { + WARN_PRINT("'0' flag ignored with '-' flag in string format"); + } else { + pad_with_zeros = true; + } } else { min_chars *= 10; min_chars += n; @@ -4675,7 +4789,7 @@ String String::sprintf(const Array &values, bool *error) const { // Back to defaults: min_chars = 0; min_decimals = 6; - pad_with_zeroes = false; + pad_with_zeros = false; left_justified = false; show_sign = false; in_decimals = false; @@ -4776,6 +4890,17 @@ Vector<uint8_t> String::to_utf32_buffer() const { } #ifdef TOOLS_ENABLED +/** + * "Tools TRanslate". Performs string replacement for internationalization + * within the editor. A translation context can optionally be specified to + * disambiguate between identical source strings in translations. When + * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`. + * If a string mentions a quantity (and may therefore need a dynamic plural form), + * use `TTRN()` instead of `TTR()`. + * + * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder). + * For translations that can be supplied by exported projects, use `RTR()` instead. + */ String TTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { return TranslationServer::get_singleton()->tool_translate(p_text, p_context); @@ -4784,6 +4909,18 @@ String TTR(const String &p_text, const String &p_context) { return p_text; } +/** + * "Tools TRanslate for N items". Performs string replacement for + * internationalization within the editor. A translation context can optionally + * be specified to disambiguate between identical source strings in + * translations. Use `TTR()` if the string doesn't need dynamic plural form. + * When placeholders are desired, use + * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`. + * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`. + * + * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder). + * For translations that can be supplied by exported projects, use `RTRN()` instead. + */ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); @@ -4796,9 +4933,10 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St return p_text_plural; } -/* DTR and DTRN are used for the documentation, handling descriptions extracted - * from the XML. - * They also replace `$DOCS_URL` with the actual URL to the documentation's branch, +/** + * "Docs TRanslate". Used for the editor class reference documentation, + * handling descriptions extracted from the XML. + * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch, * to allow dehardcoding it in the XML and doing proper substitutions everywhere. */ String DTR(const String &p_text, const String &p_context) { @@ -4812,6 +4950,12 @@ String DTR(const String &p_text, const String &p_context) { return text.replace("$DOCS_URL", VERSION_DOCS_URL); } +/** + * "Docs TRanslate for N items". Used for the editor class reference documentation + * (with support for plurals), handling descriptions extracted from the XML. + * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch, + * to allow dehardcoding it in the XML and doing proper substitutions everywhere. + */ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { const String text = p_text.dedent().strip_edges(); const String text_plural = p_text_plural.dedent().strip_edges(); @@ -4828,6 +4972,19 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St } #endif +/** + * "Run-time TRanslate". Performs string replacement for internationalization + * within a running project. The translation string must be supplied by the + * project, as Godot does not provide built-in translations for `RTR()` strings + * to keep binary size low. A translation context can optionally be specified to + * disambiguate between identical source strings in translations. When + * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`. + * If a string mentions a quantity (and may therefore need a dynamic plural form), + * use `RTRN()` instead of `RTR()`. + * + * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/` + * folder). For editor translations, use `TTR()` instead. + */ String RTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context); @@ -4841,6 +4998,20 @@ String RTR(const String &p_text, const String &p_context) { return p_text; } +/** + * "Run-time TRanslate for N items". Performs string replacement for + * internationalization within a running project. The translation string must be + * supplied by the project, as Godot does not provide built-in translations for + * `RTRN()` strings to keep binary size low. A translation context can + * optionally be specified to disambiguate between identical source strings in + * translations. Use `RTR()` if the string doesn't need dynamic plural form. + * When placeholders are desired, use + * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`. + * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`. + * + * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/` + * folder). For editor translations, use `TTRN()` instead. + */ String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); diff --git a/core/string/ustring.h b/core/string/ustring.h index 48f2e45105..7672663964 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -28,9 +28,10 @@ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /*************************************************************************/ +// Note: _GODOT suffix added to avoid conflict with ICU header with the same guard. + #ifndef USTRING_GODOT_H #define USTRING_GODOT_H -// Note: Renamed to avoid conflict with ICU header with the same name. #include "core/string/char_utils.h" #include "core/templates/cowdata.h" @@ -271,6 +272,9 @@ public: bool is_valid_string() const; + /* debug, error messages */ + void print_unicode_error(const String &p_message, bool p_critical = false) const; + /* complex helpers */ String substr(int p_from, int p_chars = -1) const; int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed @@ -356,8 +360,8 @@ public: int count(const String &p_string, int p_from = 0, int p_to = 0) const; int countn(const String &p_string, int p_from = 0, int p_to = 0) const; - String left(int p_pos) const; - String right(int p_pos) const; + String left(int p_len) const; + String right(int p_len) const; String indent(const String &p_prefix) const; String dedent() const; String strip_edges(bool left = true, bool right = true) const; @@ -373,11 +377,11 @@ public: CharString ascii(bool p_allow_extended = false) const; CharString utf8() const; - bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error + Error parse_utf8(const char *p_utf8, int p_len = -1); static String utf8(const char *p_utf8, int p_len = -1); Char16String utf16() const; - bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error + Error parse_utf16(const char16_t *p_utf16, int p_len = -1); static String utf16(const char16_t *p_utf16, int p_len = -1); static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */ @@ -427,6 +431,7 @@ public: // node functions static const String invalid_node_name_characters; String validate_node_name() const; + String validate_identifier() const; bool is_valid_identifier() const; bool is_valid_int() const; @@ -527,6 +532,16 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St #define TTRGET(m_value) (m_value) #endif +// Use this to mark property names for editor translation. +// Often for dynamic properties defined in _get_property_list(). +// Property names defined directly inside EDITOR_DEF, GLOBAL_DEF, and ADD_PROPERTY macros don't need this. +#define PNAME(m_value) (m_value) + +// Similar to PNAME, but to mark groups, i.e. properties with PROPERTY_USAGE_GROUP. +// Groups defined directly inside ADD_GROUP macros don't need this. +// The arguments are the same as ADD_GROUP. m_prefix is only used for extraction. +#define GNAME(m_value, m_prefix) (m_value) + // Runtime translate for the public node API. String RTR(const String &p_text, const String &p_context = ""); String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = ""); |