diff options
Diffstat (limited to 'core/string')
-rw-r--r-- | core/string/char_utils.h | 92 | ||||
-rw-r--r-- | core/string/translation.cpp | 22 | ||||
-rw-r--r-- | core/string/ustring.cpp | 46 | ||||
-rw-r--r-- | core/string/ustring.h | 2 |
4 files changed, 113 insertions, 49 deletions
diff --git a/core/string/char_utils.h b/core/string/char_utils.h new file mode 100644 index 0000000000..0afd058f01 --- /dev/null +++ b/core/string/char_utils.h @@ -0,0 +1,92 @@ +/*************************************************************************/ +/* char_utils.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef CHAR_UTILS_H +#define CHAR_UTILS_H + +#include "core/typedefs.h" + +static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { + return (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { + return (c >= 'a' && c <= 'z'); +} + +static _FORCE_INLINE_ bool is_digit(char32_t c) { + return (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { + return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); +} + +static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { + return (c == '0' || c == '1'); +} + +static _FORCE_INLINE_ bool is_ascii_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +} + +static _FORCE_INLINE_ bool is_symbol(char32_t c) { + return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); +} + +static _FORCE_INLINE_ bool is_control(char32_t p_char) { + return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009f); +} + +static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { + return (p_char == ' ') || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085); +} + +static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { + return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029); +} + +static _FORCE_INLINE_ bool is_punct(char32_t p_char) { + return (p_char >= ' ' && p_char <= '/') || (p_char >= ':' && p_char <= '@') || (p_char >= '[' && p_char <= '^') || (p_char == '`') || (p_char >= '{' && p_char <= '~') || (p_char >= 0x2000 && p_char <= 0x206f) || (p_char >= 0x3000 && p_char <= 0x303f); +} + +static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { + return (p_char == '_'); +} + +#endif // CHAR_UTILS_H diff --git a/core/string/translation.cpp b/core/string/translation.cpp index 355ee238e8..7cc41df9ef 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -213,14 +213,6 @@ static _character_accent_pair _character_to_accented[] = { { 'z', U"ΕΊ" }, }; -static _FORCE_INLINE_ bool is_upper_case(char32_t c) { - return (c >= 'A' && c <= 'Z'); -} - -static _FORCE_INLINE_ bool is_lower_case(char32_t c) { - return (c >= 'a' && c <= 'z'); -} - Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info; Map<String, String> TranslationServer::language_map; @@ -309,15 +301,15 @@ String TranslationServer::standardize_locale(const String &p_locale) const { Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_"); lang = locale_elements[0]; if (locale_elements.size() >= 2) { - if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) { + if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) { script = locale_elements[1]; } - if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) { + if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) { country = locale_elements[1]; } } if (locale_elements.size() >= 3) { - if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) { + if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) { country = locale_elements[2]; } else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) { variant = locale_elements[2].to_lower(); @@ -434,15 +426,15 @@ String TranslationServer::get_locale_name(const String &p_locale) const { Vector<String> locale_elements = locale.split("_"); lang = locale_elements[0]; if (locale_elements.size() >= 2) { - if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) { + if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) { script = locale_elements[1]; } - if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) { + if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) { country = locale_elements[1]; } } if (locale_elements.size() >= 3) { - if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) { + if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) { country = locale_elements[2]; } } @@ -911,7 +903,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const { } const char32_t *TranslationServer::get_accented_version(char32_t p_character) const { - if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) { + if (!is_ascii_char(p_character)) { return nullptr; } diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 6e0a7c7022..c4edc8c086 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -54,34 +54,14 @@ static const int MAX_DECIMALS = 32; -static _FORCE_INLINE_ bool is_digit(char32_t c) { - return (c >= '0' && c <= '9'); -} - -static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { - return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); -} - -static _FORCE_INLINE_ bool is_upper_case(char32_t c) { - return (c >= 'A' && c <= 'Z'); -} - -static _FORCE_INLINE_ bool is_lower_case(char32_t c) { - return (c >= 'a' && c <= 'z'); -} - static _FORCE_INLINE_ char32_t lower_case(char32_t c) { - return (is_upper_case(c) ? (c + ('a' - 'A')) : c); + return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c); } const char CharString::_null = 0; const char16_t Char16String::_null = 0; const char32_t String::_null = 0; -bool is_symbol(char32_t c) { - return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); -} - bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) { const String &s = p_s; int beg = CLAMP(p_col, 0, s.length()); @@ -974,21 +954,21 @@ String String::camelcase_to_underscore(bool lowercase) const { int start_index = 0; for (int i = 1; i < this->size(); i++) { - bool is_upper = is_upper_case(cstr[i]); + bool is_upper = is_ascii_upper_case(cstr[i]); bool is_number = is_digit(cstr[i]); bool are_next_2_lower = false; bool is_next_lower = false; bool is_next_number = false; - bool was_precedent_upper = is_upper_case(cstr[i - 1]); + bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]); bool was_precedent_number = is_digit(cstr[i - 1]); if (i + 2 < this->size()) { - are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]); + are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]); } if (i + 1 < this->size()) { - is_next_lower = is_lower_case(cstr[i + 1]); + is_next_lower = is_ascii_lower_case(cstr[i + 1]); is_next_number = is_digit(cstr[i + 1]); } @@ -2212,7 +2192,7 @@ bool String::is_numeric() const { return false; } dot = true; - } else if (c < '0' || c > '9') { + } else if (!is_digit(c)) { return false; } } @@ -3691,7 +3671,7 @@ bool String::is_valid_identifier() const { } } - bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_'; + bool valid_char = is_ascii_identifier_char(str[i]); if (!valid_char) { return false; @@ -3716,7 +3696,7 @@ String String::uri_encode() const { String res; for (int i = 0; i < temp.length(); ++i) { char ord = temp[i]; - if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) { + if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) { res += ord; } else { char h_Val[3]; @@ -3738,9 +3718,9 @@ String String::uri_decode() const { for (int i = 0; i < src.length(); ++i) { if (src[i] == '%' && i + 2 < src.length()) { char ord1 = src[i + 1]; - if (is_digit(ord1) || is_upper_case(ord1)) { + if (is_digit(ord1) || is_ascii_upper_case(ord1)) { char ord2 = src[i + 2]; - if (is_digit(ord2) || is_upper_case(ord2)) { + if (is_digit(ord2) || is_ascii_upper_case(ord2)) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); i += 2; @@ -3867,7 +3847,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch for (int i = 2; i < p_src_len; i++) { eat = i + 1; char32_t ct = p_src[i]; - if (ct == ';' || ct < '0' || ct > '9') { + if (ct == ';' || !is_digit(ct)) { break; } } @@ -3997,7 +3977,7 @@ String String::pad_zeros(int p_digits) const { int begin = 0; - while (begin < end && (s[begin] < '0' || s[begin] > '9')) { + while (begin < end && !is_digit(s[begin])) { begin++; } @@ -4042,7 +4022,7 @@ bool String::is_valid_int() const { } for (int i = from; i < len; i++) { - if (operator[](i) < '0' || operator[](i) > '9') { + if (!is_digit(operator[](i))) { return false; // no start with number plz } } diff --git a/core/string/ustring.h b/core/string/ustring.h index b685e3929f..1d302b65a7 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -32,6 +32,7 @@ #define USTRING_GODOT_H // Note: Renamed to avoid conflict with ICU header with the same name. +#include "core/string/char_utils.h" #include "core/templates/cowdata.h" #include "core/templates/vector.h" #include "core/typedefs.h" @@ -533,7 +534,6 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St String RTR(const String &p_text, const String &p_context = ""); String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = ""); -bool is_symbol(char32_t c); bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end); _FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) { |