summary | refs | log | tree | commit | diff
path: root/core/string
diff options
context:
space:
mode:
Diffstat (limited to 'core/string')
-rw-r--r-- core/string/char_utils.h 92
-rw-r--r-- core/string/translation.cpp 22
-rw-r--r-- core/string/ustring.cpp 46
-rw-r--r-- core/string/ustring.h 2
4 files changed, 113 insertions, 49 deletions
diff --git a/core/string/char_utils.h b/core/string/char_utils.h
new file mode 100644
index 0000000000..0afd058f01
--- /dev/null
+++ b/core/string/char_utils.h
@@ -0,0 +1,92 @@
+/*************************************************************************/
+/* char_utils.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
+#ifndef CHAR_UTILS_H
+#define CHAR_UTILS_H
+
+#include "core/typedefs.h"
+
+static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) { // True only for the ASCII capitals 'A'..'Z'.
+	return U'A' <= c && c <= U'Z';
+}
+
+static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) { // True only for the ASCII small letters 'a'..'z'.
+	return U'a' <= c && c <= U'z';
+}
+
+static _FORCE_INLINE_ bool is_digit(char32_t c) { // True only for the ASCII decimal digits '0'..'9'.
+	return U'0' <= c && c <= U'9';
+}
+
+static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { // Decimal digit, or a letter 'a'..'f' / 'A'..'F' in either case.
+	return is_digit(c) || (U'A' <= c && c <= U'F') || (U'a' <= c && c <= U'f');
+}
+
+static _FORCE_INLINE_ bool is_binary_digit(char32_t c) { // True only for the characters '0' and '1'.
+	return c == U'0' || c == U'1';
+}
+
+static _FORCE_INLINE_ bool is_ascii_char(char32_t c) { // ASCII letter of either case; digits and '_' do not qualify.
+	return is_ascii_lower_case(c) || is_ascii_upper_case(c);
+}
+
+static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) { // ASCII letter of either case or ASCII decimal digit.
+	return is_ascii_lower_case(c) || is_ascii_upper_case(c) || is_digit(c);
+}
+
+static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) { // ASCII letter, ASCII decimal digit, or underscore.
+	return c == U'_' || is_digit(c) || is_ascii_lower_case(c) || is_ascii_upper_case(c);
+}
+
+static _FORCE_INLINE_ bool is_symbol(char32_t c) { // Tab, space, or any printable ASCII character that is not a letter, digit or '_'.
+	return (c == U'\t' || (U' ' <= c && c <= U'~')) && !is_ascii_identifier_char(c);
+}
+
+static _FORCE_INLINE_ bool is_control(char32_t p_char) { // Code points U+0000..U+001F and U+007F..U+009F.
+	return p_char < 0x0020 || (0x007f <= p_char && p_char < 0x00a0);
+}
+
+static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) { // Accepted code points are listed below in ascending order.
+	return (0x0009 <= p_char && p_char <= 0x000d) || p_char == 0x0020 || p_char == 0x0085 || p_char == 0x00a0 || p_char == 0x1680 || (0x2000 <= p_char && p_char <= 0x200a) || p_char == 0x2028 || p_char == 0x2029 || p_char == 0x202f || p_char == 0x205f || p_char == 0x3000;
+}
+
+static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) { // U+000A..U+000D, U+0085, U+2028 and U+2029.
+	return (0x000a <= p_char && p_char <= 0x000d) || p_char == 0x0085 || (0x2028 <= p_char && p_char <= 0x2029);
+}
+
+static _FORCE_INLINE_ bool is_punct(char32_t p_char) { // Printable ASCII (space included) that is not a letter, digit or '_', plus U+2000..U+206F and U+3000..U+303F.
+	return ((U' ' <= p_char && p_char <= U'~') && !is_ascii_identifier_char(p_char)) || (0x2000 <= p_char && p_char <= 0x206f) || (0x3000 <= p_char && p_char <= 0x303f);
+}
+
+static _FORCE_INLINE_ bool is_underscore(char32_t p_char) { // True only for the '_' character.
+	return U'_' == p_char;
+}
+
+#endif // CHAR_UTILS_H
diff --git a/core/string/translation.cpp b/core/string/translation.cpp
index 355ee238e8..7cc41df9ef 100644
--- a/core/string/translation.cpp
+++ b/core/string/translation.cpp
@@ -213,14 +213,6 @@ static _character_accent_pair _character_to_accented[] = {
{ 'z', U"ź" },
};
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
- return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
- return (c >= 'a' && c <= 'z');
-}
-
Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info;
Map<String, String> TranslationServer::language_map;
@@ -309,15 +301,15 @@ String TranslationServer::standardize_locale(const String &p_locale) const {
Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
lang = locale_elements[0];
if (locale_elements.size() >= 2) {
- if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) {
+ if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
script = locale_elements[1];
}
- if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) {
+ if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
country = locale_elements[1];
}
}
if (locale_elements.size() >= 3) {
- if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) {
+ if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
country = locale_elements[2];
} else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) {
variant = locale_elements[2].to_lower();
@@ -434,15 +426,15 @@ String TranslationServer::get_locale_name(const String &p_locale) const {
Vector<String> locale_elements = locale.split("_");
lang = locale_elements[0];
if (locale_elements.size() >= 2) {
- if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) {
+ if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
script = locale_elements[1];
}
- if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) {
+ if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
country = locale_elements[1];
}
}
if (locale_elements.size() >= 3) {
- if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) {
+ if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
country = locale_elements[2];
}
}
@@ -911,7 +903,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const {
}
const char32_t *TranslationServer::get_accented_version(char32_t p_character) const {
- if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) {
+ if (!is_ascii_char(p_character)) {
return nullptr;
}
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 6e0a7c7022..c4edc8c086 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -54,34 +54,14 @@
static const int MAX_DECIMALS = 32;
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
- return (c >= '0' && c <= '9');
-}
-
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
- return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
- return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
- return (c >= 'a' && c <= 'z');
-}
-
static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
- return (is_upper_case(c) ? (c + ('a' - 'A')) : c);
+ return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c); // Shifts ASCII 'A'..'Z' to 'a'..'z'; any other code point is returned unchanged.
}
const char CharString::_null = 0;
const char16_t Char16String::_null = 0;
const char32_t String::_null = 0;
-bool is_symbol(char32_t c) {
- return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
-}
-
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
const String &s = p_s;
int beg = CLAMP(p_col, 0, s.length());
@@ -974,21 +954,21 @@ String String::camelcase_to_underscore(bool lowercase) const {
int start_index = 0;
for (int i = 1; i < this->size(); i++) {
- bool is_upper = is_upper_case(cstr[i]);
+ bool is_upper = is_ascii_upper_case(cstr[i]);
bool is_number = is_digit(cstr[i]);
bool are_next_2_lower = false;
bool is_next_lower = false;
bool is_next_number = false;
- bool was_precedent_upper = is_upper_case(cstr[i - 1]);
+ bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
bool was_precedent_number = is_digit(cstr[i - 1]);
if (i + 2 < this->size()) {
- are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]);
+ are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
}
if (i + 1 < this->size()) {
- is_next_lower = is_lower_case(cstr[i + 1]);
+ is_next_lower = is_ascii_lower_case(cstr[i + 1]);
is_next_number = is_digit(cstr[i + 1]);
}
@@ -2212,7 +2192,7 @@ bool String::is_numeric() const {
return false;
}
dot = true;
- } else if (c < '0' || c > '9') {
+ } else if (!is_digit(c)) {
return false;
}
}
@@ -3691,7 +3671,7 @@ bool String::is_valid_identifier() const {
}
}
- bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_';
+ bool valid_char = is_ascii_identifier_char(str[i]);
if (!valid_char) {
return false;
@@ -3716,7 +3696,7 @@ String String::uri_encode() const {
String res;
for (int i = 0; i < temp.length(); ++i) {
char ord = temp[i];
- if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) {
+ if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
res += ord;
} else {
char h_Val[3];
@@ -3738,9 +3718,9 @@ String String::uri_decode() const {
for (int i = 0; i < src.length(); ++i) {
if (src[i] == '%' && i + 2 < src.length()) {
char ord1 = src[i + 1];
- if (is_digit(ord1) || is_upper_case(ord1)) {
+ if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
char ord2 = src[i + 2];
- if (is_digit(ord2) || is_upper_case(ord2)) {
+ if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
char bytes[3] = { (char)ord1, (char)ord2, 0 };
res += (char)strtol(bytes, nullptr, 16);
i += 2;
@@ -3867,7 +3847,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
for (int i = 2; i < p_src_len; i++) {
eat = i + 1;
char32_t ct = p_src[i];
- if (ct == ';' || ct < '0' || ct > '9') {
+ if (ct == ';' || !is_digit(ct)) {
break;
}
}
@@ -3997,7 +3977,7 @@ String String::pad_zeros(int p_digits) const {
int begin = 0;
- while (begin < end && (s[begin] < '0' || s[begin] > '9')) {
+ while (begin < end && !is_digit(s[begin])) {
begin++;
}
@@ -4042,7 +4022,7 @@ bool String::is_valid_int() const {
}
for (int i = from; i < len; i++) {
- if (operator[](i) < '0' || operator[](i) > '9') {
+ if (!is_digit(operator[](i))) {
return false; // no start with number plz
}
}
diff --git a/core/string/ustring.h b/core/string/ustring.h
index b685e3929f..1d302b65a7 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -32,6 +32,7 @@
#define USTRING_GODOT_H
// Note: Renamed to avoid conflict with ICU header with the same name.
+#include "core/string/char_utils.h"
#include "core/templates/cowdata.h"
#include "core/templates/vector.h"
#include "core/typedefs.h"
@@ -533,7 +534,6 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St
String RTR(const String &p_text, const String &p_context = "");
String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = "");
-bool is_symbol(char32_t c);
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end);
_FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) {