diff options
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r-- | core/string/ustring.cpp | 846 |
1 files changed, 439 insertions, 407 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index fee168993b..70236231a2 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -38,13 +38,11 @@ #include "core/string/translation.h" #include "core/string/ucaps.h" #include "core/variant/variant.h" +#include "core/version_generated.gen.h" -#include <cstdint> - -#ifndef NO_USE_STDLIB #include <stdio.h> #include <stdlib.h> -#endif +#include <cstdint> #ifdef _MSC_VER #define _CRT_SECURE_NO_WARNINGS // to disable build-time warning which suggested to use strcpy_s instead strcpy @@ -54,11 +52,27 @@ #define snprintf _snprintf_s #endif -#define MAX_DIGITS 6 -#define UPPERCASE(m_c) (((m_c) >= 'a' && (m_c) <= 'z') ? ((m_c) - ('a' - 'A')) : (m_c)) -#define LOWERCASE(m_c) (((m_c) >= 'A' && (m_c) <= 'Z') ? ((m_c) + ('a' - 'A')) : (m_c)) -#define IS_DIGIT(m_d) ((m_d) >= '0' && (m_d) <= '9') -#define IS_HEX_DIGIT(m_d) (((m_d) >= '0' && (m_d) <= '9') || ((m_d) >= 'a' && (m_d) <= 'f') || ((m_d) >= 'A' && (m_d) <= 'F')) +static const int MAX_DECIMALS = 32; + +static _FORCE_INLINE_ bool is_digit(char32_t c) { + return (c >= '0' && c <= '9'); +} + +static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { + return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); +} + +static _FORCE_INLINE_ bool is_upper_case(char32_t c) { + return (c >= 'A' && c <= 'Z'); +} + +static _FORCE_INLINE_ bool is_lower_case(char32_t c) { + return (c >= 'a' && c <= 'z'); +} + +static _FORCE_INLINE_ char32_t lower_case(char32_t c) { + return (is_upper_case(c) ? (c + ('a' - 'A')) : c); +} const char CharString::_null = 0; const char16_t Char16String::_null = 0; @@ -240,6 +254,71 @@ String String::word_wrap(int p_chars_per_line) const { return ret; } +Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const { + // Splits the URL into scheme, host, port, path. Strip credentials when present. + String base = *this; + r_scheme = ""; + r_host = ""; + r_port = 0; + r_path = ""; + int pos = base.find("://"); + // Scheme + if (pos != -1) { + r_scheme = base.substr(0, pos + 3).to_lower(); + base = base.substr(pos + 3, base.length() - pos - 3); + } + pos = base.find("/"); + // Path + if (pos != -1) { + r_path = base.substr(pos, base.length() - pos); + base = base.substr(0, pos); + } + // Host + pos = base.find("@"); + if (pos != -1) { + // Strip credentials + base = base.substr(pos + 1, base.length() - pos - 1); + } + if (base.begins_with("[")) { + // Literal IPv6 + pos = base.rfind("]"); + if (pos == -1) { + return ERR_INVALID_PARAMETER; + } + r_host = base.substr(1, pos - 1); + base = base.substr(pos + 1, base.length() - pos - 1); + } else { + // Anything else + if (base.get_slice_count(":") > 2) { + return ERR_INVALID_PARAMETER; + } + pos = base.rfind(":"); + if (pos == -1) { + r_host = base; + base = ""; + } else { + r_host = base.substr(0, pos); + base = base.substr(pos, base.length() - pos); + } + } + if (r_host.is_empty()) { + return ERR_INVALID_PARAMETER; + } + r_host = r_host.to_lower(); + // Port + if (base.begins_with(":")) { + base = base.substr(1, base.length() - 1); + if (!base.is_valid_int()) { + return ERR_INVALID_PARAMETER; + } + r_port = base.to_int(); + if (r_port < 1 || r_port > 65535) { + return ERR_INVALID_PARAMETER; + } + } + return OK; +} + void String::copy_from(const char *p_cstr) { // copy Latin-1 encoded c-string directly if (!p_cstr) { @@ -427,12 +506,12 @@ String operator+(char32_t p_chr, const String &p_str) { } String &String::operator+=(const String &p_str) { - if (empty()) { + if (is_empty()) { *this = p_str; return *this; } - if (p_str.empty()) { + if (p_str.is_empty()) { return *this; } @@ -519,7 +598,7 @@ bool String::operator==(const char *p_str) const { if (length() != len) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -558,7 +637,7 @@ bool String::operator==(const char32_t *p_str) const { if (length() != len) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -580,7 +659,7 @@ bool String::operator==(const String &p_str) const { if (length() != p_str.length()) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -605,7 +684,7 @@ bool String::operator==(const StrRange &p_str_range) const { if (length() != len) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -673,25 +752,26 @@ bool String::operator<=(const String &p_str) const { bool String::operator>(const String &p_str) const { return p_str < *this; } + bool String::operator>=(const String &p_str) const { return !(*this < p_str); } bool String::operator<(const char *p_str) const { - if (empty() && p_str[0] == 0) { + if (is_empty() && p_str[0] == 0) { return false; } - if (empty()) { + if (is_empty()) { return true; } return is_str_less(get_data(), p_str); } bool String::operator<(const wchar_t *p_str) const { - if (empty() && p_str[0] == 0) { + if (is_empty() && p_str[0] == 0) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -705,10 +785,10 @@ bool String::operator<(const wchar_t *p_str) const { } bool String::operator<(const char32_t *p_str) const { - if (empty() && p_str[0] == 0) { + if (is_empty() && p_str[0] == 0) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -720,13 +800,13 @@ bool String::operator<(const String &p_str) const { } signed char String::nocasecmp_to(const String &p_str) const { - if (empty() && p_str.empty()) { + if (is_empty() && p_str.is_empty()) { return 0; } - if (empty()) { + if (is_empty()) { return -1; } - if (p_str.empty()) { + if (p_str.is_empty()) { return 1; } @@ -752,13 +832,13 @@ signed char String::nocasecmp_to(const String &p_str) const { } signed char String::casecmp_to(const String &p_str) const { - if (empty() && p_str.empty()) { + if (is_empty() && p_str.is_empty()) { return 0; } - if (empty()) { + if (is_empty()) { return -1; } - if (p_str.empty()) { + if (p_str.is_empty()) { return 1; } @@ -806,8 +886,8 @@ signed char String::naturalnocasecmp_to(const String &p_str) const { while (*this_str) { if (!*that_str) { return 1; - } else if (IS_DIGIT(*this_str)) { - if (!IS_DIGIT(*that_str)) { + } else if (is_digit(*this_str)) { + if (!is_digit(*that_str)) { return -1; } @@ -816,10 +896,10 @@ signed char String::naturalnocasecmp_to(const String &p_str) const { const char32_t *that_substr = that_str; // Compare lengths of both numerical sequences, ignoring leading zeros - while (IS_DIGIT(*this_str)) { + while (is_digit(*this_str)) { this_str++; } - while (IS_DIGIT(*that_str)) { + while (is_digit(*that_str)) { that_str++; } while (*this_substr == '0') { @@ -847,7 +927,7 @@ signed char String::naturalnocasecmp_to(const String &p_str) const { this_substr++; that_substr++; } - } else if (IS_DIGIT(*that_str)) { + } else if (is_digit(*that_str)) { return 1; } else { if (_find_upper(*this_str) < _find_upper(*that_str)) { //more than @@ -873,10 +953,6 @@ const char32_t *String::get_data() const { return size() ? &operator[](0) : &zero; } -void String::erase(int p_pos, int p_chars) { - *this = left(p_pos) + substr(p_pos + p_chars, length() - ((p_pos + p_chars))); -} - String String::capitalize() const { String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges(); String cap; @@ -897,26 +973,25 @@ String String::capitalize() const { String String::camelcase_to_underscore(bool lowercase) const { const char32_t *cstr = get_data(); String new_string; - const char A = 'A', Z = 'Z'; - const char a = 'a', z = 'z'; int start_index = 0; for (int i = 1; i < this->size(); i++) { - bool is_upper = cstr[i] >= A && cstr[i] <= Z; - bool is_number = cstr[i] >= '0' && cstr[i] <= '9'; + bool is_upper = is_upper_case(cstr[i]); + bool is_number = is_digit(cstr[i]); + bool are_next_2_lower = false; bool is_next_lower = false; bool is_next_number = false; - bool was_precedent_upper = cstr[i - 1] >= A && cstr[i - 1] <= Z; - bool was_precedent_number = cstr[i - 1] >= '0' && cstr[i - 1] <= '9'; + bool was_precedent_upper = is_upper_case(cstr[i - 1]); + bool was_precedent_number = is_digit(cstr[i - 1]); if (i + 2 < this->size()) { - are_next_2_lower = cstr[i + 1] >= a && cstr[i + 1] <= z && cstr[i + 2] >= a && cstr[i + 2] <= z; + are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]); } if (i + 1 < this->size()) { - is_next_lower = cstr[i + 1] >= a && cstr[i + 1] <= z; - is_next_number = cstr[i + 1] >= '0' && cstr[i + 1] <= '9'; + is_next_lower = is_lower_case(cstr[i + 1]); + is_next_number = is_digit(cstr[i + 1]); } const bool cond_a = is_upper && !was_precedent_upper && !was_precedent_number; @@ -949,10 +1024,10 @@ String String::get_with_code_lines() const { } int String::get_slice_count(String p_splitter) const { - if (empty()) { + if (is_empty()) { return 0; } - if (p_splitter.empty()) { + if (p_splitter.is_empty()) { return 0; } @@ -968,7 +1043,7 @@ int String::get_slice_count(String p_splitter) const { } String String::get_slice(String p_splitter, int p_slice) const { - if (empty() || p_splitter.empty()) { + if (is_empty() || p_splitter.is_empty()) { return ""; } @@ -1008,7 +1083,7 @@ String String::get_slice(String p_splitter, int p_slice) const { } String String::get_slicec(char32_t p_splitter, int p_slice) const { - if (empty()) { + if (is_empty()) { return String(); } @@ -1138,7 +1213,7 @@ Vector<String> String::rsplit(const String &p_splitter, bool p_allow_empty, int remaining_len = left_edge; } - ret.invert(); + ret.reverse(); return ret; } @@ -1312,10 +1387,18 @@ String String::num(double p_num, int p_decimals) { return "inf"; } } -#ifndef NO_USE_STDLIB - if (p_decimals > 16) { - p_decimals = 16; + if (p_decimals < 0) { + p_decimals = 14; + const double abs_num = ABS(p_num); + if (abs_num > 10) { + // We want to align the digits to the above sane default, so we only + // need to subtract log10 for numbers with a positive power of ten. + p_decimals -= (int)floor(log10(abs_num)); + } + } + if (p_decimals > MAX_DECIMALS) { + p_decimals = MAX_DECIMALS; } char fmt[7]; @@ -1326,7 +1409,6 @@ String String::num(double p_num, int p_decimals) { fmt[1] = 'l'; fmt[2] = 'f'; fmt[3] = 0; - } else if (p_decimals < 10) { fmt[2] = '0' + p_decimals; fmt[3] = 'l'; @@ -1377,83 +1459,6 @@ String String::num(double p_num, int p_decimals) { } return buf; -#else - - String s; - String sd; - /* integer part */ - - bool neg = p_num < 0; - p_num = ABS(p_num); - int intn = (int)p_num; - - /* decimal part */ - - if (p_decimals > 0 || (p_decimals == -1 && (int)p_num != p_num)) { - double dec = p_num - (double)((int)p_num); - - int digit = 0; - if (p_decimals > MAX_DIGITS) - p_decimals = MAX_DIGITS; - - int dec_int = 0; - int dec_max = 0; - - while (true) { - dec *= 10.0; - dec_int = dec_int * 10 + (int)dec % 10; - dec_max = dec_max * 10 + 9; - digit++; - - if (p_decimals == -1) { - if (digit == MAX_DIGITS) //no point in going to infinite - break; - - if ((dec - (double)((int)dec)) < 1e-6) - break; - } - - if (digit == p_decimals) - break; - } - dec *= 10; - int last = (int)dec % 10; - - if (last > 5) { - if (dec_int == dec_max) { - dec_int = 0; - intn++; - } else { - dec_int++; - } - } - - String decimal; - for (int i = 0; i < digit; i++) { - char num[2] = { 0, 0 }; - num[0] = '0' + dec_int % 10; - decimal = num + decimal; - dec_int /= 10; - } - sd = '.' + decimal; - } - - if (intn == 0) - - s = "0"; - else { - while (intn) { - char32_t num = '0' + (intn % 10); - intn /= 10; - s = num + s; - } - } - - s = s + sd; - if (neg) - s = "-" + s; - return s; -#endif } String String::num_int64(int64_t p_num, int base, bool capitalize_hex) { @@ -1523,7 +1528,7 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { return s; } -String String::num_real(double p_num) { +String String::num_real(double p_num, bool p_trailing) { if (Math::is_nan(p_num)) { return "nan"; } @@ -1538,30 +1543,52 @@ String String::num_real(double p_num) { String s; String sd; - /* integer part */ + + // Integer part. bool neg = p_num < 0; p_num = ABS(p_num); - int intn = (int)p_num; + int64_t intn = (int64_t)p_num; - /* decimal part */ + // Decimal part. - if ((int)p_num != p_num) { - double dec = p_num - (double)((int)p_num); + if (intn != p_num) { + double dec = p_num - (double)intn; int digit = 0; - int decimals = MAX_DIGITS; - int dec_int = 0; - int dec_max = 0; +#ifdef REAL_T_IS_DOUBLE + int decimals = 14; + double tolerance = 1e-14; +#else + int decimals = 6; + double tolerance = 1e-6; +#endif + // We want to align the digits to the above sane default, so we only + // need to subtract log10 for numbers with a positive power of ten. + if (p_num > 10) { + decimals -= (int)floor(log10(p_num)); + } + + if (decimals > MAX_DECIMALS) { + decimals = MAX_DECIMALS; + } + + // In case the value ends up ending in "99999", we want to add a + // tiny bit to the value we're checking when deciding when to stop, + // so we multiply by slightly above 1 (1 + 1e-7 or 1e-15). + double check_multiplier = 1 + tolerance / 10; + + int64_t dec_int = 0; + int64_t dec_max = 0; while (true) { dec *= 10.0; - dec_int = dec_int * 10 + (int)dec % 10; + dec_int = dec_int * 10 + (int64_t)dec % 10; dec_max = dec_max * 10 + 9; digit++; - if ((dec - (double)((int)dec)) < 1e-6) { + if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) { break; } @@ -1571,7 +1598,7 @@ String String::num_real(double p_num) { } dec *= 10; - int last = (int)dec % 10; + int last = (int64_t)dec % 10; if (last > 5) { if (dec_int == dec_max) { @@ -1590,8 +1617,10 @@ String String::num_real(double p_num) { dec_int /= 10; } sd = '.' + decimal; - } else { + } else if (p_trailing) { sd = ".0"; + } else { + sd = ""; } if (intn == 0) { @@ -1623,19 +1652,18 @@ String String::num_scientific(double p_num) { return "inf"; } } -#ifndef NO_USE_STDLIB char buf[256]; #if defined(__GNUC__) || defined(_MSC_VER) -#if (defined(__MINGW32__) || (defined(_MSC_VER) && _MSC_VER < 1900)) && defined(_TWO_DIGIT_EXPONENT) && !defined(_UCRT) - // MinGW and old MSC require _set_output_format() to conform to C99 output for printf +#if defined(__MINGW32__) && defined(_TWO_DIGIT_EXPONENT) && !defined(_UCRT) + // MinGW requires _set_output_format() to conform to C99 output for printf unsigned int old_exponent_format = _set_output_format(_TWO_DIGIT_EXPONENT); #endif snprintf(buf, 256, "%lg", p_num); -#if (defined(__MINGW32__) || (defined(_MSC_VER) && _MSC_VER < 1900)) && defined(_TWO_DIGIT_EXPONENT) && !defined(_UCRT) +#if defined(__MINGW32__) && defined(_TWO_DIGIT_EXPONENT) && !defined(_UCRT) _set_output_format(old_exponent_format); #endif @@ -1646,10 +1674,6 @@ String String::num_scientific(double p_num) { buf[255] = 0; return buf; -#else - - return String::num(p_num); -#endif } String String::md5(const uint8_t *p_md5) { @@ -1763,7 +1787,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { if (skip) { _UNICERROR("no space left"); - return true; //not enough spac + return true; //not enough space } } @@ -2096,8 +2120,9 @@ String::String(const StrRange &p_range) { copy_from(p_range.c_str, p_range.len); } -int64_t String::hex_to_int(bool p_with_prefix) const { - if (p_with_prefix && length() < 3) { +int64_t String::hex_to_int() const { + int len = length(); + if (len == 0) { return 0; } @@ -2109,24 +2134,21 @@ int64_t String::hex_to_int(bool p_with_prefix) const { s++; } - if (p_with_prefix) { - if (s[0] != '0' || s[1] != 'x') { - return 0; - } + if (len > 2 && s[0] == '0' && lower_case(s[1]) == 'x') { s += 2; } int64_t hex = 0; while (*s) { - char32_t c = LOWERCASE(*s); + char32_t c = lower_case(*s); int64_t n; - if (c >= '0' && c <= '9') { + if (is_digit(c)) { n = c - '0'; } else if (c >= 'a' && c <= 'f') { n = (c - 'a') + 10; } else { - return 0; + ERR_FAIL_COND_V_MSG(true, 0, "Invalid hexadecimal notation character \"" + chr(*s) + "\" in string \"" + *this + "\"."); } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0'); @@ -2139,8 +2161,9 @@ int64_t String::hex_to_int(bool p_with_prefix) const { return hex * sign; } -int64_t String::bin_to_int(bool p_with_prefix) const { - if (p_with_prefix && length() < 3) { +int64_t String::bin_to_int() const { + int len = length(); + if (len == 0) { return 0; } @@ -2152,17 +2175,14 @@ int64_t String::bin_to_int(bool p_with_prefix) const { s++; } - if (p_with_prefix) { - if (s[0] != '0' || s[1] != 'b') { - return 0; - } + if (len > 2 && s[0] == '0' && lower_case(s[1]) == 'b') { s += 2; } int64_t binary = 0; while (*s) { - char32_t c = LOWERCASE(*s); + char32_t c = lower_case(*s); int64_t n; if (c == '0' || c == '1') { n = c - '0'; @@ -2192,7 +2212,7 @@ int64_t String::to_int() const { for (int i = 0; i < to; i++) { char32_t c = operator[](i); - if (c >= '0' && c <= '9') { + if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); integer *= 10; @@ -2221,7 +2241,7 @@ int64_t String::to_int(const char *p_str, int p_len) { for (int i = 0; i < to; i++) { char c = p_str[i]; - if (c >= '0' && c <= '9') { + if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); integer *= 10; @@ -2252,7 +2272,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) { for (int i = 0; i < to; i++) { wchar_t c = p_str[i]; - if (c >= '0' && c <= '9') { + if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); integer *= 10; @@ -2294,28 +2314,33 @@ bool String::is_numeric() const { } template <class C> -static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number, - * optionally preceded by white space. Must - * have form "-I.FE-X", where I is the integer - * part of the mantissa, F is the fractional - * part of the mantissa, and X is the - * exponent. Either of the signs may be "+", - * "-", or omitted. Either I or F may be - * omitted, or both. The decimal point isn't - * necessary unless F is present. The "E" may - * actually be an "e". E and X may both be - * omitted (but not just one). */ - C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's - * address here. */ -{ - static const int maxExponent = 511; /* Largest possible base 10 exponent. Any - * exponent larger than this will already - * produce underflow or overflow, so there's - * no need to worry about additional digits. - */ - static const double powersOf10[] = { /* Table giving binary powers of 10. Entry */ - 10., /* is 10^2^i. Used to convert decimal */ - 100., /* exponents into floating-point numbers. */ +static double built_in_strtod( + /* A decimal ASCII floating-point number, + * optionally preceded by white space. Must + * have form "-I.FE-X", where I is the integer + * part of the mantissa, F is the fractional + * part of the mantissa, and X is the + * exponent. Either of the signs may be "+", + * "-", or omitted. Either I or F may be + * omitted, or both. The decimal point isn't + * necessary unless F is present. The "E" may + * actually be an "e". E and X may both be + * omitted (but not just one). */ + const C *string, + /* If non-nullptr, store terminating Cacter's + * address here. */ + C **endPtr = nullptr) { + /* Largest possible base 10 exponent. Any + * exponent larger than this will already + * produce underflow or overflow, so there's + * no need to worry about additional digits. */ + static const int maxExponent = 511; + /* Table giving binary powers of 10. Entry + * is 10^2^i. Used to convert decimal + * exponents into floating-point numbers. */ + static const double powersOf10[] = { + 10., + 100., 1.0e4, 1.0e8, 1.0e16, @@ -2330,25 +2355,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point const double *d; const C *p; int c; - int exp = 0; /* Exponent read from "EX" field. */ - int fracExp = 0; /* Exponent that derives from the fractional - * part. Under normal circumstances, it is - * the negative of the number of digits in F. - * However, if I is very long, the last digits - * of I get dropped (otherwise a long I with a - * large negative exponent could cause an - * unnecessary overflow on I alone). In this - * case, fracExp is incremented one for each - * dropped digit. */ - int mantSize; /* Number of digits in mantissa. */ - int decPt; /* Number of mantissa digits BEFORE decimal - * point. */ - const C *pExp; /* Temporarily holds location of exponent in - * string. */ + /* Exponent read from "EX" field. */ + int exp = 0; + /* Exponent that derives from the fractional + * part. Under normal circumstances, it is + * the negative of the number of digits in F. + * However, if I is very long, the last digits + * of I get dropped (otherwise a long I with a + * large negative exponent could cause an + * unnecessary overflow on I alone). In this + * case, fracExp is incremented one for each + * dropped digit. */ + int fracExp = 0; + /* Number of digits in mantissa. */ + int mantSize; + /* Number of mantissa digits BEFORE decimal point. */ + int decPt; + /* Temporarily holds location of exponent in string. */ + const C *pExp; /* - * Strip off leading blanks and check for a sign. - */ + * Strip off leading blanks and check for a sign. + */ p = string; while (*p == ' ' || *p == '\t' || *p == '\n') { @@ -2365,14 +2393,14 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Count the number of digits in the mantissa (including the decimal - * point), and also locate the decimal point. - */ + * Count the number of digits in the mantissa (including the decimal + * point), and also locate the decimal point. + */ decPt = -1; for (mantSize = 0;; mantSize += 1) { c = *p; - if (!IS_DIGIT(c)) { + if (!is_digit(c)) { if ((c != '.') || (decPt >= 0)) { break; } @@ -2382,11 +2410,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Now suck up the digits in the mantissa. Use two integers to collect 9 - * digits each (this is faster than using floating-point). If the mantissa - * has more than 18 digits, ignore the extras, since they can't affect the - * value anyway. - */ + * Now suck up the digits in the mantissa. Use two integers to collect 9 + * digits each (this is faster than using floating-point). If the mantissa + * has more than 18 digits, ignore the extras, since they can't affect the + * value anyway. + */ pExp = p; p -= mantSize; @@ -2432,8 +2460,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Skim off the exponent. - */ + * Skim off the exponent. + */ p = pExp; if ((*p == 'E') || (*p == 'e')) { @@ -2447,11 +2475,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } expSign = false; } - if (!IS_DIGIT(char32_t(*p))) { + if (!is_digit(char32_t(*p))) { p = pExp; goto done; } - while (IS_DIGIT(char32_t(*p))) { + while (is_digit(char32_t(*p))) { exp = exp * 10 + (*p - '0'); p += 1; } @@ -2463,10 +2491,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Generate a floating-point number that represents the exponent. Do this - * by processing the exponent one bit at a time to combine many powers of - * 2 of 10. Then combine the exponent with the fraction. - */ + * Generate a floating-point number that represents the exponent. Do this + * by processing the exponent one bit at a time to combine many powers of + * 2 of 10. Then combine the exponent with the fraction. + */ if (exp < 0) { expSign = true; @@ -2537,7 +2565,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { char32_t c = *(str++); switch (reading) { case READING_SIGN: { - if (c >= '0' && c <= '9') { + if (is_digit(c)) { reading = READING_INT; // let it fallthrough } else if (c == '-') { @@ -2554,7 +2582,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { [[fallthrough]]; } case READING_INT: { - if (c >= '0' && c <= '9') { + if (is_digit(c)) { if (integer > INT64_MAX / 10) { String number(""); str = p_str; @@ -2585,7 +2613,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { } double String::to_float() const { - if (empty()) { + if (is_empty()) { return 0; } return built_in_strtod<char32_t>(get_data()); @@ -2767,7 +2795,7 @@ String String::substr(int p_from, int p_chars) const { p_chars = length() - p_from; } - if (empty() || p_from < 0 || p_from >= length() || p_chars <= 0) { + if (is_empty() || p_from < 0 || p_from >= length() || p_chars <= 0) { return ""; } @@ -3074,35 +3102,47 @@ int String::rfindn(const String &p_str, int p_from) const { } bool String::ends_with(const String &p_string) const { - int pos = rfind(p_string); - if (pos == -1) { + int l = p_string.length(); + if (l > length()) { return false; } - return pos + p_string.length() == length(); + + if (l == 0) { + return true; + } + + const char32_t *p = &p_string[0]; + const char32_t *s = &operator[](length() - l); + + for (int i = 0; i < l; i++) { + if (p[i] != s[i]) { + return false; + } + } + + return true; } bool String::begins_with(const String &p_string) const { - if (p_string.length() > length()) { + int l = p_string.length(); + if (l > length()) { return false; } - int l = p_string.length(); if (l == 0) { return true; } - const char32_t *src = &p_string[0]; - const char32_t *str = &operator[](0); + const char32_t *p = &p_string[0]; + const char32_t *s = &operator[](0); - int i = 0; - for (; i < l; i++) { - if (src[i] != str[i]) { + for (int i = 0; i < l; i++) { + if (p[i] != s[i]) { return false; } } - // only if i == l the p_string matches the beginning - return i == l; + return true; } bool String::begins_with(const char *p_string) const { @@ -3142,7 +3182,7 @@ bool String::is_quoted() const { } int String::_count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const { - if (p_string.empty()) { + if (p_string.is_empty()) { return 0; } int len = length(); @@ -3250,8 +3290,8 @@ float String::similarity(const String &p_string) const { int src_size = src_bigrams.size(); int tgt_size = tgt_bigrams.size(); - double sum = src_size + tgt_size; - double inter = 0; + int sum = src_size + tgt_size; + int inter = 0; for (int i = 0; i < src_size; i++) { for (int j = 0; j < tgt_size; j++) { if (src_bigrams[i] == tgt_bigrams[j]) { @@ -3308,17 +3348,10 @@ String String::format(const Variant &values, String placeholder) const { if (value_arr.size() == 2) { Variant v_key = value_arr[0]; String key = v_key; - if (key.left(1) == "\"" && key.right(key.length() - 1) == "\"") { - key = key.substr(1, key.length() - 2); - } Variant v_val = value_arr[1]; String val = v_val; - if (val.left(1) == "\"" && val.right(val.length() - 1) == "\"") { - val = val.substr(1, val.length() - 2); - } - new_string = new_string.replace(placeholder.replace("_", key), val); } else { ERR_PRINT(String("STRING.format Inner Array size != 2 ").ascii().get_data()); @@ -3327,10 +3360,6 @@ String String::format(const Variant &values, String placeholder) const { Variant v_val = values_arr[i]; String val = v_val; - if (val.left(1) == "\"" && val.right(val.length() - 1) == "\"") { - val = val.substr(1, val.length() - 2); - } - if (placeholder.find("_") > -1) { new_string = new_string.replace(placeholder.replace("_", i_as_str), val); } else { @@ -3343,19 +3372,8 @@ String String::format(const Variant &values, String placeholder) const { List<Variant> keys; d.get_key_list(&keys); - for (List<Variant>::Element *E = keys.front(); E; E = E->next()) { - String key = E->get(); - String val = d[E->get()]; - - if (key.left(1) == "\"" && key.right(key.length() - 1) == "\"") { - key = key.substr(1, key.length() - 2); - } - - if (val.left(1) == "\"" && val.right(val.length() - 1) == "\"") { - val = val.substr(1, val.length() - 2); - } - - new_string = new_string.replace(placeholder.replace("_", key), val); + for (const Variant &key : keys) { + new_string = new_string.replace(placeholder.replace("_", key), d[key]); } } else { ERR_PRINT(String("Invalid type: use Array or Dictionary.").ascii().get_data()); @@ -3455,6 +3473,10 @@ String String::repeat(int p_count) const { } String String::left(int p_pos) const { + if (p_pos < 0) { + p_pos = length() + p_pos; + } + if (p_pos <= 0) { return ""; } @@ -3467,18 +3489,22 @@ String String::left(int p_pos) const { } String String::right(int p_pos) const { - if (p_pos >= length()) { - return ""; + if (p_pos < 0) { + p_pos = length() + p_pos; } if (p_pos <= 0) { + return ""; + } + + if (p_pos >= length()) { return *this; } - return substr(p_pos, (length() - p_pos)); + return substr(length() - p_pos); } -char32_t String::ord_at(int p_idx) const { +char32_t String::unicode_at(int p_idx) const { ERR_FAIL_INDEX_V(p_idx, length(), 0); return operator[](p_idx); } @@ -3541,7 +3567,7 @@ String String::strip_edges(bool left, bool right) const { } if (right) { - for (int i = (int)(len - 1); i >= 0; i--) { + for (int i = len - 1; i >= 0; i--) { if (operator[](i) <= 32) { end--; } else { @@ -3704,7 +3730,7 @@ String String::humanize_size(uint64_t p_size) { return String::num(p_size / divisor).pad_decimals(digits) + " " + prefixes[prefix_idx]; } -bool String::is_abs_path() const { +bool String::is_absolute_path() const { if (length() > 1) { return (operator[](0) == '/' || operator[](0) == '\\' || find(":/") != -1 || find(":\\") != -1); } else if ((length()) == 1) { @@ -3725,12 +3751,12 @@ bool String::is_valid_identifier() const { for (int i = 0; i < len; i++) { if (i == 0) { - if (str[0] >= '0' && str[0] <= '9') { + if (is_digit(str[0])) { return false; // no start with number plz } } - bool valid_char = (str[i] >= '0' && str[i] <= '9') || (str[i] >= 'a' && str[i] <= 'z') || (str[i] >= 'A' && str[i] <= 'Z') || str[i] == '_'; + bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_'; if (!valid_char) { return false; @@ -3750,22 +3776,19 @@ bool String::is_valid_string() const { return valid; } -String String::http_escape() const { +String String::uri_encode() const { const CharString temp = utf8(); String res; for (int i = 0; i < temp.length(); ++i) { char ord = temp[i]; - if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || - (ord >= 'a' && ord <= 'z') || - (ord >= 'A' && ord <= 'Z') || - (ord >= '0' && ord <= '9')) { + if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) { res += ord; } else { char h_Val[3]; #if defined(__GNUC__) || defined(_MSC_VER) - snprintf(h_Val, 3, "%hhX", ord); + snprintf(h_Val, 3, "%02hhX", ord); #else - sprintf(h_Val, "%hhX", ord); + sprintf(h_Val, "%02hhX", ord); #endif res += "%"; res += h_Val; @@ -3774,26 +3797,29 @@ String String::http_escape() const { return res; } -String String::http_unescape() const { - String res; - for (int i = 0; i < length(); ++i) { - if (ord_at(i) == '%' && i + 2 < length()) { - char32_t ord1 = ord_at(i + 1); - if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) { - char32_t ord2 = ord_at(i + 2); - if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) { +String String::uri_decode() const { + CharString src = utf8(); + CharString res; + for (int i = 0; i < src.length(); ++i) { + if (src[i] == '%' && i + 2 < src.length()) { + char ord1 = src[i + 1]; + if (is_digit(ord1) || is_upper_case(ord1)) { + char ord2 = src[i + 2]; + if (is_digit(ord2) || is_upper_case(ord2)) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); i += 2; } } else { - res += ord_at(i); + res += src[i]; } + } else if (src[i] == '+') { + res += ' '; } else { - res += ord_at(i); + res += src[i]; } } - return String::utf8(res.ascii()); + return String::utf8(res); } String String::c_unescape() const { @@ -3877,25 +3903,55 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch if (p_src_len >= 4 && p_src[1] == '#') { char32_t c = 0; - - for (int i = 2; i < p_src_len; i++) { - eat = i + 1; - char32_t ct = p_src[i]; - if (ct == ';') { - break; - } else if (ct >= '0' && ct <= '9') { - ct = ct - '0'; - } else if (ct >= 'a' && ct <= 'f') { - ct = (ct - 'a') + 10; - } else if (ct >= 'A' && ct <= 'F') { - ct = (ct - 'A') + 10; - } else { - continue; + bool overflow = false; + if (p_src[2] == 'x') { + // Hex entity &#x<num>; + for (int i = 3; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';') { + break; + } else if (is_digit(ct)) { + ct = ct - '0'; + } else if (ct >= 'a' && ct <= 'f') { + ct = (ct - 'a') + 10; + } else if (ct >= 'A' && ct <= 'F') { + ct = (ct - 'A') + 10; + } else { + break; + } + if (c > (UINT32_MAX >> 4)) { + overflow = true; + break; + } + c <<= 4; + c |= ct; + } + } else { + // Decimal entity &#<num>; + for (int i = 2; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';' || ct < '0' || ct > '9') { + break; + } + } + if (p_src[eat - 1] == ';') { + int64_t val = String::to_int(p_src + 2, eat - 3); + if (val > 0 && val <= UINT32_MAX) { + c = (char32_t)val; + } else { + overflow = true; + } } - c <<= 4; - c |= ct; } + // Value must be non-zero, in the range of char32_t, + // actually end with ';'. If invalid, leave the entity as-is + if (c == '\0' || overflow || p_src[eat - 1] != ';') { + eat = 1; + c = *p_src; + } if (p_dst) { *p_dst = c; } @@ -4038,7 +4094,7 @@ String String::trim_suffix(const String &p_suffix) const { return s; } -bool String::is_valid_integer() const { +bool String::is_valid_int() const { int len = length(); if (len == 0) { @@ -4083,7 +4139,7 @@ bool String::is_valid_hex_number(bool p_with_prefix) const { for (int i = from; i < len; i++) { char32_t c = operator[](i); - if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + if (is_hex_digit(c)) { continue; } return false; @@ -4111,7 +4167,7 @@ bool String::is_valid_float() const { bool numbers_found = false; for (int i = from; i < len; i++) { - if (operator[](i) >= '0' && operator[](i) <= '9') { + if (is_digit(operator[](i))) { if (exponent_found) { exponent_values_found = true; } else { @@ -4241,11 +4297,11 @@ bool String::is_valid_ip_address() const { Vector<String> ip = split(":"); for (int i = 0; i < ip.size(); i++) { String n = ip[i]; - if (n.empty()) { + if (n.is_empty()) { continue; } if (n.is_valid_hex_number(false)) { - int64_t nint = n.hex_to_int(false); + int64_t nint = n.hex_to_int(); if (nint < 0 || nint > 0xffff) { return false; } @@ -4263,7 +4319,7 @@ bool String::is_valid_ip_address() const { } for (int i = 0; i < ip.size(); i++) { String n = ip[i]; - if (!n.is_valid_integer()) { + if (!n.is_valid_int()) { return false; } int val = n.to_int(); @@ -4280,28 +4336,44 @@ bool String::is_resource_file() const { return begins_with("res://") && find("::") == -1; } -bool String::is_rel_path() const { - return !is_abs_path(); +bool String::is_relative_path() const { + return !is_absolute_path(); } String String::get_base_dir() const { - int basepos = find(":/"); - if (basepos == -1) { - basepos = find(":\\"); + int end = 0; + + // url scheme style base + int basepos = find("://"); + if (basepos != -1) { + end = basepos + 3; } + + // windows top level directory base + if (end == 0) { + basepos = find(":/"); + if (basepos == -1) { + basepos = find(":\\"); + } + if (basepos != -1) { + end = basepos + 2; + } + } + + // unix root directory base + if (end == 0) { + if (begins_with("/")) { + end = 1; + } + } + String rs; String base; - if (basepos != -1) { - int end = basepos + 3; + if (end != 0) { rs = substr(end, length()); base = substr(0, end); } else { - if (begins_with("/")) { - rs = substr(1, length()); - base = "/"; - } else { - rs = *this; - } + rs = *this; } int sep = MAX(rs.rfind("/"), rs.rfind("\\")); @@ -4331,7 +4403,7 @@ String String::get_extension() const { } String String::plus_file(const String &p_file) const { - if (empty()) { + if (is_empty()) { return p_file; } if (operator[](length() - 1) == '/' || (p_file.size() > 0 && p_file.operator[](0) == '/')) { @@ -4340,69 +4412,12 @@ String String::plus_file(const String &p_file) const { return *this + "/" + p_file; } -String String::percent_encode() const { - CharString cs = utf8(); - String encoded; - for (int i = 0; i < cs.length(); i++) { - uint8_t c = cs[i]; - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '~' || c == '.') { - char p[2] = { (char)c, 0 }; - encoded += p; - } else { - char p[4] = { '%', 0, 0, 0 }; - static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - - p[1] = hex[c >> 4]; - p[2] = hex[c & 0xF]; - encoded += p; - } - } - - return encoded; -} - -String String::percent_decode() const { - CharString pe; - - CharString cs = utf8(); - for (int i = 0; i < cs.length(); i++) { - uint8_t c = cs[i]; - if (c == '%' && i < length() - 2) { - uint8_t a = LOWERCASE(cs[i + 1]); - uint8_t b = LOWERCASE(cs[i + 2]); - - if (a >= '0' && a <= '9') { - c = (a - '0') << 4; - } else if (a >= 'a' && a <= 'f') { - c = (a - 'a' + 10) << 4; - } else { - continue; - } - - uint8_t d = 0; - - if (b >= '0' && b <= '9') { - d = (b - '0'); - } else if (b >= 'a' && b <= 'f') { - d = (b - 'a' + 10); - } else { - continue; - } - c += d; - i += 2; - } - pe += c; - } - - return String::utf8(pe.ptr()); -} - String String::property_name_encode() const { // Escape and quote strings with extended ASCII or further Unicode characters // as well as '"', '=' or ' ' (32) const char32_t *cstr = get_data(); for (int i = 0; cstr[i]; i++) { - if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) { + if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) { return "\"" + c_escape_multiline() + "\""; } } @@ -4410,6 +4425,18 @@ String String::property_name_encode() const { return *this; } +// Changes made to the set of invalid characters must also be reflected in the String documentation. +const String String::invalid_node_name_characters = ". : @ / \""; + +String String::validate_node_name() const { + Vector<String> chars = String::invalid_node_name_characters.split(" "); + String name = this->replace(chars[0], ""); + for (int i = 1; i < chars.size(); i++) { + name = name.replace(chars[i], ""); + } + return name; +} + String String::get_basename() const { int pos = rfind("."); if (pos < 0 || pos < MAX(rfind("/"), rfind("\\"))) { @@ -4484,7 +4511,7 @@ String String::sprintf(const Array &values, bool *error) const { for (; *self; self++) { const char32_t c = *self; - if (in_format) { // We have % - lets see what else we get. + if (in_format) { // We have % - let's see what else we get. switch (c) { case '%': { // Replace %% with % formatted += chr(c); @@ -4760,7 +4787,7 @@ String String::unquote() const { Vector<uint8_t> String::to_ascii_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } CharString charstr = s->ascii(); @@ -4769,14 +4796,14 @@ Vector<uint8_t> String::to_ascii_buffer() const { size_t len = charstr.length(); retval.resize(len); uint8_t *w = retval.ptrw(); - copymem(w, charstr.ptr(), len); + memcpy(w, charstr.ptr(), len); return retval; } Vector<uint8_t> String::to_utf8_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } CharString charstr = s->utf8(); @@ -4785,14 +4812,14 @@ Vector<uint8_t> String::to_utf8_buffer() const { size_t len = charstr.length(); retval.resize(len); uint8_t *w = retval.ptrw(); - copymem(w, charstr.ptr(), len); + memcpy(w, charstr.ptr(), len); return retval; } Vector<uint8_t> String::to_utf16_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } Char16String charstr = s->utf16(); @@ -4801,14 +4828,14 @@ Vector<uint8_t> String::to_utf16_buffer() const { size_t len = charstr.length() * sizeof(char16_t); retval.resize(len); uint8_t *w = retval.ptrw(); - copymem(w, (const void *)charstr.ptr(), len); + memcpy(w, (const void *)charstr.ptr(), len); return retval; } Vector<uint8_t> String::to_utf32_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } @@ -4816,7 +4843,7 @@ Vector<uint8_t> String::to_utf32_buffer() const { size_t len = s->length() * sizeof(char32_t); retval.resize(len); uint8_t *w = retval.ptrw(); - copymem(w, (const void *)s->ptr(), len); + memcpy(w, (const void *)s->ptr(), len); return retval; } @@ -4842,15 +4869,20 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St return p_text_plural; } +/* DTR and DTRN are used for the documentation, handling descriptions extracted + * from the XML. + * They also replace `$DOCS_URL` with the actual URL to the documentation's branch, + * to allow dehardcoding it in the XML and doing proper substitutions everywhere. + */ String DTR(const String &p_text, const String &p_context) { // Comes straight from the XML, so remove indentation and any trailing whitespace. const String text = p_text.dedent().strip_edges(); if (TranslationServer::get_singleton()) { - return TranslationServer::get_singleton()->doc_translate(text, p_context); + return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL); } - return text; + return text.replace("$DOCS_URL", VERSION_DOCS_URL); } String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { @@ -4858,14 +4890,14 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St const String text_plural = p_text_plural.dedent().strip_edges(); if (TranslationServer::get_singleton()) { - return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context); + return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL); } // Return message based on English plural rule if translation is not possible. if (p_n == 1) { - return text; + return text.replace("$DOCS_URL", VERSION_DOCS_URL); } - return text_plural; + return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL); } #endif |