diff options
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r-- | core/string/ustring.cpp | 1059 |
1 files changed, 589 insertions, 470 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 397743fb6e..c02be9e5b7 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -35,9 +35,11 @@ #include "core/math/math_funcs.h" #include "core/os/memory.h" #include "core/string/print_string.h" +#include "core/string/string_name.h" #include "core/string/translation.h" #include "core/string/ucaps.h" #include "core/variant/variant.h" +#include "core/version_generated.gen.h" #include <stdio.h> #include <stdlib.h> @@ -53,34 +55,14 @@ static const int MAX_DECIMALS = 32; -static _FORCE_INLINE_ bool is_digit(char32_t c) { - return (c >= '0' && c <= '9'); -} - -static _FORCE_INLINE_ bool is_hex_digit(char32_t c) { - return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); -} - -static _FORCE_INLINE_ bool is_upper_case(char32_t c) { - return (c >= 'A' && c <= 'Z'); -} - -static _FORCE_INLINE_ bool is_lower_case(char32_t c) { - return (c >= 'a' && c <= 'z'); -} - static _FORCE_INLINE_ char32_t lower_case(char32_t c) { - return (is_upper_case(c) ? (c + ('a' - 'A')) : c); + return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c); } const char CharString::_null = 0; const char16_t Char16String::_null = 0; const char32_t String::_null = 0; -bool is_symbol(char32_t c) { - return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); -} - bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) { const String &s = p_s; int beg = CLAMP(p_col, 0, s.length()); @@ -122,16 +104,18 @@ bool Char16String::operator<(const Char16String &p_right) const { } Char16String &Char16String::operator+=(char16_t p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); + const int lhs_len = length(); + resize(lhs_len + 2); + + char16_t *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; return *this; } -Char16String &Char16String::operator=(const char16_t *p_cstr) { +void Char16String::operator=(const char16_t *p_cstr) { copy_from(p_cstr); - return *this; } const char16_t *Char16String::get_data() const { @@ -178,16 +162,18 @@ bool CharString::operator<(const CharString &p_right) const { } CharString &CharString::operator+=(char p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); + const int lhs_len = length(); + resize(lhs_len + 2); + + char *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; return *this; } -CharString &CharString::operator=(const char *p_cstr) { +void CharString::operator=(const char *p_cstr) { copy_from(p_cstr); - return *this; } const char *CharString::get_data() const { @@ -325,11 +311,7 @@ void String::copy_from(const char *p_cstr) { return; } - int len = 0; - const char *ptr = p_cstr; - while (*(ptr++) != 0) { - len++; - } + const size_t len = strlen(p_cstr); if (len == 0) { resize(0); @@ -340,8 +322,14 @@ void String::copy_from(const char *p_cstr) { char32_t *dst = this->ptrw(); - for (int i = 0; i < len + 1; i++) { - dst[i] = p_cstr[i]; + for (size_t i = 0; i <= len; i++) { + uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]); + if (c == 0 && i < len) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } } @@ -368,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) { char32_t *dst = this->ptrw(); for (int i = 0; i < len; i++) { - dst[i] = p_cstr[i]; + uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]); + if (c == 0) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } dst[len] = 0; } @@ -394,14 +388,22 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) { } void String::copy_from(const char32_t &p_char) { - resize(2); - if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - set(0, 0xfffd); - } else { - set(0, p_char); + if (p_char == 0) { + print_unicode_error("NUL character", true); + return; + } + if ((p_char & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); + } + if (p_char > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); } - set(1, 0); + + resize(2); + + char32_t *dst = ptrw(); + dst[0] = p_char; + dst[1] = 0; } void String::copy_from(const char32_t *p_cstr) { @@ -450,17 +452,22 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) { // p_length <= p_char strlen void String::copy_from_unchecked(const char32_t *p_char, const int p_length) { resize(p_length + 1); - set(p_length, 0); - char32_t *dst = ptrw(); + dst[p_length] = 0; for (int i = 0; i < p_length; i++) { - if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + "."); - dst[i] = 0xfffd; - } else { - dst[i] = p_char[i]; + if (p_char[i] == 0) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + continue; + } + if ((p_char[i] & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i])); } + if (p_char[i] > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i])); + } + dst[i] = p_char[i]; } } @@ -482,6 +489,12 @@ String String::operator+(const String &p_str) const { return res; } +String String::operator+(char32_t p_char) const { + String res = *this; + res += p_char; + return res; +} + String operator+(const char *p_chr, const String &p_str) { String tmp = p_chr; tmp += p_str; @@ -493,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) { // wchar_t is 16-bit String tmp = String::utf16((const char16_t *)p_chr); #else - // wchar_t is 32-bi + // wchar_t is 32-bit String tmp = (const char32_t *)p_chr; #endif tmp += p_str; @@ -505,27 +518,25 @@ String operator+(char32_t p_chr, const String &p_str) { } String &String::operator+=(const String &p_str) { - if (is_empty()) { + const int lhs_len = length(); + if (lhs_len == 0) { *this = p_str; return *this; } - if (p_str.is_empty()) { + const int rhs_len = p_str.length(); + if (rhs_len == 0) { return *this; } - int from = length(); - - resize(length() + p_str.size()); + resize(lhs_len + rhs_len + 1); - const char32_t *src = p_str.get_data(); - char32_t *dst = ptrw(); + const char32_t *src = p_str.ptr(); + char32_t *dst = ptrw() + lhs_len; - set(length(), 0); - - for (int i = 0; i < p_str.length(); i++) { - dst[from + i] = src[i]; - } + // Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination). + memcpy(dst, src, rhs_len * sizeof(char32_t)); + *(dst + rhs_len) = _null; return *this; } @@ -535,22 +546,21 @@ String &String::operator+=(const char *p_str) { return *this; } - int src_len = 0; - const char *ptr = p_str; - while (*(ptr++) != 0) { - src_len++; - } - - int from = length(); + const int lhs_len = length(); + const size_t rhs_len = strlen(p_str); - resize(from + src_len + 1); - - char32_t *dst = ptrw(); + resize(lhs_len + rhs_len + 1); - set(length(), 0); + char32_t *dst = ptrw() + lhs_len; - for (int i = 0; i < src_len; i++) { - dst[from + i] = p_str[i]; + for (size_t i = 0; i <= rhs_len; i++) { + uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]); + if (c == 0 && i < rhs_len) { + print_unicode_error("NUL character", true); + dst[i] = 0x20; + } else { + dst[i] = c; + } } return *this; @@ -573,15 +583,23 @@ String &String::operator+=(const char32_t *p_str) { } String &String::operator+=(char32_t p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); - set(length() - 1, 0xfffd); - } else { - set(length() - 1, p_char); + if (p_char == 0) { + print_unicode_error("NUL character", true); + return *this; + } + if ((p_char & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); + } + if (p_char > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); } + const int lhs_len = length(); + resize(lhs_len + 2); + char32_t *dst = ptrw(); + dst[lhs_len] = p_char; + dst[lhs_len + 1] = 0; + return *this; } @@ -952,10 +970,6 @@ const char32_t *String::get_data() const { return size() ? &operator[](0) : &zero; } -void String::erase(int p_pos, int p_chars) { - *this = left(MAX(p_pos, 0)) + substr(p_pos + p_chars, length() - ((p_pos + p_chars))); -} - String String::capitalize() const { String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges(); String cap; @@ -979,21 +993,21 @@ String String::camelcase_to_underscore(bool lowercase) const { int start_index = 0; for (int i = 1; i < this->size(); i++) { - bool is_upper = is_upper_case(cstr[i]); + bool is_upper = is_ascii_upper_case(cstr[i]); bool is_number = is_digit(cstr[i]); bool are_next_2_lower = false; bool is_next_lower = false; bool is_next_number = false; - bool was_precedent_upper = is_upper_case(cstr[i - 1]); + bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]); bool was_precedent_number = is_digit(cstr[i - 1]); if (i + 2 < this->size()) { - are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]); + are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]); } if (i + 1 < this->size()) { - is_next_lower = is_lower_case(cstr[i + 1]); + is_next_lower = is_ascii_lower_case(cstr[i + 1]); is_next_number = is_digit(cstr[i + 1]); } @@ -1532,115 +1546,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { } String String::num_real(double p_num, bool p_trailing) { - if (Math::is_nan(p_num)) { - return "nan"; - } - - if (Math::is_inf(p_num)) { - if (signbit(p_num)) { - return "-inf"; + if (p_num == (double)(int64_t)p_num) { + if (p_trailing) { + return num_int64((int64_t)p_num) + ".0"; } else { - return "inf"; + return num_int64((int64_t)p_num); } } - - String s; - String sd; - - // Integer part. - - bool neg = p_num < 0; - p_num = ABS(p_num); - int64_t intn = (int64_t)p_num; - - // Decimal part. - - if (intn != p_num) { - double dec = p_num - (double)intn; - - int digit = 0; - #ifdef REAL_T_IS_DOUBLE - int decimals = 14; - double tolerance = 1e-14; + int decimals = 14; #else - int decimals = 6; - double tolerance = 1e-6; + int decimals = 6; #endif - // We want to align the digits to the above sane default, so we only - // need to subtract log10 for numbers with a positive power of ten. - if (p_num > 10) { - decimals -= (int)floor(log10(p_num)); - } - - if (decimals > MAX_DECIMALS) { - decimals = MAX_DECIMALS; - } - - // In case the value ends up ending in "99999", we want to add a - // tiny bit to the value we're checking when deciding when to stop, - // so we multiply by slightly above 1 (1 + 1e-7 or 1e-15). - double check_multiplier = 1 + tolerance / 10; - - int64_t dec_int = 0; - int64_t dec_max = 0; - - while (true) { - dec *= 10.0; - dec_int = dec_int * 10 + (int64_t)dec % 10; - dec_max = dec_max * 10 + 9; - digit++; - - if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) { - break; - } - - if (digit == decimals) { - break; - } - } - - dec *= 10; - int last = (int64_t)dec % 10; - - if (last > 5) { - if (dec_int == dec_max) { - dec_int = 0; - intn++; - } else { - dec_int++; - } - } - - String decimal; - for (int i = 0; i < digit; i++) { - char num[2] = { 0, 0 }; - num[0] = '0' + dec_int % 10; - decimal = num + decimal; - dec_int /= 10; - } - sd = '.' + decimal; - } else if (p_trailing) { - sd = ".0"; - } else { - sd = ""; - } - - if (intn == 0) { - s = "0"; - } else { - while (intn) { - char32_t num = '0' + (intn % 10); - intn /= 10; - s = num + s; - } - } - - s = s + sd; - if (neg) { - s = "-" + s; + // We want to align the digits to the above sane default, so we only + // need to subtract log10 for numbers with a positive power of ten. + if (p_num > 10) { + decimals -= (int)floor(log10(p_num)); } - return s; + return num(p_num, decimals); } String String::num_scientific(double p_num) { @@ -1699,6 +1622,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { return ret; } +void String::print_unicode_error(const String &p_message, bool p_critical) const { + if (p_critical) { + print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message)); + } else { + print_error(vformat("Unicode parsing error: %s", p_message)); + } +} + CharString String::ascii(bool p_allow_extended) const { if (!length()) { return CharString(); @@ -1712,7 +1643,7 @@ CharString String::ascii(bool p_allow_extended) const { if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) { cs[i] = c; } else { - print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character."); + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c)); cs[i] = 0x20; } } @@ -1727,11 +1658,9 @@ String String::utf8(const char *p_utf8, int p_len) { return ret; } -bool String::parse_utf8(const char *p_utf8, int p_len) { -#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); - +Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) { if (!p_utf8) { - return true; + return ERR_INVALID_DATA; } String aux; @@ -1751,14 +1680,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } } + bool decode_error = false; + bool decode_failed = false; { const char *ptrtmp = p_utf8; const char *ptrtmp_limit = &p_utf8[p_len]; int skip = 0; + uint8_t c_start = 0; while (ptrtmp != ptrtmp_limit && *ptrtmp) { - if (skip == 0) { - uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp); + uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp); + if (skip == 0) { + if (p_skip_cr && c == '\r') { + ptrtmp++; + continue; + } /* Determine the number of characters in sequence */ if ((c & 0x80) == 0) { skip = 0; @@ -1768,20 +1704,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { skip = 2; } else if ((c & 0xf8) == 0xf0) { skip = 3; + } else if ((c & 0xfc) == 0xf8) { + skip = 4; + } else if ((c & 0xfe) == 0xfc) { + skip = 5; } else { - _UNICERROR("invalid skip at " + num_int64(cstr_size)); - return true; //invalid utf8 + skip = 0; + print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true); + decode_failed = true; } + c_start = c; if (skip == 1 && (c & 0x1e) == 0) { - _UNICERROR("overlong rejected at " + num_int64(cstr_size)); - return true; //reject overlong + print_unicode_error(vformat("Overlong encoding (%x ...)", c)); + decode_error = true; } - str_size++; - } else { - --skip; + if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) { + print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c)); + decode_error = true; + } + if (c < 0x80 || c > 0xbf) { + print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true); + decode_failed = true; + skip = 0; + } else { + --skip; + } } cstr_size++; @@ -1789,80 +1739,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } if (skip) { - _UNICERROR("no space left"); - return true; //not enough space + print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true); + decode_failed = true; } } if (str_size == 0) { clear(); - return false; + return OK; // empty string } resize(str_size + 1); char32_t *dst = ptrw(); dst[str_size] = 0; + int skip = 0; + uint32_t unichar = 0; while (cstr_size) { - int len = 0; - - /* Determine the number of characters in sequence */ - if ((*p_utf8 & 0x80) == 0) { - len = 1; - } else if ((*p_utf8 & 0xe0) == 0xc0) { - len = 2; - } else if ((*p_utf8 & 0xf0) == 0xe0) { - len = 3; - } else if ((*p_utf8 & 0xf8) == 0xf0) { - len = 4; - } else { - _UNICERROR("invalid len"); - return true; //invalid UTF8 - } - - if (len > cstr_size) { - _UNICERROR("no space left"); - return true; //not enough space - } + uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8); - if (len == 2 && (*p_utf8 & 0x1E) == 0) { - _UNICERROR("no space left"); - return true; //reject overlong - } - - /* Convert the first character */ - - uint32_t unichar = 0; - - if (len == 1) { - unichar = *p_utf8; + if (skip == 0) { + if (p_skip_cr && c == '\r') { + p_utf8++; + continue; + } + /* Determine the number of characters in sequence */ + if ((c & 0x80) == 0) { + *(dst++) = c; + unichar = 0; + skip = 0; + } else if ((c & 0xe0) == 0xc0) { + unichar = (0xff >> 3) & c; + skip = 1; + } else if ((c & 0xf0) == 0xe0) { + unichar = (0xff >> 4) & c; + skip = 2; + } else if ((c & 0xf8) == 0xf0) { + unichar = (0xff >> 5) & c; + skip = 3; + } else if ((c & 0xfc) == 0xf8) { + unichar = (0xff >> 6) & c; + skip = 4; + } else if ((c & 0xfe) == 0xfc) { + unichar = (0xff >> 7) & c; + skip = 5; + } else { + *(dst++) = 0x20; + unichar = 0; + skip = 0; + } } else { - unichar = (0xff >> (len + 1)) & *p_utf8; - - for (int i = 1; i < len; i++) { - if ((p_utf8[i] & 0xc0) != 0x80) { - _UNICERROR("invalid utf8"); - return true; //invalid utf8 - } - if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) { - _UNICERROR("invalid utf8 overlong"); - return true; //no overlong + if (c < 0x80 || c > 0xbf) { + *(dst++) = 0x20; + skip = 0; + } else { + unichar = (unichar << 6) | (c & 0x3f); + --skip; + if (skip == 0) { + if (unichar == 0) { + print_unicode_error("NUL character", true); + decode_failed = true; + unichar = 0x20; + } + if ((unichar & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", unichar)); + decode_error = true; + } + if (unichar > 0x10ffff) { + print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar)); + decode_error = true; + } + *(dst++) = unichar; } - unichar = (unichar << 6) | (p_utf8[i] & 0x3f); } } - if (unichar >= 0xd800 && unichar <= 0xdfff) { - _UNICERROR("invalid code point"); - return CharString(); - } - *(dst++) = unichar; - cstr_size -= len; - p_utf8 += len; + cstr_size--; + p_utf8++; + } + if (skip) { + *(dst++) = 0x20; } - return false; -#undef _UNICERROR + if (decode_failed) { + return ERR_INVALID_DATA; + } else if (decode_error) { + return ERR_PARSE_ERROR; + } else { + return OK; + } } CharString String::utf8() const { @@ -1881,15 +1846,17 @@ CharString String::utf8() const { fl += 2; } else if (c <= 0xffff) { // 16 bits fl += 3; - } else if (c <= 0x0010ffff) { // 21 bits + } else if (c <= 0x001fffff) { // 21 bits fl += 4; + } else if (c <= 0x03ffffff) { // 26 bits + fl += 5; + print_unicode_error(vformat("Invalid unicode codepoint (%x)", c)); + } else if (c <= 0x7fffffff) { // 31 bits + fl += 6; + print_unicode_error(vformat("Invalid unicode codepoint (%x)", c)); } else { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return CharString(); - } - if (c >= 0xd800 && c <= 0xdfff) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return CharString(); + fl += 1; + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true); } } @@ -1915,11 +1882,26 @@ CharString String::utf8() const { APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. - } else { // 21 bits + } else if (c <= 0x001fffff) { // 21 bits APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else if (c <= 0x03ffffff) { // 26 bits + APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else if (c <= 0x7fffffff) { // 31 bits + APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit. + APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits. + APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. + } else { + APPEND_CHAR(0x20); } } #undef APPEND_CHAR @@ -1935,11 +1917,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) { return ret; } -bool String::parse_utf16(const char16_t *p_utf16, int p_len) { -#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?"); - +Error String::parse_utf16(const char16_t *p_utf16, int p_len) { if (!p_utf16) { - return true; + return ERR_INVALID_DATA; } String aux; @@ -1966,80 +1946,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) { } } + bool decode_error = false; { const char16_t *ptrtmp = p_utf16; const char16_t *ptrtmp_limit = &p_utf16[p_len]; - int skip = 0; + uint32_t c_prev = 0; + bool skip = false; while (ptrtmp != ptrtmp_limit && *ptrtmp) { uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp; - if (skip == 0) { - if ((c & 0xfffffc00) == 0xd800) { - skip = 1; // lead surrogate - } else if ((c & 0xfffffc00) == 0xdc00) { - _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); - return true; // invalid UTF16 - } else { - skip = 0; + + if ((c & 0xfffffc00) == 0xd800) { // lead surrogate + if (skip) { + print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c)); + decode_error = true; } - str_size++; - } else { - if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate - --skip; + skip = true; + } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + if (skip) { + str_size--; } else { - _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); - return true; // invalid UTF16 + print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c)); + decode_error = true; } + skip = false; + } else { + skip = false; } + c_prev = c; + str_size++; cstr_size++; ptrtmp++; } if (skip) { - _UNICERROR("no space left"); - return true; // not enough space + print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev)); + decode_error = true; } } if (str_size == 0) { clear(); - return false; + return OK; // empty string } resize(str_size + 1); char32_t *dst = ptrw(); dst[str_size] = 0; + bool skip = false; + uint32_t c_prev = 0; while (cstr_size) { - int len = 0; uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16; - if ((c & 0xfffffc00) == 0xd800) { - len = 2; + if ((c & 0xfffffc00) == 0xd800) { // lead surrogate + if (skip) { + *(dst++) = c_prev; // unpaired, store as is + } + skip = true; + } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + if (skip) { + *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair + } else { + *(dst++) = c; // unpaired, store as is + } + skip = false; } else { - len = 1; - } - - if (len > cstr_size) { - _UNICERROR("no space left"); - return true; //not enough space + *(dst++) = c; + skip = false; } - uint32_t unichar = 0; - if (len == 1) { - unichar = c; - } else { - uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1]; - unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000); - } + cstr_size--; + p_utf16++; + c_prev = c; + } - *(dst++) = unichar; - cstr_size -= len; - p_utf16 += len; + if (skip) { + *(dst++) = c_prev; } - return false; -#undef _UNICERROR + if (decode_error) { + return ERR_PARSE_ERROR; + } else { + return OK; + } } Char16String String::utf16() const { @@ -2054,15 +2044,14 @@ Char16String String::utf16() const { uint32_t c = d[i]; if (c <= 0xffff) { // 16 bits. fl += 1; + if ((c & 0xfffff800) == 0xd800) { + print_unicode_error(vformat("Unpaired surrogate (%x)", c)); + } } else if (c <= 0x10ffff) { // 32 bits. fl += 2; } else { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return Char16String(); - } - if (c >= 0xd800 && c <= 0xdfff) { - print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); - return Char16String(); + print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true); + fl += 1; } } @@ -2081,9 +2070,11 @@ Char16String String::utf16() const { if (c <= 0xffff) { // 16 bits. APPEND_CHAR(c); - } else { // 32 bits. + } else if (c <= 0x10ffff) { // 32 bits. APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate. APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate. + } else { + APPEND_CHAR(0x20); } } #undef APPEND_CHAR @@ -2155,7 +2146,7 @@ int64_t String::hex_to_int() const { } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0'); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); hex *= 16; hex += n; s++; @@ -2194,7 +2185,7 @@ int64_t String::bin_to_int() const { } // Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0'); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); binary *= 2; binary += n; s++; @@ -2217,7 +2208,7 @@ int64_t String::to_int() const { char32_t c = operator[](i); if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2246,7 +2237,7 @@ int64_t String::to_int(const char *p_str, int p_len) { char c = p_str[i]; if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2277,7 +2268,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) { wchar_t c = p_str[i]; if (is_digit(c)) { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); - ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); integer *= 10; integer += c - '0'; @@ -2308,7 +2299,7 @@ bool String::is_numeric() const { return false; } dot = true; - } else if (c < '0' || c > '9') { + } else if (!is_digit(c)) { return false; } } @@ -2317,28 +2308,33 @@ bool String::is_numeric() const { } template <class C> -static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number, - * optionally preceded by white space. Must - * have form "-I.FE-X", where I is the integer - * part of the mantissa, F is the fractional - * part of the mantissa, and X is the - * exponent. Either of the signs may be "+", - * "-", or omitted. Either I or F may be - * omitted, or both. The decimal point isn't - * necessary unless F is present. The "E" may - * actually be an "e". E and X may both be - * omitted (but not just one). */ - C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's - * address here. */ -{ - static const int maxExponent = 511; /* Largest possible base 10 exponent. Any - * exponent larger than this will already - * produce underflow or overflow, so there's - * no need to worry about additional digits. - */ - static const double powersOf10[] = { /* Table giving binary powers of 10. Entry */ - 10., /* is 10^2^i. Used to convert decimal */ - 100., /* exponents into floating-point numbers. */ +static double built_in_strtod( + /* A decimal ASCII floating-point number, + * optionally preceded by white space. Must + * have form "-I.FE-X", where I is the integer + * part of the mantissa, F is the fractional + * part of the mantissa, and X is the + * exponent. Either of the signs may be "+", + * "-", or omitted. Either I or F may be + * omitted, or both. The decimal point isn't + * necessary unless F is present. The "E" may + * actually be an "e". E and X may both be + * omitted (but not just one). */ + const C *string, + /* If non-nullptr, store terminating Cacter's + * address here. */ + C **endPtr = nullptr) { + /* Largest possible base 10 exponent. Any + * exponent larger than this will already + * produce underflow or overflow, so there's + * no need to worry about additional digits. */ + static const int maxExponent = 511; + /* Table giving binary powers of 10. Entry + * is 10^2^i. Used to convert decimal + * exponents into floating-point numbers. */ + static const double powersOf10[] = { + 10., + 100., 1.0e4, 1.0e8, 1.0e16, @@ -2353,25 +2349,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point const double *d; const C *p; int c; - int exp = 0; /* Exponent read from "EX" field. */ - int fracExp = 0; /* Exponent that derives from the fractional - * part. Under normal circumstances, it is - * the negative of the number of digits in F. - * However, if I is very long, the last digits - * of I get dropped (otherwise a long I with a - * large negative exponent could cause an - * unnecessary overflow on I alone). In this - * case, fracExp is incremented one for each - * dropped digit. */ - int mantSize; /* Number of digits in mantissa. */ - int decPt; /* Number of mantissa digits BEFORE decimal - * point. */ - const C *pExp; /* Temporarily holds location of exponent in - * string. */ + /* Exponent read from "EX" field. */ + int exp = 0; + /* Exponent that derives from the fractional + * part. Under normal circumstances, it is + * the negative of the number of digits in F. + * However, if I is very long, the last digits + * of I get dropped (otherwise a long I with a + * large negative exponent could cause an + * unnecessary overflow on I alone). In this + * case, fracExp is incremented one for each + * dropped digit. */ + int fracExp = 0; + /* Number of digits in mantissa. */ + int mantSize; + /* Number of mantissa digits BEFORE decimal point. */ + int decPt; + /* Temporarily holds location of exponent in string. */ + const C *pExp; /* - * Strip off leading blanks and check for a sign. - */ + * Strip off leading blanks and check for a sign. + */ p = string; while (*p == ' ' || *p == '\t' || *p == '\n') { @@ -2388,9 +2387,9 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Count the number of digits in the mantissa (including the decimal - * point), and also locate the decimal point. - */ + * Count the number of digits in the mantissa (including the decimal + * point), and also locate the decimal point. + */ decPt = -1; for (mantSize = 0;; mantSize += 1) { @@ -2405,11 +2404,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Now suck up the digits in the mantissa. Use two integers to collect 9 - * digits each (this is faster than using floating-point). If the mantissa - * has more than 18 digits, ignore the extras, since they can't affect the - * value anyway. - */ + * Now suck up the digits in the mantissa. Use two integers to collect 9 + * digits each (this is faster than using floating-point). If the mantissa + * has more than 18 digits, ignore the extras, since they can't affect the + * value anyway. + */ pExp = p; p -= mantSize; @@ -2455,8 +2454,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Skim off the exponent. - */ + * Skim off the exponent. + */ p = pExp; if ((*p == 'E') || (*p == 'e')) { @@ -2486,10 +2485,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } /* - * Generate a floating-point number that represents the exponent. Do this - * by processing the exponent one bit at a time to combine many powers of - * 2 of 10. Then combine the exponent with the fraction. - */ + * Generate a floating-point number that represents the exponent. Do this + * by processing the exponent one bit at a time to combine many powers of + * 2 of 10. Then combine the exponent with the fraction. + */ if (exp < 0) { expSign = true; @@ -2591,7 +2590,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { return INT64_MIN; } } else { - ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small.")); } } integer *= 10; @@ -3168,7 +3167,7 @@ bool String::is_subsequence_of(const String &p_string) const { return _base_is_subsequence_of(p_string, false); } -bool String::is_subsequence_ofi(const String &p_string) const { +bool String::is_subsequence_ofn(const String &p_string) const { return _base_is_subsequence_of(p_string, true); } @@ -3452,51 +3451,52 @@ String String::replacen(const String &p_key, const String &p_with) const { String String::repeat(int p_count) const { ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number."); - String new_string; - const char32_t *src = this->get_data(); - - new_string.resize(length() * p_count + 1); - new_string[length() * p_count] = 0; - - for (int i = 0; i < p_count; i++) { - for (int j = 0; j < length(); j++) { - new_string[i * length() + j] = src[j]; - } - } - + int len = length(); + String new_string = *this; + new_string.resize(p_count * len + 1); + + char32_t *dst = new_string.ptrw(); + int offset = 1; + int stride = 1; + while (offset < p_count) { + memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t)); + offset += stride; + stride = MIN(stride * 2, p_count - offset); + } + dst[p_count * len] = _null; return new_string; } -String String::left(int p_pos) const { - if (p_pos < 0) { - p_pos = length() + p_pos; +String String::left(int p_len) const { + if (p_len < 0) { + p_len = length() + p_len; } - if (p_pos <= 0) { + if (p_len <= 0) { return ""; } - if (p_pos >= length()) { + if (p_len >= length()) { return *this; } - return substr(0, p_pos); + return substr(0, p_len); } -String String::right(int p_pos) const { - if (p_pos < 0) { - p_pos = length() + p_pos; +String String::right(int p_len) const { + if (p_len < 0) { + p_len = length() + p_len; } - if (p_pos <= 0) { + if (p_len <= 0) { return ""; } - if (p_pos >= length()) { + if (p_len >= length()) { return *this; } - return substr(length() - p_pos); + return substr(length() - p_len); } char32_t String::unicode_at(int p_idx) const { @@ -3504,6 +3504,27 @@ char32_t String::unicode_at(int p_idx) const { return operator[](p_idx); } +String String::indent(const String &p_prefix) const { + String new_string; + int line_start = 0; + + for (int i = 0; i < length(); i++) { + const char32_t c = operator[](i); + if (c == '\n') { + if (i == line_start) { + new_string += c; // Leave empty lines empty. + } else { + new_string += p_prefix + substr(line_start, i - line_start + 1); + } + line_start = i + 1; + } + } + if (line_start != length()) { + new_string += p_prefix + substr(line_start); + } + return new_string; +} + String String::dedent() const { String new_string; String indent; @@ -3625,6 +3646,10 @@ String String::rstrip(const String &p_chars) const { return substr(0, end + 1); } +bool String::is_network_share_path() const { + return begins_with("//") || begins_with("\\\\"); +} + String String::simplify_path() const { String s = *this; String drive; @@ -3637,6 +3662,9 @@ String String::simplify_path() const { } else if (s.begins_with("user://")) { drive = "user://"; s = s.substr(7, s.length()); + } else if (is_network_share_path()) { + drive = s.substr(0, 2); + s = s.substr(2, s.length() - 2); } else if (s.begins_with("/") || s.begins_with("\\")) { drive = s.substr(0, 1); s = s.substr(1, s.length() - 1); @@ -3665,15 +3693,15 @@ String String::simplify_path() const { for (int i = 0; i < dirs.size(); i++) { String d = dirs[i]; if (d == ".") { - dirs.remove(i); + dirs.remove_at(i); i--; } else if (d == "..") { if (i == 0) { - dirs.remove(i); + dirs.remove_at(i); i--; } else { - dirs.remove(i); - dirs.remove(i - 1); + dirs.remove_at(i); + dirs.remove_at(i - 1); i -= 2; } } @@ -3735,6 +3763,31 @@ bool String::is_absolute_path() const { } } +static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) { + if (p_index == 0 && is_digit(p_char)) { + return false; // No start with number plz. + } + return is_ascii_identifier_char(p_char); +} + +String String::validate_identifier() const { + if (is_empty()) { + return "_"; // Empty string is not a valid identifier; + } + + String result = *this; + int len = result.length(); + char32_t *buffer = result.ptrw(); + + for (int i = 0; i < len; i++) { + if (!_is_valid_identifier_bit(i, buffer[i])) { + buffer[i] = '_'; + } + } + + return result; +} + bool String::is_valid_identifier() const { int len = length(); @@ -3745,15 +3798,7 @@ bool String::is_valid_identifier() const { const char32_t *str = &operator[](0); for (int i = 0; i < len; i++) { - if (i == 0) { - if (is_digit(str[0])) { - return false; // no start with number plz - } - } - - bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_'; - - if (!valid_char) { + if (!_is_valid_identifier_bit(i, str[i])) { return false; } } @@ -3775,18 +3820,15 @@ String String::uri_encode() const { const CharString temp = utf8(); String res; for (int i = 0; i < temp.length(); ++i) { - char ord = temp[i]; - if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) { + uint8_t ord = temp[i]; + if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) { res += ord; } else { - char h_Val[3]; -#if defined(__GNUC__) || defined(_MSC_VER) - snprintf(h_Val, 3, "%02hhX", ord); -#else - sprintf(h_Val, "%02hhX", ord); -#endif - res += "%"; - res += h_Val; + char p[4] = { '%', 0, 0, 0 }; + static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + p[1] = hex[ord >> 4]; + p[2] = hex[ord & 0xF]; + res += p; } } return res; @@ -3798,9 +3840,9 @@ String String::uri_decode() const { for (int i = 0; i < src.length(); ++i) { if (src[i] == '%' && i + 2 < src.length()) { char ord1 = src[i + 1]; - if (is_digit(ord1) || is_upper_case(ord1)) { + if (is_digit(ord1) || is_ascii_upper_case(ord1)) { char ord2 = src[i + 2]; - if (is_digit(ord2) || is_upper_case(ord2)) { + if (is_digit(ord2) || is_ascii_upper_case(ord2)) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); i += 2; @@ -3927,7 +3969,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch for (int i = 2; i < p_src_len; i++) { eat = i + 1; char32_t ct = p_src[i]; - if (ct == ';' || ct < '0' || ct > '9') { + if (ct == ';' || !is_digit(ct)) { break; } } @@ -4057,7 +4099,7 @@ String String::pad_zeros(int p_digits) const { int begin = 0; - while (begin < end && (s[begin] < '0' || s[begin] > '9')) { + while (begin < end && !is_digit(s[begin])) { begin++; } @@ -4102,7 +4144,7 @@ bool String::is_valid_int() const { } for (int i = from; i < len; i++) { - if (operator[](i) < '0' || operator[](i) > '9') { + if (!is_digit(operator[](i))) { return false; // no start with number plz } } @@ -4204,15 +4246,11 @@ String String::path_to(const String &p_path) const { dst += "/"; } - String base; - if (src.begins_with("res://") && dst.begins_with("res://")) { - base = "res:/"; src = src.replace("res://", "/"); dst = dst.replace("res://", "/"); } else if (src.begins_with("user://") && dst.begins_with("user://")) { - base = "user:/"; src = src.replace("user://", "/"); dst = dst.replace("user://", "/"); @@ -4227,7 +4265,6 @@ String String::path_to(const String &p_path) const { return p_path; //impossible to do this } - base = src_begin; src = src.substr(src_begin.length(), src.length()); dst = dst.substr(dst_begin.length(), dst.length()); } @@ -4280,7 +4317,7 @@ bool String::is_valid_filename() const { return false; } - if (stripped == String()) { + if (stripped.is_empty()) { return false; } @@ -4338,13 +4375,13 @@ bool String::is_relative_path() const { String String::get_base_dir() const { int end = 0; - // url scheme style base + // URL scheme style base. int basepos = find("://"); if (basepos != -1) { end = basepos + 3; } - // windows top level directory base + // Windows top level directory base. if (end == 0) { basepos = find(":/"); if (basepos == -1) { @@ -4355,7 +4392,24 @@ String String::get_base_dir() const { } } - // unix root directory base + // Windows UNC network share path. + if (end == 0) { + if (is_network_share_path()) { + basepos = find("/", 2); + if (basepos == -1) { + basepos = find("\\", 2); + } + int servpos = find("/", basepos + 1); + if (servpos == -1) { + servpos = find("\\", basepos + 1); + } + if (servpos != -1) { + end = servpos + 1; + } + } + } + + // Unix root directory base. if (end == 0) { if (begins_with("/")) { end = 1; @@ -4412,7 +4466,7 @@ String String::property_name_encode() const { // as well as '"', '=' or ' ' (32) const char32_t *cstr = get_data(); for (int i = 0; cstr[i]; i++) { - if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) { + if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) { return "\"" + c_escape_multiline() + "\""; } } @@ -4421,7 +4475,7 @@ String String::property_name_encode() const { } // Changes made to the set of invalid characters must also be reflected in the String documentation. -const String String::invalid_node_name_characters = ". : @ / \""; +const String String::invalid_node_name_characters = ". : @ / \" " UNIQUE_NODE_PREFIX; String String::validate_node_name() const { Vector<String> chars = String::invalid_node_name_characters.split(" "); @@ -4495,7 +4549,7 @@ String String::sprintf(const Array &values, bool *error) const { int min_chars = 0; int min_decimals = 0; bool in_decimals = false; - bool pad_with_zeroes = false; + bool pad_with_zeros = false; bool left_justified = false; bool show_sign = false; @@ -4548,7 +4602,7 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeroes ? String("0") : String(" "); + String pad_char = pad_with_zeros ? String("0") : String(" "); if (left_justified) { str = str.rpad(pad_chars_count, pad_char); } else { @@ -4556,10 +4610,13 @@ String String::sprintf(const Array &values, bool *error) const { } // Sign. - if (show_sign && value >= 0) { - str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+"); - } else if (value < 0) { - str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-"); + if (show_sign || value < 0) { + String sign_char = value < 0 ? "-" : "+"; + if (left_justified) { + str = str.insert(0, sign_char); + } else { + str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char); + } } formatted += str; @@ -4588,13 +4645,9 @@ String String::sprintf(const Array &values, bool *error) const { // Padding. Leave room for sign later if required. int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars; - String pad_char = pad_with_zeroes ? String("0") : String(" "); + String pad_char = pad_with_zeros ? String("0") : String(" "); if (left_justified) { - if (pad_with_zeroes) { - return "left justification cannot be used with zeros as the padding"; - } else { - str = str.rpad(pad_chars_count, pad_char); - } + str = str.rpad(pad_chars_count, pad_char); } else { str = str.lpad(pad_chars_count, pad_char); } @@ -4605,7 +4658,7 @@ String String::sprintf(const Array &values, bool *error) const { if (left_justified) { str = str.insert(0, sign_char); } else { - str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char); + str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char); } } @@ -4694,7 +4747,11 @@ String String::sprintf(const Array &values, bool *error) const { min_decimals += n; } else { if (c == '0' && min_chars == 0) { - pad_with_zeroes = true; + if (left_justified) { + WARN_PRINT("'0' flag ignored with '-' flag in string format"); + } else { + pad_with_zeros = true; + } } else { min_chars *= 10; min_chars += n; @@ -4743,7 +4800,7 @@ String String::sprintf(const Array &values, bool *error) const { // Back to defaults: min_chars = 0; min_decimals = 6; - pad_with_zeroes = false; + pad_with_zeros = false; left_justified = false; show_sign = false; in_decimals = false; @@ -4844,6 +4901,17 @@ Vector<uint8_t> String::to_utf32_buffer() const { } #ifdef TOOLS_ENABLED +/** + * "Tools TRanslate". Performs string replacement for internationalization + * within the editor. A translation context can optionally be specified to + * disambiguate between identical source strings in translations. When + * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`. + * If a string mentions a quantity (and may therefore need a dynamic plural form), + * use `TTRN()` instead of `TTR()`. + * + * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder). + * For translations that can be supplied by exported projects, use `RTR()` instead. + */ String TTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { return TranslationServer::get_singleton()->tool_translate(p_text, p_context); @@ -4852,6 +4920,18 @@ String TTR(const String &p_text, const String &p_context) { return p_text; } +/** + * "Tools TRanslate for N items". Performs string replacement for + * internationalization within the editor. A translation context can optionally + * be specified to disambiguate between identical source strings in + * translations. Use `TTR()` if the string doesn't need dynamic plural form. + * When placeholders are desired, use + * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`. + * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`. + * + * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder). + * For translations that can be supplied by exported projects, use `RTRN()` instead. + */ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); @@ -4864,37 +4944,62 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St return p_text_plural; } +/** + * "Docs TRanslate". Used for the editor class reference documentation, + * handling descriptions extracted from the XML. + * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch, + * to allow dehardcoding it in the XML and doing proper substitutions everywhere. + */ String DTR(const String &p_text, const String &p_context) { // Comes straight from the XML, so remove indentation and any trailing whitespace. const String text = p_text.dedent().strip_edges(); if (TranslationServer::get_singleton()) { - return TranslationServer::get_singleton()->doc_translate(text, p_context); + return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL); } - return text; + return text.replace("$DOCS_URL", VERSION_DOCS_URL); } +/** + * "Docs TRanslate for N items". Used for the editor class reference documentation + * (with support for plurals), handling descriptions extracted from the XML. + * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch, + * to allow dehardcoding it in the XML and doing proper substitutions everywhere. + */ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { const String text = p_text.dedent().strip_edges(); const String text_plural = p_text_plural.dedent().strip_edges(); if (TranslationServer::get_singleton()) { - return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context); + return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL); } // Return message based on English plural rule if translation is not possible. if (p_n == 1) { - return text; + return text.replace("$DOCS_URL", VERSION_DOCS_URL); } - return text_plural; + return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL); } #endif +/** + * "Run-time TRanslate". Performs string replacement for internationalization + * within a running project. The translation string must be supplied by the + * project, as Godot does not provide built-in translations for `RTR()` strings + * to keep binary size low. A translation context can optionally be specified to + * disambiguate between identical source strings in translations. When + * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`. + * If a string mentions a quantity (and may therefore need a dynamic plural form), + * use `RTRN()` instead of `RTR()`. + * + * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/` + * folder). For editor translations, use `TTR()` instead. + */ String RTR(const String &p_text, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context); - if (rtr == String() || rtr == p_text) { + if (rtr.is_empty() || rtr == p_text) { return TranslationServer::get_singleton()->translate(p_text, p_context); } else { return rtr; @@ -4904,10 +5009,24 @@ String RTR(const String &p_text, const String &p_context) { return p_text; } +/** + * "Run-time TRanslate for N items". Performs string replacement for + * internationalization within a running project. The translation string must be + * supplied by the project, as Godot does not provide built-in translations for + * `RTRN()` strings to keep binary size low. A translation context can + * optionally be specified to disambiguate between identical source strings in + * translations. Use `RTR()` if the string doesn't need dynamic plural form. + * When placeholders are desired, use + * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`. + * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`. + * + * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/` + * folder). For editor translations, use `TTRN()` instead. + */ String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) { if (TranslationServer::get_singleton()) { String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context); - if (rtr == String() || rtr == p_text || rtr == p_text_plural) { + if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) { return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context); } else { return rtr; |