summary | refs | log | tree | commit | diff
path: root/core/string/ustring.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r-- core/string/ustring.cpp | 1059
1 files changed, 589 insertions, 470 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 397743fb6e..c02be9e5b7 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -5,8 +5,8 @@
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
-/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
-/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
@@ -35,9 +35,11 @@
#include "core/math/math_funcs.h"
#include "core/os/memory.h"
#include "core/string/print_string.h"
+#include "core/string/string_name.h"
#include "core/string/translation.h"
#include "core/string/ucaps.h"
#include "core/variant/variant.h"
+#include "core/version_generated.gen.h"
#include <stdio.h>
#include <stdlib.h>
@@ -53,34 +55,14 @@
static const int MAX_DECIMALS = 32;
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
- return (c >= '0' && c <= '9');
-}
-
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
- return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
- return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
- return (c >= 'a' && c <= 'z');
-}
-
static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
- return (is_upper_case(c) ? (c + ('a' - 'A')) : c);
+ return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
}
const char CharString::_null = 0;
const char16_t Char16String::_null = 0;
const char32_t String::_null = 0;
-bool is_symbol(char32_t c) {
- return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
-}
-
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
const String &s = p_s;
int beg = CLAMP(p_col, 0, s.length());
@@ -122,16 +104,18 @@ bool Char16String::operator<(const Char16String &p_right) const {
}
Char16String &Char16String::operator+=(char16_t p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- set(length() - 1, p_char);
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+
+ char16_t *dst = ptrw();
+ dst[lhs_len] = p_char;
+ dst[lhs_len + 1] = 0;
return *this;
}
-Char16String &Char16String::operator=(const char16_t *p_cstr) {
+void Char16String::operator=(const char16_t *p_cstr) {
copy_from(p_cstr);
- return *this;
}
const char16_t *Char16String::get_data() const {
@@ -178,16 +162,18 @@ bool CharString::operator<(const CharString &p_right) const {
}
CharString &CharString::operator+=(char p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- set(length() - 1, p_char);
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+
+ char *dst = ptrw();
+ dst[lhs_len] = p_char;
+ dst[lhs_len + 1] = 0;
return *this;
}
-CharString &CharString::operator=(const char *p_cstr) {
+void CharString::operator=(const char *p_cstr) {
copy_from(p_cstr);
- return *this;
}
const char *CharString::get_data() const {
@@ -325,11 +311,7 @@ void String::copy_from(const char *p_cstr) {
return;
}
- int len = 0;
- const char *ptr = p_cstr;
- while (*(ptr++) != 0) {
- len++;
- }
+ const size_t len = strlen(p_cstr);
if (len == 0) {
resize(0);
@@ -340,8 +322,14 @@ void String::copy_from(const char *p_cstr) {
char32_t *dst = this->ptrw();
- for (int i = 0; i < len + 1; i++) {
- dst[i] = p_cstr[i];
+ for (size_t i = 0; i <= len; i++) {
+ uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+ if (c == 0 && i < len) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
}
@@ -368,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
char32_t *dst = this->ptrw();
for (int i = 0; i < len; i++) {
- dst[i] = p_cstr[i];
+ uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+ if (c == 0) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
dst[len] = 0;
}
@@ -394,14 +388,22 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
}
void String::copy_from(const char32_t &p_char) {
- resize(2);
- if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
- set(0, 0xfffd);
- } else {
- set(0, p_char);
+ if (p_char == 0) {
+ print_unicode_error("NUL character", true);
+ return;
+ }
+ if ((p_char & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+ }
+ if (p_char > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
}
- set(1, 0);
+
+ resize(2);
+
+ char32_t *dst = ptrw();
+ dst[0] = p_char;
+ dst[1] = 0;
}
void String::copy_from(const char32_t *p_cstr) {
@@ -450,17 +452,22 @@ void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
// p_length <= p_char strlen
void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
resize(p_length + 1);
- set(p_length, 0);
-
char32_t *dst = ptrw();
+ dst[p_length] = 0;
for (int i = 0; i < p_length; i++) {
- if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
- dst[i] = 0xfffd;
- } else {
- dst[i] = p_char[i];
+ if (p_char[i] == 0) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ continue;
+ }
+ if ((p_char[i] & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
}
+ if (p_char[i] > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
+ }
+ dst[i] = p_char[i];
}
}
@@ -482,6 +489,12 @@ String String::operator+(const String &p_str) const {
return res;
}
+String String::operator+(char32_t p_char) const {
+ String res = *this;
+ res += p_char;
+ return res;
+}
+
String operator+(const char *p_chr, const String &p_str) {
String tmp = p_chr;
tmp += p_str;
@@ -493,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) {
// wchar_t is 16-bit
String tmp = String::utf16((const char16_t *)p_chr);
#else
- // wchar_t is 32-bi
+ // wchar_t is 32-bit
String tmp = (const char32_t *)p_chr;
#endif
tmp += p_str;
@@ -505,27 +518,25 @@ String operator+(char32_t p_chr, const String &p_str) {
}
String &String::operator+=(const String &p_str) {
- if (is_empty()) {
+ const int lhs_len = length();
+ if (lhs_len == 0) {
*this = p_str;
return *this;
}
- if (p_str.is_empty()) {
+ const int rhs_len = p_str.length();
+ if (rhs_len == 0) {
return *this;
}
- int from = length();
-
- resize(length() + p_str.size());
+ resize(lhs_len + rhs_len + 1);
- const char32_t *src = p_str.get_data();
- char32_t *dst = ptrw();
+ const char32_t *src = p_str.ptr();
+ char32_t *dst = ptrw() + lhs_len;
- set(length(), 0);
-
- for (int i = 0; i < p_str.length(); i++) {
- dst[from + i] = src[i];
- }
+ // Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination).
+ memcpy(dst, src, rhs_len * sizeof(char32_t));
+ *(dst + rhs_len) = _null;
return *this;
}
@@ -535,22 +546,21 @@ String &String::operator+=(const char *p_str) {
return *this;
}
- int src_len = 0;
- const char *ptr = p_str;
- while (*(ptr++) != 0) {
- src_len++;
- }
-
- int from = length();
+ const int lhs_len = length();
+ const size_t rhs_len = strlen(p_str);
- resize(from + src_len + 1);
-
- char32_t *dst = ptrw();
+ resize(lhs_len + rhs_len + 1);
- set(length(), 0);
+ char32_t *dst = ptrw() + lhs_len;
- for (int i = 0; i < src_len; i++) {
- dst[from + i] = p_str[i];
+ for (size_t i = 0; i <= rhs_len; i++) {
+ uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
+ if (c == 0 && i < rhs_len) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
return *this;
@@ -573,15 +583,23 @@ String &String::operator+=(const char32_t *p_str) {
}
String &String::operator+=(char32_t p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
- set(length() - 1, 0xfffd);
- } else {
- set(length() - 1, p_char);
+ if (p_char == 0) {
+ print_unicode_error("NUL character", true);
+ return *this;
+ }
+ if ((p_char & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+ }
+ if (p_char > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
}
+ const int lhs_len = length();
+ resize(lhs_len + 2);
+ char32_t *dst = ptrw();
+ dst[lhs_len] = p_char;
+ dst[lhs_len + 1] = 0;
+
return *this;
}
@@ -952,10 +970,6 @@ const char32_t *String::get_data() const {
return size() ? &operator[](0) : &zero;
}
-void String::erase(int p_pos, int p_chars) {
- *this = left(MAX(p_pos, 0)) + substr(p_pos + p_chars, length() - ((p_pos + p_chars)));
-}
-
String String::capitalize() const {
String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges();
String cap;
@@ -979,21 +993,21 @@ String String::camelcase_to_underscore(bool lowercase) const {
int start_index = 0;
for (int i = 1; i < this->size(); i++) {
- bool is_upper = is_upper_case(cstr[i]);
+ bool is_upper = is_ascii_upper_case(cstr[i]);
bool is_number = is_digit(cstr[i]);
bool are_next_2_lower = false;
bool is_next_lower = false;
bool is_next_number = false;
- bool was_precedent_upper = is_upper_case(cstr[i - 1]);
+ bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
bool was_precedent_number = is_digit(cstr[i - 1]);
if (i + 2 < this->size()) {
- are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]);
+ are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
}
if (i + 1 < this->size()) {
- is_next_lower = is_lower_case(cstr[i + 1]);
+ is_next_lower = is_ascii_lower_case(cstr[i + 1]);
is_next_number = is_digit(cstr[i + 1]);
}
@@ -1532,115 +1546,24 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
}
String String::num_real(double p_num, bool p_trailing) {
- if (Math::is_nan(p_num)) {
- return "nan";
- }
-
- if (Math::is_inf(p_num)) {
- if (signbit(p_num)) {
- return "-inf";
+ if (p_num == (double)(int64_t)p_num) {
+ if (p_trailing) {
+ return num_int64((int64_t)p_num) + ".0";
} else {
- return "inf";
+ return num_int64((int64_t)p_num);
}
}
-
- String s;
- String sd;
-
- // Integer part.
-
- bool neg = p_num < 0;
- p_num = ABS(p_num);
- int64_t intn = (int64_t)p_num;
-
- // Decimal part.
-
- if (intn != p_num) {
- double dec = p_num - (double)intn;
-
- int digit = 0;
-
#ifdef REAL_T_IS_DOUBLE
- int decimals = 14;
- double tolerance = 1e-14;
+ int decimals = 14;
#else
- int decimals = 6;
- double tolerance = 1e-6;
+ int decimals = 6;
#endif
- // We want to align the digits to the above sane default, so we only
- // need to subtract log10 for numbers with a positive power of ten.
- if (p_num > 10) {
- decimals -= (int)floor(log10(p_num));
- }
-
- if (decimals > MAX_DECIMALS) {
- decimals = MAX_DECIMALS;
- }
-
- // In case the value ends up ending in "99999", we want to add a
- // tiny bit to the value we're checking when deciding when to stop,
- // so we multiply by slightly above 1 (1 + 1e-7 or 1e-15).
- double check_multiplier = 1 + tolerance / 10;
-
- int64_t dec_int = 0;
- int64_t dec_max = 0;
-
- while (true) {
- dec *= 10.0;
- dec_int = dec_int * 10 + (int64_t)dec % 10;
- dec_max = dec_max * 10 + 9;
- digit++;
-
- if ((dec - (double)(int64_t)(dec * check_multiplier)) < tolerance) {
- break;
- }
-
- if (digit == decimals) {
- break;
- }
- }
-
- dec *= 10;
- int last = (int64_t)dec % 10;
-
- if (last > 5) {
- if (dec_int == dec_max) {
- dec_int = 0;
- intn++;
- } else {
- dec_int++;
- }
- }
-
- String decimal;
- for (int i = 0; i < digit; i++) {
- char num[2] = { 0, 0 };
- num[0] = '0' + dec_int % 10;
- decimal = num + decimal;
- dec_int /= 10;
- }
- sd = '.' + decimal;
- } else if (p_trailing) {
- sd = ".0";
- } else {
- sd = "";
- }
-
- if (intn == 0) {
- s = "0";
- } else {
- while (intn) {
- char32_t num = '0' + (intn % 10);
- intn /= 10;
- s = num + s;
- }
- }
-
- s = s + sd;
- if (neg) {
- s = "-" + s;
+ // We want to align the digits to the above sane default, so we only
+ // need to subtract log10 for numbers with a positive power of ten.
+ if (p_num > 10) {
+ decimals -= (int)floor(log10(p_num));
}
- return s;
+ return num(p_num, decimals);
}
String String::num_scientific(double p_num) {
@@ -1699,6 +1622,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
return ret;
}
+void String::print_unicode_error(const String &p_message, bool p_critical) const {
+ if (p_critical) {
+ print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
+ } else {
+ print_error(vformat("Unicode parsing error: %s", p_message));
+ }
+}
+
CharString String::ascii(bool p_allow_extended) const {
if (!length()) {
return CharString();
@@ -1712,7 +1643,7 @@ CharString String::ascii(bool p_allow_extended) const {
if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
cs[i] = c;
} else {
- print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
cs[i] = 0x20;
}
}
@@ -1727,11 +1658,9 @@ String String::utf8(const char *p_utf8, int p_len) {
return ret;
}
-bool String::parse_utf8(const char *p_utf8, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
-
+Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
if (!p_utf8) {
- return true;
+ return ERR_INVALID_DATA;
}
String aux;
@@ -1751,14 +1680,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
}
+ bool decode_error = false;
+ bool decode_failed = false;
{
const char *ptrtmp = p_utf8;
const char *ptrtmp_limit = &p_utf8[p_len];
int skip = 0;
+ uint8_t c_start = 0;
while (ptrtmp != ptrtmp_limit && *ptrtmp) {
- if (skip == 0) {
- uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+ uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+ if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ ptrtmp++;
+ continue;
+ }
/* Determine the number of characters in sequence */
if ((c & 0x80) == 0) {
skip = 0;
@@ -1768,20 +1704,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
skip = 2;
} else if ((c & 0xf8) == 0xf0) {
skip = 3;
+ } else if ((c & 0xfc) == 0xf8) {
+ skip = 4;
+ } else if ((c & 0xfe) == 0xfc) {
+ skip = 5;
} else {
- _UNICERROR("invalid skip at " + num_int64(cstr_size));
- return true; //invalid utf8
+ skip = 0;
+ print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
+ decode_failed = true;
}
+ c_start = c;
if (skip == 1 && (c & 0x1e) == 0) {
- _UNICERROR("overlong rejected at " + num_int64(cstr_size));
- return true; //reject overlong
+ print_unicode_error(vformat("Overlong encoding (%x ...)", c));
+ decode_error = true;
}
-
str_size++;
-
} else {
- --skip;
+ if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
+ print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
+ decode_error = true;
+ }
+ if (c < 0x80 || c > 0xbf) {
+ print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
+ decode_failed = true;
+ skip = 0;
+ } else {
+ --skip;
+ }
}
cstr_size++;
@@ -1789,80 +1739,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
if (skip) {
- _UNICERROR("no space left");
- return true; //not enough space
+ print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
+ decode_failed = true;
}
}
if (str_size == 0) {
clear();
- return false;
+ return OK; // empty string
}
resize(str_size + 1);
char32_t *dst = ptrw();
dst[str_size] = 0;
+ int skip = 0;
+ uint32_t unichar = 0;
while (cstr_size) {
- int len = 0;
-
- /* Determine the number of characters in sequence */
- if ((*p_utf8 & 0x80) == 0) {
- len = 1;
- } else if ((*p_utf8 & 0xe0) == 0xc0) {
- len = 2;
- } else if ((*p_utf8 & 0xf0) == 0xe0) {
- len = 3;
- } else if ((*p_utf8 & 0xf8) == 0xf0) {
- len = 4;
- } else {
- _UNICERROR("invalid len");
- return true; //invalid UTF8
- }
-
- if (len > cstr_size) {
- _UNICERROR("no space left");
- return true; //not enough space
- }
+ uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
- if (len == 2 && (*p_utf8 & 0x1E) == 0) {
- _UNICERROR("no space left");
- return true; //reject overlong
- }
-
- /* Convert the first character */
-
- uint32_t unichar = 0;
-
- if (len == 1) {
- unichar = *p_utf8;
+ if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ p_utf8++;
+ continue;
+ }
+ /* Determine the number of characters in sequence */
+ if ((c & 0x80) == 0) {
+ *(dst++) = c;
+ unichar = 0;
+ skip = 0;
+ } else if ((c & 0xe0) == 0xc0) {
+ unichar = (0xff >> 3) & c;
+ skip = 1;
+ } else if ((c & 0xf0) == 0xe0) {
+ unichar = (0xff >> 4) & c;
+ skip = 2;
+ } else if ((c & 0xf8) == 0xf0) {
+ unichar = (0xff >> 5) & c;
+ skip = 3;
+ } else if ((c & 0xfc) == 0xf8) {
+ unichar = (0xff >> 6) & c;
+ skip = 4;
+ } else if ((c & 0xfe) == 0xfc) {
+ unichar = (0xff >> 7) & c;
+ skip = 5;
+ } else {
+ *(dst++) = 0x20;
+ unichar = 0;
+ skip = 0;
+ }
} else {
- unichar = (0xff >> (len + 1)) & *p_utf8;
-
- for (int i = 1; i < len; i++) {
- if ((p_utf8[i] & 0xc0) != 0x80) {
- _UNICERROR("invalid utf8");
- return true; //invalid utf8
- }
- if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
- _UNICERROR("invalid utf8 overlong");
- return true; //no overlong
+ if (c < 0x80 || c > 0xbf) {
+ *(dst++) = 0x20;
+ skip = 0;
+ } else {
+ unichar = (unichar << 6) | (c & 0x3f);
+ --skip;
+ if (skip == 0) {
+ if (unichar == 0) {
+ print_unicode_error("NUL character", true);
+ decode_failed = true;
+ unichar = 0x20;
+ }
+ if ((unichar & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
+ decode_error = true;
+ }
+ if (unichar > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
+ decode_error = true;
+ }
+ *(dst++) = unichar;
}
- unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
}
}
- if (unichar >= 0xd800 && unichar <= 0xdfff) {
- _UNICERROR("invalid code point");
- return CharString();
- }
- *(dst++) = unichar;
- cstr_size -= len;
- p_utf8 += len;
+ cstr_size--;
+ p_utf8++;
+ }
+ if (skip) {
+ *(dst++) = 0x20;
}
- return false;
-#undef _UNICERROR
+ if (decode_failed) {
+ return ERR_INVALID_DATA;
+ } else if (decode_error) {
+ return ERR_PARSE_ERROR;
+ } else {
+ return OK;
+ }
}
CharString String::utf8() const {
@@ -1881,15 +1846,17 @@ CharString String::utf8() const {
fl += 2;
} else if (c <= 0xffff) { // 16 bits
fl += 3;
- } else if (c <= 0x0010ffff) { // 21 bits
+ } else if (c <= 0x001fffff) { // 21 bits
fl += 4;
+ } else if (c <= 0x03ffffff) { // 26 bits
+ fl += 5;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
+ } else if (c <= 0x7fffffff) { // 31 bits
+ fl += 6;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return CharString();
- }
- if (c >= 0xd800 && c <= 0xdfff) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return CharString();
+ fl += 1;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
}
}
@@ -1915,11 +1882,26 @@ CharString String::utf8() const {
APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
- } else { // 21 bits
+ } else if (c <= 0x001fffff) { // 21 bits
APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else if (c <= 0x03ffffff) { // 26 bits
+ APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else if (c <= 0x7fffffff) { // 31 bits
+ APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else {
+ APPEND_CHAR(0x20);
}
}
#undef APPEND_CHAR
@@ -1935,11 +1917,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) {
return ret;
}
-bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
-#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
-
+Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
if (!p_utf16) {
- return true;
+ return ERR_INVALID_DATA;
}
String aux;
@@ -1966,80 +1946,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
}
}
+ bool decode_error = false;
{
const char16_t *ptrtmp = p_utf16;
const char16_t *ptrtmp_limit = &p_utf16[p_len];
- int skip = 0;
+ uint32_t c_prev = 0;
+ bool skip = false;
while (ptrtmp != ptrtmp_limit && *ptrtmp) {
uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
- if (skip == 0) {
- if ((c & 0xfffffc00) == 0xd800) {
- skip = 1; // lead surrogate
- } else if ((c & 0xfffffc00) == 0xdc00) {
- _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
- return true; // invalid UTF16
- } else {
- skip = 0;
+
+ if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+ if (skip) {
+ print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
+ decode_error = true;
}
- str_size++;
- } else {
- if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
- --skip;
+ skip = true;
+ } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ if (skip) {
+ str_size--;
} else {
- _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
- return true; // invalid UTF16
+ print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
+ decode_error = true;
}
+ skip = false;
+ } else {
+ skip = false;
}
+ c_prev = c;
+ str_size++;
cstr_size++;
ptrtmp++;
}
if (skip) {
- _UNICERROR("no space left");
- return true; // not enough space
+ print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
+ decode_error = true;
}
}
if (str_size == 0) {
clear();
- return false;
+ return OK; // empty string
}
resize(str_size + 1);
char32_t *dst = ptrw();
dst[str_size] = 0;
+ bool skip = false;
+ uint32_t c_prev = 0;
while (cstr_size) {
- int len = 0;
uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
- if ((c & 0xfffffc00) == 0xd800) {
- len = 2;
+ if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+ if (skip) {
+ *(dst++) = c_prev; // unpaired, store as is
+ }
+ skip = true;
+ } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ if (skip) {
+ *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
+ } else {
+ *(dst++) = c; // unpaired, store as is
+ }
+ skip = false;
} else {
- len = 1;
- }
-
- if (len > cstr_size) {
- _UNICERROR("no space left");
- return true; //not enough space
+ *(dst++) = c;
+ skip = false;
}
- uint32_t unichar = 0;
- if (len == 1) {
- unichar = c;
- } else {
- uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
- unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
- }
+ cstr_size--;
+ p_utf16++;
+ c_prev = c;
+ }
- *(dst++) = unichar;
- cstr_size -= len;
- p_utf16 += len;
+ if (skip) {
+ *(dst++) = c_prev;
}
- return false;
-#undef _UNICERROR
+ if (decode_error) {
+ return ERR_PARSE_ERROR;
+ } else {
+ return OK;
+ }
}
Char16String String::utf16() const {
@@ -2054,15 +2044,14 @@ Char16String String::utf16() const {
uint32_t c = d[i];
if (c <= 0xffff) { // 16 bits.
fl += 1;
+ if ((c & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", c));
+ }
} else if (c <= 0x10ffff) { // 32 bits.
fl += 2;
} else {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return Char16String();
- }
- if (c >= 0xd800 && c <= 0xdfff) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return Char16String();
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
+ fl += 1;
}
}
@@ -2081,9 +2070,11 @@ Char16String String::utf16() const {
if (c <= 0xffff) { // 16 bits.
APPEND_CHAR(c);
- } else { // 32 bits.
+ } else if (c <= 0x10ffff) { // 32 bits.
APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
+ } else {
+ APPEND_CHAR(0x20);
}
}
#undef APPEND_CHAR
@@ -2155,7 +2146,7 @@ int64_t String::hex_to_int() const {
}
// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0');
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
hex *= 16;
hex += n;
s++;
@@ -2194,7 +2185,7 @@ int64_t String::bin_to_int() const {
}
// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0');
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
binary *= 2;
binary += n;
s++;
@@ -2217,7 +2208,7 @@ int64_t String::to_int() const {
char32_t c = operator[](i);
if (is_digit(c)) {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
integer *= 10;
integer += c - '0';
@@ -2246,7 +2237,7 @@ int64_t String::to_int(const char *p_str, int p_len) {
char c = p_str[i];
if (is_digit(c)) {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
integer *= 10;
integer += c - '0';
@@ -2277,7 +2268,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) {
wchar_t c = p_str[i];
if (is_digit(c)) {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
- ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
integer *= 10;
integer += c - '0';
@@ -2308,7 +2299,7 @@ bool String::is_numeric() const {
return false;
}
dot = true;
- } else if (c < '0' || c > '9') {
+ } else if (!is_digit(c)) {
return false;
}
}
@@ -2317,28 +2308,33 @@ bool String::is_numeric() const {
}
template <class C>
-static double built_in_strtod(const C *string, /* A decimal ASCII floating-point number,
- * optionally preceded by white space. Must
- * have form "-I.FE-X", where I is the integer
- * part of the mantissa, F is the fractional
- * part of the mantissa, and X is the
- * exponent. Either of the signs may be "+",
- * "-", or omitted. Either I or F may be
- * omitted, or both. The decimal point isn't
- * necessary unless F is present. The "E" may
- * actually be an "e". E and X may both be
- * omitted (but not just one). */
- C **endPtr = nullptr) /* If non-nullptr, store terminating Cacter's
- * address here. */
-{
- static const int maxExponent = 511; /* Largest possible base 10 exponent. Any
- * exponent larger than this will already
- * produce underflow or overflow, so there's
- * no need to worry about additional digits.
- */
- static const double powersOf10[] = { /* Table giving binary powers of 10. Entry */
- 10., /* is 10^2^i. Used to convert decimal */
- 100., /* exponents into floating-point numbers. */
+static double built_in_strtod(
+ /* A decimal ASCII floating-point number,
+ * optionally preceded by white space. Must
+ * have form "-I.FE-X", where I is the integer
+ * part of the mantissa, F is the fractional
+ * part of the mantissa, and X is the
+ * exponent. Either of the signs may be "+",
+ * "-", or omitted. Either I or F may be
+ * omitted, or both. The decimal point isn't
+ * necessary unless F is present. The "E" may
+ * actually be an "e". E and X may both be
+ * omitted (but not just one). */
+ const C *string,
+ /* If non-nullptr, store terminating character's
+ * address here. */
+ C **endPtr = nullptr) {
+ /* Largest possible base 10 exponent. Any
+ * exponent larger than this will already
+ * produce underflow or overflow, so there's
+ * no need to worry about additional digits. */
+ static const int maxExponent = 511;
+ /* Table giving binary powers of 10. Entry
+ * is 10^2^i. Used to convert decimal
+ * exponents into floating-point numbers. */
+ static const double powersOf10[] = {
+ 10.,
+ 100.,
1.0e4,
1.0e8,
1.0e16,
@@ -2353,25 +2349,28 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
const double *d;
const C *p;
int c;
- int exp = 0; /* Exponent read from "EX" field. */
- int fracExp = 0; /* Exponent that derives from the fractional
- * part. Under normal circumstances, it is
- * the negative of the number of digits in F.
- * However, if I is very long, the last digits
- * of I get dropped (otherwise a long I with a
- * large negative exponent could cause an
- * unnecessary overflow on I alone). In this
- * case, fracExp is incremented one for each
- * dropped digit. */
- int mantSize; /* Number of digits in mantissa. */
- int decPt; /* Number of mantissa digits BEFORE decimal
- * point. */
- const C *pExp; /* Temporarily holds location of exponent in
- * string. */
+ /* Exponent read from "EX" field. */
+ int exp = 0;
+ /* Exponent that derives from the fractional
+ * part. Under normal circumstances, it is
+ * the negative of the number of digits in F.
+ * However, if I is very long, the last digits
+ * of I get dropped (otherwise a long I with a
+ * large negative exponent could cause an
+ * unnecessary overflow on I alone). In this
+ * case, fracExp is incremented one for each
+ * dropped digit. */
+ int fracExp = 0;
+ /* Number of digits in mantissa. */
+ int mantSize;
+ /* Number of mantissa digits BEFORE decimal point. */
+ int decPt;
+ /* Temporarily holds location of exponent in string. */
+ const C *pExp;
/*
- * Strip off leading blanks and check for a sign.
- */
+ * Strip off leading blanks and check for a sign.
+ */
p = string;
while (*p == ' ' || *p == '\t' || *p == '\n') {
@@ -2388,9 +2387,9 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Count the number of digits in the mantissa (including the decimal
- * point), and also locate the decimal point.
- */
+ * Count the number of digits in the mantissa (including the decimal
+ * point), and also locate the decimal point.
+ */
decPt = -1;
for (mantSize = 0;; mantSize += 1) {
@@ -2405,11 +2404,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Now suck up the digits in the mantissa. Use two integers to collect 9
- * digits each (this is faster than using floating-point). If the mantissa
- * has more than 18 digits, ignore the extras, since they can't affect the
- * value anyway.
- */
+ * Now suck up the digits in the mantissa. Use two integers to collect 9
+ * digits each (this is faster than using floating-point). If the mantissa
+ * has more than 18 digits, ignore the extras, since they can't affect the
+ * value anyway.
+ */
pExp = p;
p -= mantSize;
@@ -2455,8 +2454,8 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Skim off the exponent.
- */
+ * Skim off the exponent.
+ */
p = pExp;
if ((*p == 'E') || (*p == 'e')) {
@@ -2486,10 +2485,10 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
/*
- * Generate a floating-point number that represents the exponent. Do this
- * by processing the exponent one bit at a time to combine many powers of
- * 2 of 10. Then combine the exponent with the fraction.
- */
+ * Generate a floating-point number that represents the exponent. Do this
+ * by processing the exponent one bit at a time to combine many powers of
+ * 2 of 10. Then combine the exponent with the fraction.
+ */
if (exp < 0) {
expSign = true;
@@ -2591,7 +2590,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
return INT64_MIN;
}
} else {
- ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
}
}
integer *= 10;
@@ -3168,7 +3167,7 @@ bool String::is_subsequence_of(const String &p_string) const {
return _base_is_subsequence_of(p_string, false);
}
-bool String::is_subsequence_ofi(const String &p_string) const {
+// Variant of is_subsequence_of() that passes `true` instead of `false` as the
+// second argument of _base_is_subsequence_of().
+// NOTE(review): by analogy with the other *n() String methods this flag is
+// presumably "case insensitive" -- confirm against _base_is_subsequence_of().
+bool String::is_subsequence_ofn(const String &p_string) const {
return _base_is_subsequence_of(p_string, true);
}
@@ -3452,51 +3451,52 @@ String String::replacen(const String &p_key, const String &p_with) const {
+// Returns this string concatenated p_count times.
+// A negative p_count is reported through ERR_FAIL_COND_V_MSG, which is passed
+// "" as its return-value argument.
String String::repeat(int p_count) const {
ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
- String new_string;
- const char32_t *src = this->get_data();
-
- new_string.resize(length() * p_count + 1);
- new_string[length() * p_count] = 0;
-
- for (int i = 0; i < p_count; i++) {
- for (int j = 0; j < length(); j++) {
- new_string[i * length() + j] = src[j];
- }
- }
-
+ // Start from one copy of this string, then resize the buffer so it can hold
+ // p_count copies plus the terminator.
+ int len = length();
+ String new_string = *this;
+ new_string.resize(p_count * len + 1);
+
+ // `offset` is the number of copies already present at the start of `dst`;
+ // `stride` is how many of those copies the next memcpy duplicates.
+ // The stride doubles each round, so the buffer is filled with a logarithmic
+ // number of memcpy calls; MIN() clamps the final round to the copies that
+ // are still missing. With p_count <= 1 the loop body never runs.
+ char32_t *dst = new_string.ptrw();
+ int offset = 1;
+ int stride = 1;
+ while (offset < p_count) {
+ memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t));
+ offset += stride;
+ stride = MIN(stride * 2, p_count - offset);
+ }
+ // Write the terminator right after the last copy.
+ dst[p_count * len] = _null;
return new_string;
}
-String String::left(int p_pos) const {
- if (p_pos < 0) {
- p_pos = length() + p_pos;
+// Returns the leading part of the string, obtained with substr(0, p_len).
+// A negative p_len is first replaced by length() + p_len. If the resulting
+// p_len is <= 0 an empty string is returned; if it is >= length() the whole
+// string is returned unchanged.
+String String::left(int p_len) const {
+ if (p_len < 0) {
+ p_len = length() + p_len;
}
- if (p_pos <= 0) {
+ if (p_len <= 0) {
return "";
}
- if (p_pos >= length()) {
+ if (p_len >= length()) {
return *this;
}
- return substr(0, p_pos);
+ return substr(0, p_len);
}
-String String::right(int p_pos) const {
- if (p_pos < 0) {
- p_pos = length() + p_pos;
+// Returns the trailing part of the string, obtained with
+// substr(length() - p_len), i.e. starting p_len characters before the end.
+// A negative p_len is first replaced by length() + p_len. If the resulting
+// p_len is <= 0 an empty string is returned; if it is >= length() the whole
+// string is returned unchanged.
+String String::right(int p_len) const {
+ if (p_len < 0) {
+ p_len = length() + p_len;
}
- if (p_pos <= 0) {
+ if (p_len <= 0) {
return "";
}
- if (p_pos >= length()) {
+ if (p_len >= length()) {
return *this;
}
- return substr(length() - p_pos);
+ return substr(length() - p_len);
}
char32_t String::unicode_at(int p_idx) const {
@@ -3504,6 +3504,27 @@ char32_t String::unicode_at(int p_idx) const {
return operator[](p_idx);
}
+// Returns a copy of this string in which every non-empty line is prefixed
+// with p_prefix. Lines are delimited by '\n'; a line that consists solely of
+// its '\n' terminator is copied through without the prefix.
+String String::indent(const String &p_prefix) const {
+	String indented;
+	const int total = length();
+	int begin = 0; // Index of the first character of the current line.
+
+	for (int pos = 0; pos < total; pos++) {
+		const char32_t ch = operator[](pos);
+		if (ch != '\n') {
+			continue;
+		}
+		if (pos != begin) {
+			// Non-empty line: emit the prefix, then the line including its '\n'.
+			indented += p_prefix + substr(begin, pos - begin + 1);
+		} else {
+			indented += ch; // Empty line: keep it empty.
+		}
+		begin = pos + 1;
+	}
+
+	// Trailing text that is not terminated by '\n'.
+	if (begin != total) {
+		indented += p_prefix + substr(begin);
+	}
+	return indented;
+}
+
String String::dedent() const {
String new_string;
String indent;
@@ -3625,6 +3646,10 @@ String String::rstrip(const String &p_chars) const {
return substr(0, end + 1);
}
+// Returns true when the string starts with two forward slashes or with two
+// backslashes.
+bool String::is_network_share_path() const {
+	if (begins_with("//")) {
+		return true;
+	}
+	return begins_with("\\\\");
+}
+
String String::simplify_path() const {
String s = *this;
String drive;
@@ -3637,6 +3662,9 @@ String String::simplify_path() const {
} else if (s.begins_with("user://")) {
drive = "user://";
s = s.substr(7, s.length());
+ } else if (is_network_share_path()) {
+ drive = s.substr(0, 2);
+ s = s.substr(2, s.length() - 2);
} else if (s.begins_with("/") || s.begins_with("\\")) {
drive = s.substr(0, 1);
s = s.substr(1, s.length() - 1);
@@ -3665,15 +3693,15 @@ String String::simplify_path() const {
for (int i = 0; i < dirs.size(); i++) {
String d = dirs[i];
if (d == ".") {
- dirs.remove(i);
+ dirs.remove_at(i);
i--;
} else if (d == "..") {
if (i == 0) {
- dirs.remove(i);
+ dirs.remove_at(i);
i--;
} else {
- dirs.remove(i);
- dirs.remove(i - 1);
+ dirs.remove_at(i);
+ dirs.remove_at(i - 1);
i -= 2;
}
}
@@ -3735,6 +3763,31 @@ bool String::is_absolute_path() const {
}
}
+// Returns whether p_char is acceptable at position p_index of an identifier:
+// it must satisfy is_ascii_identifier_char(), and a digit is rejected at
+// index 0.
+static _FORCE_INLINE_ bool _is_valid_identifier_bit(int p_index, char32_t p_char) {
+	// An identifier must not start with a digit.
+	const bool leading_digit = p_index == 0 && is_digit(p_char);
+	return !leading_digit && is_ascii_identifier_char(p_char);
+}
+
+// Returns a copy of this string in which every character rejected by
+// _is_valid_identifier_bit() has been replaced with '_'.
+// An empty string yields "_".
+String String::validate_identifier() const {
+	if (is_empty()) {
+		return "_"; // An empty string is not a valid identifier.
+	}
+
+	String sanitized = *this;
+	const int count = sanitized.length();
+	char32_t *chars = sanitized.ptrw();
+
+	for (int idx = 0; idx < count; ++idx) {
+		char32_t &ch = chars[idx];
+		if (_is_valid_identifier_bit(idx, ch)) {
+			continue;
+		}
+		ch = '_';
+	}
+
+	return sanitized;
+}
+
bool String::is_valid_identifier() const {
int len = length();
@@ -3745,15 +3798,7 @@ bool String::is_valid_identifier() const {
const char32_t *str = &operator[](0);
for (int i = 0; i < len; i++) {
- if (i == 0) {
- if (is_digit(str[0])) {
- return false; // no start with number plz
- }
- }
-
- bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_';
-
- if (!valid_char) {
+ if (!_is_valid_identifier_bit(i, str[i])) {
return false;
}
}
@@ -3775,18 +3820,15 @@ String String::uri_encode() const {
const CharString temp = utf8();
String res;
for (int i = 0; i < temp.length(); ++i) {
- char ord = temp[i];
- if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) {
+ uint8_t ord = temp[i];
+ if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
res += ord;
} else {
- char h_Val[3];
-#if defined(__GNUC__) || defined(_MSC_VER)
- snprintf(h_Val, 3, "%02hhX", ord);
-#else
- sprintf(h_Val, "%02hhX", ord);
-#endif
- res += "%";
- res += h_Val;
+ char p[4] = { '%', 0, 0, 0 };
+ static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+ p[1] = hex[ord >> 4];
+ p[2] = hex[ord & 0xF];
+ res += p;
}
}
return res;
@@ -3798,9 +3840,9 @@ String String::uri_decode() const {
for (int i = 0; i < src.length(); ++i) {
if (src[i] == '%' && i + 2 < src.length()) {
char ord1 = src[i + 1];
- if (is_digit(ord1) || is_upper_case(ord1)) {
+ if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
char ord2 = src[i + 2];
- if (is_digit(ord2) || is_upper_case(ord2)) {
+ if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
char bytes[3] = { (char)ord1, (char)ord2, 0 };
res += (char)strtol(bytes, nullptr, 16);
i += 2;
@@ -3927,7 +3969,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
for (int i = 2; i < p_src_len; i++) {
eat = i + 1;
char32_t ct = p_src[i];
- if (ct == ';' || ct < '0' || ct > '9') {
+ if (ct == ';' || !is_digit(ct)) {
break;
}
}
@@ -4057,7 +4099,7 @@ String String::pad_zeros(int p_digits) const {
int begin = 0;
- while (begin < end && (s[begin] < '0' || s[begin] > '9')) {
+ while (begin < end && !is_digit(s[begin])) {
begin++;
}
@@ -4102,7 +4144,7 @@ bool String::is_valid_int() const {
}
for (int i = from; i < len; i++) {
- if (operator[](i) < '0' || operator[](i) > '9') {
+ if (!is_digit(operator[](i))) {
return false; // no start with number plz
}
}
@@ -4204,15 +4246,11 @@ String String::path_to(const String &p_path) const {
dst += "/";
}
- String base;
-
if (src.begins_with("res://") && dst.begins_with("res://")) {
- base = "res:/";
src = src.replace("res://", "/");
dst = dst.replace("res://", "/");
} else if (src.begins_with("user://") && dst.begins_with("user://")) {
- base = "user:/";
src = src.replace("user://", "/");
dst = dst.replace("user://", "/");
@@ -4227,7 +4265,6 @@ String String::path_to(const String &p_path) const {
return p_path; //impossible to do this
}
- base = src_begin;
src = src.substr(src_begin.length(), src.length());
dst = dst.substr(dst_begin.length(), dst.length());
}
@@ -4280,7 +4317,7 @@ bool String::is_valid_filename() const {
return false;
}
- if (stripped == String()) {
+ if (stripped.is_empty()) {
return false;
}
@@ -4338,13 +4375,13 @@ bool String::is_relative_path() const {
String String::get_base_dir() const {
int end = 0;
- // url scheme style base
+ // URL scheme style base.
int basepos = find("://");
if (basepos != -1) {
end = basepos + 3;
}
- // windows top level directory base
+ // Windows top level directory base.
if (end == 0) {
basepos = find(":/");
if (basepos == -1) {
@@ -4355,7 +4392,24 @@ String String::get_base_dir() const {
}
}
- // unix root directory base
+ // Windows UNC network share path.
+ if (end == 0) {
+ if (is_network_share_path()) {
+ basepos = find("/", 2);
+ if (basepos == -1) {
+ basepos = find("\\", 2);
+ }
+ int servpos = find("/", basepos + 1);
+ if (servpos == -1) {
+ servpos = find("\\", basepos + 1);
+ }
+ if (servpos != -1) {
+ end = servpos + 1;
+ }
+ }
+ }
+
+ // Unix root directory base.
if (end == 0) {
if (begins_with("/")) {
end = 1;
@@ -4412,7 +4466,7 @@ String String::property_name_encode() const {
// as well as '"', '=' or ' ' (32)
const char32_t *cstr = get_data();
for (int i = 0; cstr[i]; i++) {
- if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) {
+ if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] == ';' || cstr[i] == '[' || cstr[i] == ']' || cstr[i] < 33 || cstr[i] > 126) {
return "\"" + c_escape_multiline() + "\"";
}
}
@@ -4421,7 +4475,7 @@ String String::property_name_encode() const {
}
// Changes made to the set of invalid characters must also be reflected in the String documentation.
-const String String::invalid_node_name_characters = ". : @ / \"";
+// Space-separated list; validate_node_name() splits it on " ".
+// UNIQUE_NODE_PREFIX is appended through adjacent string-literal
+// concatenation, so it has to expand to a string literal.
+const String String::invalid_node_name_characters = ". : @ / \" " UNIQUE_NODE_PREFIX;
String String::validate_node_name() const {
Vector<String> chars = String::invalid_node_name_characters.split(" ");
@@ -4495,7 +4549,7 @@ String String::sprintf(const Array &values, bool *error) const {
int min_chars = 0;
int min_decimals = 0;
bool in_decimals = false;
- bool pad_with_zeroes = false;
+ bool pad_with_zeros = false;
bool left_justified = false;
bool show_sign = false;
@@ -4548,7 +4602,7 @@ String String::sprintf(const Array &values, bool *error) const {
// Padding.
int pad_chars_count = (value < 0 || show_sign) ? min_chars - 1 : min_chars;
- String pad_char = pad_with_zeroes ? String("0") : String(" ");
+ String pad_char = pad_with_zeros ? String("0") : String(" ");
if (left_justified) {
str = str.rpad(pad_chars_count, pad_char);
} else {
@@ -4556,10 +4610,13 @@ String String::sprintf(const Array &values, bool *error) const {
}
// Sign.
- if (show_sign && value >= 0) {
- str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "+");
- } else if (value < 0) {
- str = str.insert(pad_with_zeroes ? 0 : str.length() - number_len, "-");
+ if (show_sign || value < 0) {
+ String sign_char = value < 0 ? "-" : "+";
+ if (left_justified) {
+ str = str.insert(0, sign_char);
+ } else {
+ str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char);
+ }
}
formatted += str;
@@ -4588,13 +4645,9 @@ String String::sprintf(const Array &values, bool *error) const {
// Padding. Leave room for sign later if required.
int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
- String pad_char = pad_with_zeroes ? String("0") : String(" ");
+ String pad_char = pad_with_zeros ? String("0") : String(" ");
if (left_justified) {
- if (pad_with_zeroes) {
- return "left justification cannot be used with zeros as the padding";
- } else {
- str = str.rpad(pad_chars_count, pad_char);
- }
+ str = str.rpad(pad_chars_count, pad_char);
} else {
str = str.lpad(pad_chars_count, pad_char);
}
@@ -4605,7 +4658,7 @@ String String::sprintf(const Array &values, bool *error) const {
if (left_justified) {
str = str.insert(0, sign_char);
} else {
- str = str.insert(pad_with_zeroes ? 0 : str.length() - initial_len, sign_char);
+ str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char);
}
}
@@ -4694,7 +4747,11 @@ String String::sprintf(const Array &values, bool *error) const {
min_decimals += n;
} else {
if (c == '0' && min_chars == 0) {
- pad_with_zeroes = true;
+ if (left_justified) {
+ WARN_PRINT("'0' flag ignored with '-' flag in string format");
+ } else {
+ pad_with_zeros = true;
+ }
} else {
min_chars *= 10;
min_chars += n;
@@ -4743,7 +4800,7 @@ String String::sprintf(const Array &values, bool *error) const {
// Back to defaults:
min_chars = 0;
min_decimals = 6;
- pad_with_zeroes = false;
+ pad_with_zeros = false;
left_justified = false;
show_sign = false;
in_decimals = false;
@@ -4844,6 +4901,17 @@ Vector<uint8_t> String::to_utf32_buffer() const {
}
#ifdef TOOLS_ENABLED
+/**
+ * "Tools TRanslate". Performs string replacement for internationalization
+ * within the editor. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `TTRN()` instead of `TTR()`.
+ *
+ * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTR()` instead.
+ */
String TTR(const String &p_text, const String &p_context) {
if (TranslationServer::get_singleton()) {
return TranslationServer::get_singleton()->tool_translate(p_text, p_context);
@@ -4852,6 +4920,18 @@ String TTR(const String &p_text, const String &p_context) {
return p_text;
}
+/**
+ * "Tools TRanslate for N items". Performs string replacement for
+ * internationalization within the editor. A translation context can optionally
+ * be specified to disambiguate between identical source strings in
+ * translations. Use `TTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder).
+ * For translations that can be supplied by exported projects, use `RTRN()` instead.
+ */
String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
if (TranslationServer::get_singleton()) {
return TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
@@ -4864,37 +4944,62 @@ String TTRN(const String &p_text, const String &p_text_plural, int p_n, const St
return p_text_plural;
}
+/**
+ * "Docs TRanslate". Used for the editor class reference documentation,
+ * handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
+ *
+ * `p_text` is passed through `dedent()` and `strip_edges()` before use. When no
+ * TranslationServer singleton exists, that cleaned-up text is returned
+ * untranslated, still with `$DOCS_URL` replaced by `VERSION_DOCS_URL`.
+ */
String DTR(const String &p_text, const String &p_context) {
// Comes straight from the XML, so remove indentation and any trailing whitespace.
const String text = p_text.dedent().strip_edges();
if (TranslationServer::get_singleton()) {
- return TranslationServer::get_singleton()->doc_translate(text, p_context);
+ // NOTE(review): the explicit String(...) suggests doc_translate() does not
+ // return a String directly -- confirm against TranslationServer.
+ return String(TranslationServer::get_singleton()->doc_translate(text, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
}
- return text;
+ return text.replace("$DOCS_URL", VERSION_DOCS_URL);
}
+/**
+ * "Docs TRanslate for N items". Used for the editor class reference documentation
+ * (with support for plurals), handling descriptions extracted from the XML.
+ * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
+ * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
+ *
+ * Both `p_text` and `p_text_plural` are passed through `dedent()` and
+ * `strip_edges()` before use. When no TranslationServer singleton exists, the
+ * cleaned-up singular text is returned for `p_n == 1` and the plural text
+ * otherwise, in both cases with `$DOCS_URL` replaced by `VERSION_DOCS_URL`.
+ */
String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
const String text = p_text.dedent().strip_edges();
const String text_plural = p_text_plural.dedent().strip_edges();
if (TranslationServer::get_singleton()) {
- return TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context);
+ // NOTE(review): the explicit String(...) suggests doc_translate_plural() does
+ // not return a String directly -- confirm against TranslationServer.
+ return String(TranslationServer::get_singleton()->doc_translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", VERSION_DOCS_URL);
}
// Return message based on English plural rule if translation is not possible.
if (p_n == 1) {
- return text;
+ return text.replace("$DOCS_URL", VERSION_DOCS_URL);
}
- return text_plural;
+ return text_plural.replace("$DOCS_URL", VERSION_DOCS_URL);
}
#endif
+/**
+ * "Run-time TRanslate". Performs string replacement for internationalization
+ * within a running project. The translation string must be supplied by the
+ * project, as Godot does not provide built-in translations for `RTR()` strings
+ * to keep binary size low. A translation context can optionally be specified to
+ * disambiguate between identical source strings in translations. When
+ * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`.
+ * If a string mentions a quantity (and may therefore need a dynamic plural form),
+ * use `RTRN()` instead of `RTR()`.
+ *
+ * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTR()` instead.
+ */
String RTR(const String &p_text, const String &p_context) {
if (TranslationServer::get_singleton()) {
String rtr = TranslationServer::get_singleton()->tool_translate(p_text, p_context);
- if (rtr == String() || rtr == p_text) {
+ if (rtr.is_empty() || rtr == p_text) {
return TranslationServer::get_singleton()->translate(p_text, p_context);
} else {
return rtr;
@@ -4904,10 +5009,24 @@ String RTR(const String &p_text, const String &p_context) {
return p_text;
}
+/**
+ * "Run-time TRanslate for N items". Performs string replacement for
+ * internationalization within a running project. The translation string must be
+ * supplied by the project, as Godot does not provide built-in translations for
+ * `RTRN()` strings to keep binary size low. A translation context can
+ * optionally be specified to disambiguate between identical source strings in
+ * translations. Use `RTR()` if the string doesn't need dynamic plural form.
+ * When placeholders are desired, use
+ * `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`.
+ * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
+ *
+ * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/`
+ * folder). For editor translations, use `TTRN()` instead.
+ */
String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
if (TranslationServer::get_singleton()) {
String rtr = TranslationServer::get_singleton()->tool_translate_plural(p_text, p_text_plural, p_n, p_context);
- if (rtr == String() || rtr == p_text || rtr == p_text_plural) {
+ if (rtr.is_empty() || rtr == p_text || rtr == p_text_plural) {
return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context);
} else {
return rtr;