summaryrefslogtreecommitdiff
path: root/core/string/ustring.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r--core/string/ustring.cpp777
1 files changed, 512 insertions, 265 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index d189f3224b..4e26b61334 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -161,6 +161,18 @@ bool CharString::operator<(const CharString &p_right) const {
return is_str_less(get_data(), p_right.get_data());
}
+bool CharString::operator==(const CharString &p_right) const {
+ if (length() == 0) {
+ // True if both have length 0, false if only p_right has a length
+ return p_right.length() == 0;
+ } else if (p_right.length() == 0) {
+ // False due to unequal length
+ return false;
+ }
+
+ return strcmp(ptr(), p_right.ptr()) == 0;
+}
+
CharString &CharString::operator+=(char p_char) {
const int lhs_len = length();
resize(lhs_len + 2);
@@ -323,7 +335,13 @@ void String::copy_from(const char *p_cstr) {
char32_t *dst = this->ptrw();
for (size_t i = 0; i <= len; i++) {
- dst[i] = p_cstr[i];
+ uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+ if (c == 0 && i < len) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
}
@@ -350,7 +368,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
char32_t *dst = this->ptrw();
for (int i = 0; i < len; i++) {
- dst[i] = p_cstr[i];
+ uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
+ if (c == 0) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
dst[len] = 0;
}
@@ -376,14 +400,21 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
}
void String::copy_from(const char32_t &p_char) {
+ if (p_char == 0) {
+ print_unicode_error("NUL character", true);
+ return;
+ }
+ if ((p_char & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+ }
+ if (p_char > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
+ }
+
resize(2);
+
char32_t *dst = ptrw();
- if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
- dst[0] = 0xfffd;
- } else {
- dst[0] = p_char;
- }
+ dst[0] = p_char;
dst[1] = 0;
}
@@ -437,12 +468,18 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
dst[p_length] = 0;
for (int i = 0; i < p_length; i++) {
- if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
- dst[i] = 0xfffd;
- } else {
- dst[i] = p_char[i];
+ if (p_char[i] == 0) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ continue;
+ }
+ if ((p_char[i] & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
}
+ if (p_char[i] > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
+ }
+ dst[i] = p_char[i];
}
}
@@ -481,7 +518,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) {
// wchar_t is 16-bit
String tmp = String::utf16((const char16_t *)p_chr);
#else
- // wchar_t is 32-bi
+ // wchar_t is 32-bit
String tmp = (const char32_t *)p_chr;
#endif
tmp += p_str;
@@ -506,10 +543,12 @@ String &String::operator+=(const String &p_str) {
resize(lhs_len + rhs_len + 1);
- const char32_t *src = p_str.get_data();
+ const char32_t *src = p_str.ptr();
char32_t *dst = ptrw() + lhs_len;
- memcpy(dst, src, (rhs_len + 1) * sizeof(char32_t));
+ // Don't copy the terminating null with `memcpy` to avoid undefined behavior when string is being added to itself (it would overlap the destination).
+ memcpy(dst, src, rhs_len * sizeof(char32_t));
+ *(dst + rhs_len) = _null;
return *this;
}
@@ -527,7 +566,13 @@ String &String::operator+=(const char *p_str) {
char32_t *dst = ptrw() + lhs_len;
for (size_t i = 0; i <= rhs_len; i++) {
- dst[i] = p_str[i];
+ uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
+ if (c == 0 && i < rhs_len) {
+ print_unicode_error("NUL character", true);
+ dst[i] = 0x20;
+ } else {
+ dst[i] = c;
+ }
}
return *this;
@@ -550,15 +595,21 @@ String &String::operator+=(const char32_t *p_str) {
}
String &String::operator+=(char32_t p_char) {
+ if (p_char == 0) {
+ print_unicode_error("NUL character", true);
+ return *this;
+ }
+ if ((p_char & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
+ }
+ if (p_char > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
+ }
+
const int lhs_len = length();
resize(lhs_len + 2);
char32_t *dst = ptrw();
- if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
- dst[lhs_len] = 0xfffd;
- } else {
- dst[lhs_len] = p_char;
- }
+ dst[lhs_len] = p_char;
dst[lhs_len + 1] = 0;
return *this;
@@ -931,62 +982,71 @@ const char32_t *String::get_data() const {
return size() ? &operator[](0) : &zero;
}
-String String::capitalize() const {
- String aux = this->camelcase_to_underscore(true).replace("_", " ").strip_edges();
- String cap;
- for (int i = 0; i < aux.get_slice_count(" "); i++) {
- String slice = aux.get_slicec(' ', i);
- if (slice.length() > 0) {
- slice[0] = _find_upper(slice[0]);
- if (i > 0) {
- cap += " ";
- }
- cap += slice;
- }
- }
-
- return cap;
-}
-
-String String::camelcase_to_underscore(bool lowercase) const {
+String String::_camelcase_to_underscore() const {
const char32_t *cstr = get_data();
String new_string;
int start_index = 0;
for (int i = 1; i < this->size(); i++) {
- bool is_upper = is_ascii_upper_case(cstr[i]);
- bool is_number = is_digit(cstr[i]);
-
- bool are_next_2_lower = false;
- bool is_next_lower = false;
- bool is_next_number = false;
- bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
- bool was_precedent_number = is_digit(cstr[i - 1]);
+ bool is_prev_upper = is_ascii_upper_case(cstr[i - 1]);
+ bool is_prev_lower = is_ascii_lower_case(cstr[i - 1]);
+ bool is_prev_digit = is_digit(cstr[i - 1]);
- if (i + 2 < this->size()) {
- are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
- }
+ bool is_curr_upper = is_ascii_upper_case(cstr[i]);
+ bool is_curr_lower = is_ascii_lower_case(cstr[i]);
+ bool is_curr_digit = is_digit(cstr[i]);
+ bool is_next_lower = false;
if (i + 1 < this->size()) {
is_next_lower = is_ascii_lower_case(cstr[i + 1]);
- is_next_number = is_digit(cstr[i + 1]);
}
- const bool cond_a = is_upper && !was_precedent_upper && !was_precedent_number;
- const bool cond_b = was_precedent_upper && is_upper && are_next_2_lower;
- const bool cond_c = is_number && !was_precedent_number;
- const bool can_break_number_letter = is_number && !was_precedent_number && is_next_lower;
- const bool can_break_letter_number = !is_number && was_precedent_number && (is_next_lower || is_next_number);
+ const bool cond_a = is_prev_lower && is_curr_upper; // aA
+ const bool cond_b = (is_prev_upper || is_prev_digit) && is_curr_upper && is_next_lower; // AAa, 2Aa
+ const bool cond_c = is_prev_digit && is_curr_lower && is_next_lower; // 2aa
+ const bool cond_d = (is_prev_upper || is_prev_lower) && is_curr_digit; // A2, a2
- bool should_split = cond_a || cond_b || cond_c || can_break_number_letter || can_break_letter_number;
- if (should_split) {
+ if (cond_a || cond_b || cond_c || cond_d) {
new_string += this->substr(start_index, i - start_index) + "_";
start_index = i;
}
}
new_string += this->substr(start_index, this->size() - start_index);
- return lowercase ? new_string.to_lower() : new_string;
+ return new_string.to_lower();
+}
+
+String String::capitalize() const {
+ String aux = this->_camelcase_to_underscore().replace("_", " ").strip_edges();
+ String cap;
+ for (int i = 0; i < aux.get_slice_count(" "); i++) {
+ String slice = aux.get_slicec(' ', i);
+ if (slice.length() > 0) {
+ slice[0] = _find_upper(slice[0]);
+ if (i > 0) {
+ cap += " ";
+ }
+ cap += slice;
+ }
+ }
+
+ return cap;
+}
+
+String String::to_camel_case() const {
+ String s = this->to_pascal_case();
+ if (!s.is_empty()) {
+ s[0] = _find_lower(s[0]);
+ }
+ return s;
+}
+
+String String::to_pascal_case() const {
+ return this->capitalize().replace(" ", "");
+}
+
+String String::to_snake_case() const {
+ return this->_camelcase_to_underscore().replace(" ", "_").strip_edges();
}
String String::get_with_code_lines() const {
@@ -1132,9 +1192,14 @@ Vector<String> String::split(const String &p_splitter, bool p_allow_empty, int p
int len = length();
while (true) {
- int end = find(p_splitter, from);
- if (end < 0) {
- end = len;
+ int end;
+ if (p_splitter.is_empty()) {
+ end = from + 1;
+ } else {
+ end = find(p_splitter, from);
+ if (end < 0) {
+ end = len;
+ }
}
if (p_allow_empty || (end > from)) {
if (p_maxsplit <= 0) {
@@ -1175,7 +1240,15 @@ Vector<String> String::rsplit(const String &p_splitter, bool p_allow_empty, int
break;
}
- int left_edge = rfind(p_splitter, remaining_len - p_splitter.length());
+ int left_edge;
+ if (p_splitter.is_empty()) {
+ left_edge = remaining_len - 1;
+ if (left_edge == 0) {
+ left_edge--; // Skip to the < 0 condition.
+ }
+ } else {
+ left_edge = rfind(p_splitter, remaining_len - p_splitter.length());
+ }
if (left_edge < 0) {
// no more splitters, we're done
@@ -1195,8 +1268,8 @@ Vector<String> String::rsplit(const String &p_splitter, bool p_allow_empty, int
return ret;
}
-Vector<float> String::split_floats(const String &p_splitter, bool p_allow_empty) const {
- Vector<float> ret;
+Vector<double> String::split_floats(const String &p_splitter, bool p_allow_empty) const {
+ Vector<double> ret;
int from = 0;
int len = length();
@@ -1399,15 +1472,25 @@ String String::num(double p_num, int p_decimals) {
fmt[5] = 'f';
fmt[6] = 0;
}
- char buf[256];
+ // if we want to convert a double with as much decimal places as as
+ // DBL_MAX or DBL_MIN then we would theoretically need a buffer of at least
+ // DBL_MAX_10_EXP + 2 for DBL_MAX and DBL_MAX_10_EXP + 4 for DBL_MIN.
+ // BUT those values where still giving me exceptions, so I tested from
+ // DBL_MAX_10_EXP + 10 incrementing one by one and DBL_MAX_10_EXP + 17 (325)
+ // was the first buffer size not to throw an exception
+ char buf[325];
#if defined(__GNUC__) || defined(_MSC_VER)
- snprintf(buf, 256, fmt, p_num);
+ // PLEASE NOTE that, albeit vcrt online reference states that snprintf
+ // should safely truncate the output to the given buffer size, we have
+ // found a case where this is not true, so we should create a buffer
+ // as big as needed
+ snprintf(buf, 325, fmt, p_num);
#else
sprintf(buf, fmt, p_num);
#endif
- buf[255] = 0;
+ buf[324] = 0;
//destroy trailing zeroes
{
bool period = false;
@@ -1519,10 +1602,11 @@ String String::num_real(double p_num, bool p_trailing) {
#else
int decimals = 6;
#endif
- // We want to align the digits to the above sane default, so we only
- // need to subtract log10 for numbers with a positive power of ten.
- if (p_num > 10) {
- decimals -= (int)floor(log10(p_num));
+ // We want to align the digits to the above sane default, so we only need
+ // to subtract log10 for numbers with a positive power of ten magnitude.
+ double abs_num = Math::abs(p_num);
+ if (abs_num > 10) {
+ decimals -= (int)floor(log10(abs_num));
}
return num(p_num, decimals);
}
@@ -1583,6 +1667,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
return ret;
}
+void String::print_unicode_error(const String &p_message, bool p_critical) const {
+ if (p_critical) {
+ print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
+ } else {
+ print_error(vformat("Unicode parsing error: %s", p_message));
+ }
+}
+
CharString String::ascii(bool p_allow_extended) const {
if (!length()) {
return CharString();
@@ -1596,7 +1688,7 @@ CharString String::ascii(bool p_allow_extended) const {
if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
cs[i] = c;
} else {
- print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
cs[i] = 0x20;
}
}
@@ -1611,11 +1703,9 @@ String String::utf8(const char *p_utf8, int p_len) {
return ret;
}
-bool String::parse_utf8(const char *p_utf8, int p_len) {
-#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
-
+Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
if (!p_utf8) {
- return true;
+ return ERR_INVALID_DATA;
}
String aux;
@@ -1635,14 +1725,21 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
}
+ bool decode_error = false;
+ bool decode_failed = false;
{
const char *ptrtmp = p_utf8;
const char *ptrtmp_limit = &p_utf8[p_len];
int skip = 0;
+ uint8_t c_start = 0;
while (ptrtmp != ptrtmp_limit && *ptrtmp) {
- if (skip == 0) {
- uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+ uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
+ if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ ptrtmp++;
+ continue;
+ }
/* Determine the number of characters in sequence */
if ((c & 0x80) == 0) {
skip = 0;
@@ -1652,20 +1749,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
skip = 2;
} else if ((c & 0xf8) == 0xf0) {
skip = 3;
+ } else if ((c & 0xfc) == 0xf8) {
+ skip = 4;
+ } else if ((c & 0xfe) == 0xfc) {
+ skip = 5;
} else {
- UNICERROR("invalid skip at " + num_int64(cstr_size));
- return true; //invalid utf8
+ skip = 0;
+ print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
+ decode_failed = true;
}
+ c_start = c;
if (skip == 1 && (c & 0x1e) == 0) {
- UNICERROR("overlong rejected at " + num_int64(cstr_size));
- return true; //reject overlong
+ print_unicode_error(vformat("Overlong encoding (%x ...)", c));
+ decode_error = true;
}
-
str_size++;
-
} else {
- --skip;
+ if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
+ print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
+ decode_error = true;
+ }
+ if (c < 0x80 || c > 0xbf) {
+ print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
+ decode_failed = true;
+ skip = 0;
+ } else {
+ --skip;
+ }
}
cstr_size++;
@@ -1673,80 +1784,95 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
if (skip) {
- UNICERROR("no space left");
- return true; //not enough space
+ print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
+ decode_failed = true;
}
}
if (str_size == 0) {
clear();
- return false;
+ return OK; // empty string
}
resize(str_size + 1);
char32_t *dst = ptrw();
dst[str_size] = 0;
+ int skip = 0;
+ uint32_t unichar = 0;
while (cstr_size) {
- int len = 0;
-
- /* Determine the number of characters in sequence */
- if ((*p_utf8 & 0x80) == 0) {
- len = 1;
- } else if ((*p_utf8 & 0xe0) == 0xc0) {
- len = 2;
- } else if ((*p_utf8 & 0xf0) == 0xe0) {
- len = 3;
- } else if ((*p_utf8 & 0xf8) == 0xf0) {
- len = 4;
- } else {
- UNICERROR("invalid len");
- return true; //invalid UTF8
- }
-
- if (len > cstr_size) {
- UNICERROR("no space left");
- return true; //not enough space
- }
-
- if (len == 2 && (*p_utf8 & 0x1E) == 0) {
- UNICERROR("no space left");
- return true; //reject overlong
- }
-
- /* Convert the first character */
-
- uint32_t unichar = 0;
+ uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
- if (len == 1) {
- unichar = *p_utf8;
+ if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ p_utf8++;
+ continue;
+ }
+ /* Determine the number of characters in sequence */
+ if ((c & 0x80) == 0) {
+ *(dst++) = c;
+ unichar = 0;
+ skip = 0;
+ } else if ((c & 0xe0) == 0xc0) {
+ unichar = (0xff >> 3) & c;
+ skip = 1;
+ } else if ((c & 0xf0) == 0xe0) {
+ unichar = (0xff >> 4) & c;
+ skip = 2;
+ } else if ((c & 0xf8) == 0xf0) {
+ unichar = (0xff >> 5) & c;
+ skip = 3;
+ } else if ((c & 0xfc) == 0xf8) {
+ unichar = (0xff >> 6) & c;
+ skip = 4;
+ } else if ((c & 0xfe) == 0xfc) {
+ unichar = (0xff >> 7) & c;
+ skip = 5;
+ } else {
+ *(dst++) = 0x20;
+ unichar = 0;
+ skip = 0;
+ }
} else {
- unichar = (0xff >> (len + 1)) & *p_utf8;
-
- for (int i = 1; i < len; i++) {
- if ((p_utf8[i] & 0xc0) != 0x80) {
- UNICERROR("invalid utf8");
- return true; //invalid utf8
- }
- if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
- UNICERROR("invalid utf8 overlong");
- return true; //no overlong
+ if (c < 0x80 || c > 0xbf) {
+ *(dst++) = 0x20;
+ skip = 0;
+ } else {
+ unichar = (unichar << 6) | (c & 0x3f);
+ --skip;
+ if (skip == 0) {
+ if (unichar == 0) {
+ print_unicode_error("NUL character", true);
+ decode_failed = true;
+ unichar = 0x20;
+ }
+ if ((unichar & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
+ decode_error = true;
+ }
+ if (unichar > 0x10ffff) {
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
+ decode_error = true;
+ }
+ *(dst++) = unichar;
}
- unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
}
}
- if (unichar >= 0xd800 && unichar <= 0xdfff) {
- UNICERROR("invalid code point");
- return CharString();
- }
- *(dst++) = unichar;
- cstr_size -= len;
- p_utf8 += len;
+ cstr_size--;
+ p_utf8++;
+ }
+ if (skip) {
+ *(dst++) = 0x20;
}
- return false;
-#undef UNICERROR
+ if (decode_failed) {
+ return ERR_INVALID_DATA;
+ } else if (decode_error) {
+ return ERR_PARSE_ERROR;
+ } else {
+ return OK;
+ }
}
CharString String::utf8() const {
@@ -1765,15 +1891,17 @@ CharString String::utf8() const {
fl += 2;
} else if (c <= 0xffff) { // 16 bits
fl += 3;
- } else if (c <= 0x0010ffff) { // 21 bits
+ } else if (c <= 0x001fffff) { // 21 bits
fl += 4;
+ } else if (c <= 0x03ffffff) { // 26 bits
+ fl += 5;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
+ } else if (c <= 0x7fffffff) { // 31 bits
+ fl += 6;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
} else {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return CharString();
- }
- if (c >= 0xd800 && c <= 0xdfff) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return CharString();
+ fl += 1;
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
}
}
@@ -1799,11 +1927,26 @@ CharString String::utf8() const {
APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
- } else { // 21 bits
+ } else if (c <= 0x001fffff) { // 21 bits
APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else if (c <= 0x03ffffff) { // 26 bits
+ APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else if (c <= 0x7fffffff) { // 31 bits
+ APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
+ APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
+ } else {
+ APPEND_CHAR(0x20);
}
}
#undef APPEND_CHAR
@@ -1819,11 +1962,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) {
return ret;
}
-bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
-#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
-
+Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
if (!p_utf16) {
- return true;
+ return ERR_INVALID_DATA;
}
String aux;
@@ -1850,80 +1991,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
}
}
+ bool decode_error = false;
{
const char16_t *ptrtmp = p_utf16;
const char16_t *ptrtmp_limit = &p_utf16[p_len];
- int skip = 0;
+ uint32_t c_prev = 0;
+ bool skip = false;
while (ptrtmp != ptrtmp_limit && *ptrtmp) {
uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
- if (skip == 0) {
- if ((c & 0xfffffc00) == 0xd800) {
- skip = 1; // lead surrogate
- } else if ((c & 0xfffffc00) == 0xdc00) {
- UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
- return true; // invalid UTF16
- } else {
- skip = 0;
+
+ if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+ if (skip) {
+ print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
+ decode_error = true;
}
- str_size++;
- } else {
- if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
- --skip;
+ skip = true;
+ } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ if (skip) {
+ str_size--;
} else {
- UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
- return true; // invalid UTF16
+ print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
+ decode_error = true;
}
+ skip = false;
+ } else {
+ skip = false;
}
+ c_prev = c;
+ str_size++;
cstr_size++;
ptrtmp++;
}
if (skip) {
- UNICERROR("no space left");
- return true; // not enough space
+ print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
+ decode_error = true;
}
}
if (str_size == 0) {
clear();
- return false;
+ return OK; // empty string
}
resize(str_size + 1);
char32_t *dst = ptrw();
dst[str_size] = 0;
+ bool skip = false;
+ uint32_t c_prev = 0;
while (cstr_size) {
- int len = 0;
uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
- if ((c & 0xfffffc00) == 0xd800) {
- len = 2;
+ if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
+ if (skip) {
+ *(dst++) = c_prev; // unpaired, store as is
+ }
+ skip = true;
+ } else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ if (skip) {
+ *(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
+ } else {
+ *(dst++) = c; // unpaired, store as is
+ }
+ skip = false;
} else {
- len = 1;
+ *(dst++) = c;
+ skip = false;
}
- if (len > cstr_size) {
- UNICERROR("no space left");
- return true; //not enough space
- }
-
- uint32_t unichar = 0;
- if (len == 1) {
- unichar = c;
- } else {
- uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
- unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
- }
+ cstr_size--;
+ p_utf16++;
+ c_prev = c;
+ }
- *(dst++) = unichar;
- cstr_size -= len;
- p_utf16 += len;
+ if (skip) {
+ *(dst++) = c_prev;
}
- return false;
-#undef UNICERROR
+ if (decode_error) {
+ return ERR_PARSE_ERROR;
+ } else {
+ return OK;
+ }
}
Char16String String::utf16() const {
@@ -1938,15 +2089,14 @@ Char16String String::utf16() const {
uint32_t c = d[i];
if (c <= 0xffff) { // 16 bits.
fl += 1;
+ if ((c & 0xfffff800) == 0xd800) {
+ print_unicode_error(vformat("Unpaired surrogate (%x)", c));
+ }
} else if (c <= 0x10ffff) { // 32 bits.
fl += 2;
} else {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return Char16String();
- }
- if (c >= 0xd800 && c <= 0xdfff) {
- print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
- return Char16String();
+ print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
+ fl += 1;
}
}
@@ -1965,9 +2115,11 @@ Char16String String::utf16() const {
if (c <= 0xffff) { // 16 bits.
APPEND_CHAR(c);
- } else { // 32 bits.
+ } else if (c <= 0x10ffff) { // 32 bits.
APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
+ } else {
+ APPEND_CHAR(0x20);
}
}
#undef APPEND_CHAR
@@ -2508,10 +2660,11 @@ double String::to_float() const {
uint32_t String::hash(const char *p_cstr) {
uint32_t hashv = 5381;
- uint32_t c;
+ uint32_t c = *p_cstr++;
- while ((c = *p_cstr++)) {
+ while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
+ c = *p_cstr++;
}
return hashv;
@@ -2537,10 +2690,11 @@ uint32_t String::hash(const wchar_t *p_cstr, int p_len) {
uint32_t String::hash(const wchar_t *p_cstr) {
uint32_t hashv = 5381;
- uint32_t c;
+ uint32_t c = *p_cstr++;
- while ((c = *p_cstr++)) {
+ while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
+ c = *p_cstr++;
}
return hashv;
@@ -2557,10 +2711,11 @@ uint32_t String::hash(const char32_t *p_cstr, int p_len) {
uint32_t String::hash(const char32_t *p_cstr) {
uint32_t hashv = 5381;
- uint32_t c;
+ uint32_t c = *p_cstr++;
- while ((c = *p_cstr++)) {
+ while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
+ c = *p_cstr++;
}
return hashv;
@@ -2571,10 +2726,11 @@ uint32_t String::hash() const {
const char32_t *chr = get_data();
uint32_t hashv = 5381;
- uint32_t c;
+ uint32_t c = *chr++;
- while ((c = *chr++)) {
+ while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
+ c = *chr++;
}
return hashv;
@@ -2585,10 +2741,11 @@ uint64_t String::hash64() const {
const char32_t *chr = get_data();
uint64_t hashv = 5381;
- uint64_t c;
+ uint64_t c = *chr++;
- while ((c = *chr++)) {
+ while (c) {
hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
+ c = *chr++;
}
return hashv;
@@ -2694,7 +2851,7 @@ String String::substr(int p_from, int p_chars) const {
return String(*this);
}
- String s = String();
+ String s;
s.copy_from_unchecked(&get_data()[p_from], p_chars);
return s;
}
@@ -3344,51 +3501,52 @@ String String::replacen(const String &p_key, const String &p_with) const {
String String::repeat(int p_count) const {
ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
- String new_string;
- const char32_t *src = this->get_data();
-
- new_string.resize(length() * p_count + 1);
- new_string[length() * p_count] = 0;
-
- for (int i = 0; i < p_count; i++) {
- for (int j = 0; j < length(); j++) {
- new_string[i * length() + j] = src[j];
- }
- }
-
+ int len = length();
+ String new_string = *this;
+ new_string.resize(p_count * len + 1);
+
+ char32_t *dst = new_string.ptrw();
+ int offset = 1;
+ int stride = 1;
+ while (offset < p_count) {
+ memcpy(dst + offset * len, dst, stride * len * sizeof(char32_t));
+ offset += stride;
+ stride = MIN(stride * 2, p_count - offset);
+ }
+ dst[p_count * len] = _null;
return new_string;
}
-String String::left(int p_pos) const {
- if (p_pos < 0) {
- p_pos = length() + p_pos;
+String String::left(int p_len) const {
+ if (p_len < 0) {
+ p_len = length() + p_len;
}
- if (p_pos <= 0) {
+ if (p_len <= 0) {
return "";
}
- if (p_pos >= length()) {
+ if (p_len >= length()) {
return *this;
}
- return substr(0, p_pos);
+ return substr(0, p_len);
}
-String String::right(int p_pos) const {
- if (p_pos < 0) {
- p_pos = length() + p_pos;
+String String::right(int p_len) const {
+ if (p_len < 0) {
+ p_len = length() + p_len;
}
- if (p_pos <= 0) {
+ if (p_len <= 0) {
return "";
}
- if (p_pos >= length()) {
+ if (p_len >= length()) {
return *this;
}
- return substr(length() - p_pos);
+ return substr(length() - p_len);
}
char32_t String::unicode_at(int p_idx) const {
@@ -3545,29 +3703,43 @@ bool String::is_network_share_path() const {
String String::simplify_path() const {
String s = *this;
String drive;
- if (s.begins_with("local://")) {
- drive = "local://";
- s = s.substr(8, s.length());
- } else if (s.begins_with("res://")) {
- drive = "res://";
- s = s.substr(6, s.length());
- } else if (s.begins_with("user://")) {
- drive = "user://";
- s = s.substr(7, s.length());
- } else if (is_network_share_path()) {
- drive = s.substr(0, 2);
- s = s.substr(2, s.length() - 2);
- } else if (s.begins_with("/") || s.begins_with("\\")) {
- drive = s.substr(0, 1);
- s = s.substr(1, s.length() - 1);
- } else {
- int p = s.find(":/");
- if (p == -1) {
- p = s.find(":\\");
+
+ // Check if we have a special path (like res://) or a protocol identifier.
+ int p = s.find("://");
+ bool found = false;
+ if (p > 0) {
+ bool only_chars = true;
+ for (int i = 0; i < p; i++) {
+ if (!is_ascii_alphanumeric_char(s[i])) {
+ only_chars = false;
+ break;
+ }
}
- if (p != -1 && p < s.find("/")) {
- drive = s.substr(0, p + 2);
- s = s.substr(p + 2, s.length());
+ if (only_chars) {
+ found = true;
+ drive = s.substr(0, p + 3);
+ s = s.substr(p + 3);
+ }
+ }
+ if (!found) {
+ if (is_network_share_path()) {
+ // Network path, beginning with // or \\.
+ drive = s.substr(0, 2);
+ s = s.substr(2);
+ } else if (s.begins_with("/") || s.begins_with("\\")) {
+ // Absolute path.
+ drive = s.substr(0, 1);
+ s = s.substr(1);
+ } else {
+ // Windows-style drive path, like C:/ or C:\.
+ p = s.find(":/");
+ if (p == -1) {
+ p = s.find(":\\");
+ }
+ if (p != -1 && p < s.find("/")) {
+ drive = s.substr(0, p + 2);
+ s = s.substr(p + 2);
+ }
}
}
@@ -3762,7 +3934,6 @@ String String::c_unescape() const {
escaped = escaped.replace("\\v", "\v");
escaped = escaped.replace("\\'", "\'");
escaped = escaped.replace("\\\"", "\"");
- escaped = escaped.replace("\\?", "\?");
escaped = escaped.replace("\\\\", "\\");
return escaped;
@@ -3779,7 +3950,6 @@ String String::c_escape() const {
escaped = escaped.replace("\t", "\\t");
escaped = escaped.replace("\v", "\\v");
escaped = escaped.replace("\'", "\\'");
- escaped = escaped.replace("\?", "\\?");
escaped = escaped.replace("\"", "\\\"");
return escaped;
@@ -4343,7 +4513,7 @@ String String::get_extension() const {
return substr(pos + 1, length());
}
-String String::plus_file(const String &p_file) const {
+String String::path_join(const String &p_file) const {
if (is_empty()) {
return p_file;
}
@@ -4529,15 +4699,18 @@ String String::sprintf(const Array &values, bool *error) const {
double value = values[value_index];
bool is_negative = (value < 0);
String str = String::num(ABS(value), min_decimals);
+ const bool is_finite = Math::is_finite(value);
// Pad decimals out.
- str = str.pad_decimals(min_decimals);
+ if (is_finite) {
+ str = str.pad_decimals(min_decimals);
+ }
int initial_len = str.length();
// Padding. Leave room for sign later if required.
int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
- String pad_char = pad_with_zeros ? String("0") : String(" ");
+ String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros
if (left_justified) {
str = str.rpad(pad_chars_count, pad_char);
} else {
@@ -4559,6 +4732,76 @@ String String::sprintf(const Array &values, bool *error) const {
in_format = false;
break;
}
+ case 'v': { // Vector2/3/4/2i/3i/4i
+ if (value_index >= values.size()) {
+ return "not enough arguments for format string";
+ }
+
+ int count;
+ switch (values[value_index].get_type()) {
+ case Variant::VECTOR2:
+ case Variant::VECTOR2I: {
+ count = 2;
+ } break;
+ case Variant::VECTOR3:
+ case Variant::VECTOR3I: {
+ count = 3;
+ } break;
+ case Variant::VECTOR4:
+ case Variant::VECTOR4I: {
+ count = 4;
+ } break;
+ default: {
+ return "%v requires a vector type (Vector2/3/4/2i/3i/4i)";
+ }
+ }
+
+ Vector4 vec = values[value_index];
+ String str = "(";
+ for (int i = 0; i < count; i++) {
+ double val = vec[i];
+ String number_str = String::num(ABS(val), min_decimals);
+ const bool is_finite = Math::is_finite(val);
+
+ // Pad decimals out.
+ if (is_finite) {
+ number_str = number_str.pad_decimals(min_decimals);
+ }
+
+ int initial_len = number_str.length();
+
+ // Padding. Leave room for sign later if required.
+ int pad_chars_count = val < 0 ? min_chars - 1 : min_chars;
+ String pad_char = (pad_with_zeros && is_finite) ? String("0") : String(" "); // Never pad NaN or inf with zeros
+ if (left_justified) {
+ number_str = number_str.rpad(pad_chars_count, pad_char);
+ } else {
+ number_str = number_str.lpad(pad_chars_count, pad_char);
+ }
+
+ // Add sign if needed.
+ if (val < 0) {
+ if (left_justified) {
+ number_str = number_str.insert(0, "-");
+ } else {
+ number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, "-");
+ }
+ }
+
+ // Add number to combined string
+ str += number_str;
+
+ if (i < count - 1) {
+ str += ", ";
+ }
+ }
+ str += ")";
+
+ formatted += str;
+ ++value_index;
+ in_format = false;
+ break;
+ }
case 's': { // String
if (value_index >= values.size()) {
return "not enough arguments for format string";
@@ -4651,7 +4894,7 @@ String String::sprintf(const Array &values, bool *error) const {
}
break;
}
- case '.': { // Float separator.
+ case '.': { // Float/Vector separator.
if (in_decimals) {
return "too many decimal points in format";
}
@@ -4665,8 +4908,12 @@ String String::sprintf(const Array &values, bool *error) const {
return "not enough arguments for format string";
}
- if (!values[value_index].is_num()) {
- return "* wants number";
+ Variant::Type value_type = values[value_index].get_type();
+ if (!values[value_index].is_num() &&
+ value_type != Variant::VECTOR2 && value_type != Variant::VECTOR2I &&
+ value_type != Variant::VECTOR3 && value_type != Variant::VECTOR3I &&
+ value_type != Variant::VECTOR4 && value_type != Variant::VECTOR4I) {
+ return "* wants number or vector";
}
int size = values[value_index];