diff options
Diffstat (limited to 'core/string/ustring.cpp')
-rw-r--r-- | core/string/ustring.cpp | 273 |
1 files changed, 130 insertions, 143 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index d630e987ea..a57c7b2504 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -427,12 +427,12 @@ String operator+(char32_t p_chr, const String &p_str) { } String &String::operator+=(const String &p_str) { - if (empty()) { + if (is_empty()) { *this = p_str; return *this; } - if (p_str.empty()) { + if (p_str.is_empty()) { return *this; } @@ -519,7 +519,7 @@ bool String::operator==(const char *p_str) const { if (length() != len) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -558,7 +558,7 @@ bool String::operator==(const char32_t *p_str) const { if (length() != len) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -580,7 +580,7 @@ bool String::operator==(const String &p_str) const { if (length() != p_str.length()) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -605,7 +605,7 @@ bool String::operator==(const StrRange &p_str_range) const { if (length() != len) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -678,20 +678,20 @@ bool String::operator>=(const String &p_str) const { } bool String::operator<(const char *p_str) const { - if (empty() && p_str[0] == 0) { + if (is_empty() && p_str[0] == 0) { return false; } - if (empty()) { + if (is_empty()) { return true; } return is_str_less(get_data(), p_str); } bool String::operator<(const wchar_t *p_str) const { - if (empty() && p_str[0] == 0) { + if (is_empty() && p_str[0] == 0) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -705,10 +705,10 @@ bool String::operator<(const wchar_t *p_str) const { } bool String::operator<(const char32_t *p_str) const { - if (empty() && p_str[0] == 0) { + if (is_empty() && p_str[0] == 0) { return false; } - if (empty()) { + if (is_empty()) { return true; } @@ -720,13 +720,13 @@ bool String::operator<(const String &p_str) const { } signed char String::nocasecmp_to(const String &p_str) const { - if (empty() && p_str.empty()) { + if (is_empty() && p_str.is_empty()) { return 0; } - if (empty()) { + if (is_empty()) { return -1; } - if (p_str.empty()) { + if (p_str.is_empty()) { return 1; } @@ -752,13 +752,13 @@ signed char String::nocasecmp_to(const String &p_str) const { } signed char String::casecmp_to(const String &p_str) const { - if (empty() && p_str.empty()) { + if (is_empty() && p_str.is_empty()) { return 0; } - if (empty()) { + if (is_empty()) { return -1; } - if (p_str.empty()) { + if (p_str.is_empty()) { return 1; } @@ -949,10 +949,10 @@ String String::get_with_code_lines() const { } int String::get_slice_count(String p_splitter) const { - if (empty()) { + if (is_empty()) { return 0; } - if (p_splitter.empty()) { + if (p_splitter.is_empty()) { return 0; } @@ -968,7 +968,7 @@ int String::get_slice_count(String p_splitter) const { } String String::get_slice(String p_splitter, int p_slice) const { - if (empty() || p_splitter.empty()) { + if (is_empty() || p_splitter.is_empty()) { return ""; } @@ -1008,7 +1008,7 @@ String String::get_slice(String p_splitter, int p_slice) const { } String String::get_slicec(char32_t p_splitter, int p_slice) const { - if (empty()) { + if (is_empty()) { return String(); } @@ -1409,8 +1409,9 @@ String String::num(double p_num, int p_decimals) { if (digit == MAX_DIGITS) //no point in going to infinite break; - if ((dec - (double)((int)dec)) < 1e-6) + if (dec - (double)((int)dec) < 1e-6) { break; + } } if (digit == p_decimals) @@ -2096,8 +2097,9 @@ String::String(const StrRange &p_range) { copy_from(p_range.c_str, p_range.len); } -int64_t String::hex_to_int(bool p_with_prefix) const { - if (p_with_prefix && length() < 3) { +int64_t String::hex_to_int() const { + int len = length(); + if (len == 0) { return 0; } @@ -2109,10 +2111,7 @@ int64_t String::hex_to_int(bool p_with_prefix) const { s++; } - if (p_with_prefix) { - if (s[0] != '0' || s[1] != 'x') { - return 0; - } + if (len > 2 && s[0] == '0' && s[1] == 'x') { s += 2; } @@ -2139,8 +2138,9 @@ int64_t String::hex_to_int(bool p_with_prefix) const { return hex * sign; } -int64_t String::bin_to_int(bool p_with_prefix) const { - if (p_with_prefix && length() < 3) { +int64_t String::bin_to_int() const { + int len = length(); + if (len == 0) { return 0; } @@ -2152,10 +2152,7 @@ int64_t String::bin_to_int(bool p_with_prefix) const { s++; } - if (p_with_prefix) { - if (s[0] != '0' || s[1] != 'b') { - return 0; - } + if (len > 2 && s[0] == '0' && s[1] == 'b') { s += 2; } @@ -2585,7 +2582,7 @@ int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { } double String::to_float() const { - if (empty()) { + if (is_empty()) { return 0; } return built_in_strtod<char32_t>(get_data()); @@ -2767,7 +2764,7 @@ String String::substr(int p_from, int p_chars) const { p_chars = length() - p_from; } - if (empty() || p_from < 0 || p_from >= length() || p_chars <= 0) { + if (is_empty() || p_from < 0 || p_from >= length() || p_chars <= 0) { return ""; } @@ -3074,35 +3071,47 @@ int String::rfindn(const String &p_str, int p_from) const { } bool String::ends_with(const String &p_string) const { - int pos = rfind(p_string); - if (pos == -1) { + int l = p_string.length(); + if (l > length()) { return false; } - return pos + p_string.length() == length(); + + if (l == 0) { + return true; + } + + const char32_t *p = &p_string[0]; + const char32_t *s = &operator[](length() - l); + + for (int i = 0; i < l; i++) { + if (p[i] != s[i]) { + return false; + } + } + + return true; } bool String::begins_with(const String &p_string) const { - if (p_string.length() > length()) { + int l = p_string.length(); + if (l > length()) { return false; } - int l = p_string.length(); if (l == 0) { return true; } - const char32_t *src = &p_string[0]; - const char32_t *str = &operator[](0); + const char32_t *p = &p_string[0]; + const char32_t *s = &operator[](0); - int i = 0; - for (; i < l; i++) { - if (src[i] != str[i]) { + for (int i = 0; i < l; i++) { + if (p[i] != s[i]) { return false; } } - // only if i == l the p_string matches the beginning - return i == l; + return true; } bool String::begins_with(const char *p_string) const { @@ -3142,7 +3151,7 @@ bool String::is_quoted() const { } int String::_count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const { - if (p_string.empty()) { + if (p_string.is_empty()) { return 0; } int len = length(); @@ -3250,8 +3259,8 @@ float String::similarity(const String &p_string) const { int src_size = src_bigrams.size(); int tgt_size = tgt_bigrams.size(); - double sum = src_size + tgt_size; - double inter = 0; + int sum = src_size + tgt_size; + int inter = 0; for (int i = 0; i < src_size; i++) { for (int j = 0; j < tgt_size; j++) { if (src_bigrams[i] == tgt_bigrams[j]) { @@ -3478,7 +3487,7 @@ String String::right(int p_pos) const { return substr(p_pos, (length() - p_pos)); } -char32_t String::ord_at(int p_idx) const { +char32_t String::unicode_at(int p_idx) const { ERR_FAIL_INDEX_V(p_idx, length(), 0); return operator[](p_idx); } @@ -3750,7 +3759,7 @@ bool String::is_valid_string() const { return valid; } -String String::http_escape() const { +String String::uri_encode() const { const CharString temp = utf8(); String res; for (int i = 0; i < temp.length(); ++i) { @@ -3774,23 +3783,25 @@ String String::http_escape() const { return res; } -String String::http_unescape() const { +String String::uri_decode() const { String res; for (int i = 0; i < length(); ++i) { - if (ord_at(i) == '%' && i + 2 < length()) { - char32_t ord1 = ord_at(i + 1); + if (unicode_at(i) == '%' && i + 2 < length()) { + char32_t ord1 = unicode_at(i + 1); if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) { - char32_t ord2 = ord_at(i + 2); + char32_t ord2 = unicode_at(i + 2); if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); i += 2; } } else { - res += ord_at(i); + res += unicode_at(i); } + } else if (unicode_at(i) == '+') { + res += ' '; } else { - res += ord_at(i); + res += unicode_at(i); } } return String::utf8(res.ascii()); @@ -3877,25 +3888,55 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch if (p_src_len >= 4 && p_src[1] == '#') { char32_t c = 0; - - for (int i = 2; i < p_src_len; i++) { - eat = i + 1; - char32_t ct = p_src[i]; - if (ct == ';') { - break; - } else if (ct >= '0' && ct <= '9') { - ct = ct - '0'; - } else if (ct >= 'a' && ct <= 'f') { - ct = (ct - 'a') + 10; - } else if (ct >= 'A' && ct <= 'F') { - ct = (ct - 'A') + 10; - } else { - continue; + bool overflow = false; + if (p_src[2] == 'x') { + // Hex entity &#x<num>; + for (int i = 3; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';') { + break; + } else if (ct >= '0' && ct <= '9') { + ct = ct - '0'; + } else if (ct >= 'a' && ct <= 'f') { + ct = (ct - 'a') + 10; + } else if (ct >= 'A' && ct <= 'F') { + ct = (ct - 'A') + 10; + } else { + break; + } + if (c > (UINT32_MAX >> 4)) { + overflow = true; + break; + } + c <<= 4; + c |= ct; + } + } else { + // Decimal entity &#<num>; + for (int i = 2; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';' || ct < '0' || ct > '9') { + break; + } + } + if (p_src[eat - 1] == ';') { + int64_t val = String::to_int(p_src + 2, eat - 3); + if (val > 0 && val <= UINT32_MAX) { + c = (char32_t)val; + } else { + overflow = true; + } } - c <<= 4; - c |= ct; } + // Value must be non-zero, in the range of char32_t, + // actually end with ';'. If invalid, leave the entity as-is + if (c == '\0' || overflow || p_src[eat - 1] != ';') { + eat = 1; + c = *p_src; + } if (p_dst) { *p_dst = c; } @@ -4241,11 +4282,11 @@ bool String::is_valid_ip_address() const { Vector<String> ip = split(":"); for (int i = 0; i < ip.size(); i++) { String n = ip[i]; - if (n.empty()) { + if (n.is_empty()) { continue; } if (n.is_valid_hex_number(false)) { - int64_t nint = n.hex_to_int(false); + int64_t nint = n.hex_to_int(); if (nint < 0 || nint > 0xffff) { return false; } @@ -4285,7 +4326,10 @@ bool String::is_rel_path() const { } String String::get_base_dir() const { - int basepos = find("://"); + int basepos = find(":/"); + if (basepos == -1) { + basepos = find(":\\"); + } String rs; String base; if (basepos != -1) { @@ -4328,7 +4372,7 @@ String String::get_extension() const { } String String::plus_file(const String &p_file) const { - if (empty()) { + if (is_empty()) { return p_file; } if (operator[](length() - 1) == '/' || (p_file.size() > 0 && p_file.operator[](0) == '/')) { @@ -4337,63 +4381,6 @@ String String::plus_file(const String &p_file) const { return *this + "/" + p_file; } -String String::percent_encode() const { - CharString cs = utf8(); - String encoded; - for (int i = 0; i < cs.length(); i++) { - uint8_t c = cs[i]; - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '~' || c == '.') { - char p[2] = { (char)c, 0 }; - encoded += p; - } else { - char p[4] = { '%', 0, 0, 0 }; - static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - - p[1] = hex[c >> 4]; - p[2] = hex[c & 0xF]; - encoded += p; - } - } - - return encoded; -} - -String String::percent_decode() const { - CharString pe; - - CharString cs = utf8(); - for (int i = 0; i < cs.length(); i++) { - uint8_t c = cs[i]; - if (c == '%' && i < length() - 2) { - uint8_t a = LOWERCASE(cs[i + 1]); - uint8_t b = LOWERCASE(cs[i + 2]); - - if (a >= '0' && a <= '9') { - c = (a - '0') << 4; - } else if (a >= 'a' && a <= 'f') { - c = (a - 'a' + 10) << 4; - } else { - continue; - } - - uint8_t d = 0; - - if (b >= '0' && b <= '9') { - d = (b - '0'); - } else if (b >= 'a' && b <= 'f') { - d = (b - 'a' + 10); - } else { - continue; - } - c += d; - i += 2; - } - pe += c; - } - - return String::utf8(pe.ptr()); -} - String String::property_name_encode() const { // Escape and quote strings with extended ASCII or further Unicode characters // as well as '"', '=' or ' ' (32) @@ -4757,7 +4744,7 @@ String String::unquote() const { Vector<uint8_t> String::to_ascii_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } CharString charstr = s->ascii(); @@ -4773,7 +4760,7 @@ Vector<uint8_t> String::to_ascii_buffer() const { Vector<uint8_t> String::to_utf8_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } CharString charstr = s->utf8(); @@ -4789,7 +4776,7 @@ Vector<uint8_t> String::to_utf8_buffer() const { Vector<uint8_t> String::to_utf16_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } Char16String charstr = s->utf16(); @@ -4805,7 +4792,7 @@ Vector<uint8_t> String::to_utf16_buffer() const { Vector<uint8_t> String::to_utf32_buffer() const { const String *s = this; - if (s->empty()) { + if (s->is_empty()) { return Vector<uint8_t>(); } |