diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2020-07-27 13:43:20 +0300 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2020-09-03 19:56:24 +0300 |
commit | 80b8eff6aa41ba79175a5152ba5b2b9b16f6de3f (patch) | |
tree | 39ed96f7b9062e2f4ae1e20560fdb1f2f04c4d67 /core | |
parent | 0864f12f0de50ffecbc9964cdf4edbae75e27be5 (diff) |
[Complex Test Layouts] Change `String` to use UTF-32 encoding on all platforms.
Diffstat (limited to 'core')
-rw-r--r-- | core/color.cpp | 4 | ||||
-rw-r--r-- | core/cowdata.h | 2 | ||||
-rw-r--r-- | core/hashfuncs.h | 2 | ||||
-rw-r--r-- | core/io/file_access_pack.cpp | 2 | ||||
-rw-r--r-- | core/io/file_access_zip.cpp | 4 | ||||
-rw-r--r-- | core/io/ip_address.cpp | 4 | ||||
-rw-r--r-- | core/io/json.cpp | 20 | ||||
-rw-r--r-- | core/io/json.h | 8 | ||||
-rw-r--r-- | core/io/xml_parser.cpp | 4 | ||||
-rw-r--r-- | core/math/expression.cpp | 20 | ||||
-rw-r--r-- | core/method_bind.h | 14 | ||||
-rw-r--r-- | core/os/file_access.cpp | 8 | ||||
-rw-r--r-- | core/string_buffer.h | 24 | ||||
-rw-r--r-- | core/string_builder.cpp | 4 | ||||
-rw-r--r-- | core/string_name.cpp | 2 | ||||
-rw-r--r-- | core/string_name.h | 2 | ||||
-rw-r--r-- | core/type_info.h | 3 | ||||
-rw-r--r-- | core/ustring.cpp | 1114 | ||||
-rw-r--r-- | core/ustring.h | 178 | ||||
-rw-r--r-- | core/variant.cpp | 4 | ||||
-rw-r--r-- | core/variant.h | 4 | ||||
-rw-r--r-- | core/variant_call.cpp | 63 | ||||
-rw-r--r-- | core/variant_op.cpp | 2 | ||||
-rw-r--r-- | core/variant_parser.cpp | 30 | ||||
-rw-r--r-- | core/variant_parser.h | 8 |
25 files changed, 1068 insertions, 462 deletions
diff --git a/core/color.cpp b/core/color.cpp index c85cd9100d..c61ee0e64a 100644 --- a/core/color.cpp +++ b/core/color.cpp @@ -390,7 +390,7 @@ String _to_hex(float p_val) { String ret; for (int i = 0; i < 2; i++) { - CharType c[2] = { 0, 0 }; + char32_t c[2] = { 0, 0 }; int lv = v & 0xF; if (lv < 10) { c[0] = '0' + lv; @@ -399,7 +399,7 @@ String _to_hex(float p_val) { } v >>= 4; - String cs = (const CharType *)c; + String cs = (const char32_t *)c; ret = cs + ret; } diff --git a/core/cowdata.h b/core/cowdata.h index 82daefb5bd..79676e6d80 100644 --- a/core/cowdata.h +++ b/core/cowdata.h @@ -40,6 +40,7 @@ template <class T> class Vector; class String; +class Char16String; class CharString; template <class T, class V> class VMap; @@ -49,6 +50,7 @@ class CowData { template <class TV> friend class Vector; friend class String; + friend class Char16String; friend class CharString; template <class TV, class VV> friend class VMap; diff --git a/core/hashfuncs.h b/core/hashfuncs.h index d984f6c524..f4048843fc 100644 --- a/core/hashfuncs.h +++ b/core/hashfuncs.h @@ -146,6 +146,8 @@ struct HashMapHasherDefault { static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return p_int; } static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return (uint32_t)p_int; } static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return (uint32_t)p_wchar; } + static _FORCE_INLINE_ uint32_t hash(const char16_t p_uchar) { return (uint32_t)p_uchar; } + static _FORCE_INLINE_ uint32_t hash(const char32_t p_uchar) { return (uint32_t)p_uchar; } static _FORCE_INLINE_ uint32_t hash(const RID &p_rid) { return hash_one_uint64(p_rid.get_id()); } static _FORCE_INLINE_ uint32_t hash(const StringName &p_string_name) { return p_string_name.hash(); } diff --git a/core/io/file_access_pack.cpp b/core/io/file_access_pack.cpp index 024ec3b2b5..cc8d68be83 100644 --- a/core/io/file_access_pack.cpp +++ b/core/io/file_access_pack.cpp @@ -46,7 +46,7 @@ Error PackedData::add_pack(const String &p_path, bool p_replace_files, size_t p_ void PackedData::add_path(const String &pkg_path, const String &path, uint64_t ofs, uint64_t size, const uint8_t *p_md5, PackSource *p_src, bool p_replace_files) { PathMD5 pmd5(path.md5_buffer()); - //printf("adding path %ls, %lli, %lli\n", path.c_str(), pmd5.a, pmd5.b); + //printf("adding path %s, %lli, %lli\n", path.utf8().get_data(), pmd5.a, pmd5.b); bool exists = files.has(pmd5); diff --git a/core/io/file_access_zip.cpp b/core/io/file_access_zip.cpp index 974bb65a18..d75ca2fdc6 100644 --- a/core/io/file_access_zip.cpp +++ b/core/io/file_access_zip.cpp @@ -148,7 +148,7 @@ unzFile ZipArchive::get_file_handle(String p_file) const { } bool ZipArchive::try_open_pack(const String &p_path, bool p_replace_files, size_t p_offset = 0) { - //printf("opening zip pack %ls, %i, %i\n", p_name.c_str(), p_name.extension().nocasecmp_to("zip"), p_name.extension().nocasecmp_to("pcz")); + //printf("opening zip pack %s, %i, %i\n", p_name.utf8().get_data(), p_name.extension().nocasecmp_to("zip"), p_name.extension().nocasecmp_to("pcz")); // load with offset feature only supported for PCK files ERR_FAIL_COND_V_MSG(p_offset != 0, false, "Invalid PCK data. Note that loading files with a non-zero offset isn't supported with ZIP archives."); @@ -201,7 +201,7 @@ bool ZipArchive::try_open_pack(const String &p_path, bool p_replace_files, size_ uint8_t md5[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; PackedData::get_singleton()->add_path(p_path, fname, 1, 0, md5, this, p_replace_files); - //printf("packed data add path %ls, %ls\n", p_name.c_str(), fname.c_str()); + //printf("packed data add path %s, %s\n", p_name.utf8().get_data(), fname.utf8().get_data()); if ((i + 1) < gi.number_entry) { unzGoToNextFile(zfile); diff --git a/core/io/ip_address.cpp b/core/io/ip_address.cpp index c7a0ae5605..d0fb63b958 100644 --- a/core/io/ip_address.cpp +++ b/core/io/ip_address.cpp @@ -71,7 +71,7 @@ static void _parse_hex(const String &p_string, int p_start, uint8_t *p_dst) { } int n = 0; - CharType c = p_string[i]; + char32_t c = p_string[i]; if (c >= '0' && c <= '9') { n = c - '0'; } else if (c >= 'a' && c <= 'f') { @@ -101,7 +101,7 @@ void IP_Address::_parse_ipv6(const String &p_string) { int parts_idx = 0; for (int i = 0; i < p_string.length(); i++) { - CharType c = p_string[i]; + char32_t c = p_string[i]; if (c == ':') { if (i == 0) { continue; // next must be a ":" diff --git a/core/io/json.cpp b/core/io/json.cpp index 8bdd6385cb..1b89d966fd 100644 --- a/core/io/json.cpp +++ b/core/io/json.cpp @@ -125,7 +125,7 @@ String JSON::print(const Variant &p_var, const String &p_indent, bool p_sort_key return _print_var(p_var, p_indent, 0, p_sort_keys); } -Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str) { +Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str) { while (p_len > 0) { switch (p_str[index]) { case '\n': { @@ -180,12 +180,12 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to } else if (p_str[index] == '\\') { //escaped characters... index++; - CharType next = p_str[index]; + char32_t next = p_str[index]; if (next == 0) { r_err_str = "Unterminated String"; return ERR_PARSE_ERROR; } - CharType res = 0; + char32_t res = 0; switch (next) { case 'b': @@ -206,7 +206,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to case 'u': { // hex number for (int j = 0; j < 4; j++) { - CharType c = p_str[index + j + 1]; + char32_t c = p_str[index + j + 1]; if (c == 0) { r_err_str = "Unterminated String"; return ERR_PARSE_ERROR; @@ -215,7 +215,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to r_err_str = "Malformed hex constant in string"; return ERR_PARSE_ERROR; } - CharType v; + char32_t v; if (c >= '0' && c <= '9') { v = c - '0'; } else if (c >= 'a' && c <= 'f') { @@ -264,7 +264,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to if (p_str[index] == '-' || (p_str[index] >= '0' && p_str[index] <= '9')) { //a number - const CharType *rptr; + const char32_t *rptr; double number = String::to_float(&p_str[index], &rptr); index += (rptr - &p_str[index]); r_token.type = TK_NUMBER; @@ -293,7 +293,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to return ERR_PARSE_ERROR; } -Error JSON::_parse_value(Variant &value, Token &token, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str) { +Error JSON::_parse_value(Variant &value, Token &token, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str) { if (token.type == TK_CURLY_BRACKET_OPEN) { Dictionary d; Error err = _parse_object(d, p_str, index, p_len, line, r_err_str); @@ -337,7 +337,7 @@ Error JSON::_parse_value(Variant &value, Token &token, const CharType *p_str, in } } -Error JSON::_parse_array(Array &array, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str) { +Error JSON::_parse_array(Array &array, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str) { Token token; bool need_comma = false; @@ -375,7 +375,7 @@ Error JSON::_parse_array(Array &array, const CharType *p_str, int &index, int p_ return ERR_PARSE_ERROR; } -Error JSON::_parse_object(Dictionary &object, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str) { +Error JSON::_parse_object(Dictionary &object, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str) { bool at_key = true; String key; Token token; @@ -439,7 +439,7 @@ Error JSON::_parse_object(Dictionary &object, const CharType *p_str, int &index, } Error JSON::parse(const String &p_json, Variant &r_ret, String &r_err_str, int &r_err_line) { - const CharType *str = p_json.ptr(); + const char32_t *str = p_json.ptr(); int idx = 0; int len = p_json.length(); Token token; diff --git a/core/io/json.h b/core/io/json.h index 4fc5630a93..9122228163 100644 --- a/core/io/json.h +++ b/core/io/json.h @@ -65,10 +65,10 @@ class JSON { static String _print_var(const Variant &p_var, const String &p_indent, int p_cur_indent, bool p_sort_keys); - static Error _get_token(const CharType *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str); - static Error _parse_value(Variant &value, Token &token, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str); - static Error _parse_array(Array &array, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str); - static Error _parse_object(Dictionary &object, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str); + static Error _get_token(const char32_t *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str); + static Error _parse_value(Variant &value, Token &token, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str); + static Error _parse_array(Array &array, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str); + static Error _parse_object(Dictionary &object, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str); public: static String print(const Variant &p_var, const String &p_indent = "", bool p_sort_keys = true); diff --git a/core/io/xml_parser.cpp b/core/io/xml_parser.cpp index b11267b60f..fc75ac7d1e 100644 --- a/core/io/xml_parser.cpp +++ b/core/io/xml_parser.cpp @@ -36,7 +36,7 @@ VARIANT_ENUM_CAST(XMLParser::NodeType); -static bool _equalsn(const CharType *str1, const CharType *str2, int len) { +static bool _equalsn(const char32_t *str1, const char32_t *str2, int len) { int i; for (i = 0; i < len && str1[i] && str2[i]; ++i) { if (str1[i] != str2[i]) { @@ -64,7 +64,7 @@ String XMLParser::_replace_special_characters(const String &origstr) { int specialChar = -1; for (int i = 0; i < (int)special_characters.size(); ++i) { - const CharType *p = &origstr[pos] + 1; + const char32_t *p = &origstr[pos] + 1; if (_equalsn(&special_characters[i][1], p, special_characters[i].length() - 1)) { specialChar = i; diff --git a/core/math/expression.cpp b/core/math/expression.cpp index 735a30f6cc..1040f9e0e4 100644 --- a/core/math/expression.cpp +++ b/core/math/expression.cpp @@ -596,7 +596,7 @@ void Expression::exec_func(BuiltinFunc p_func, const Variant **p_inputs, Variant } break; case TEXT_CHAR: { - CharType result[2] = { *p_inputs[0], 0 }; + char32_t result[2] = { *p_inputs[0], 0 }; *r_return = String(result); @@ -739,7 +739,7 @@ void Expression::exec_func(BuiltinFunc p_func, const Variant **p_inputs, Variant //////// -static bool _is_number(CharType c) { +static bool _is_number(char32_t c) { return (c >= '0' && c <= '9'); } @@ -747,7 +747,7 @@ Error Expression::_get_token(Token &r_token) { while (true) { #define GET_CHAR() (str_ofs >= expression.length() ? 0 : expression[str_ofs++]) - CharType cchar = GET_CHAR(); + char32_t cchar = GET_CHAR(); switch (cchar) { case 0: { @@ -900,7 +900,7 @@ Error Expression::_get_token(Token &r_token) { case '"': { String str; while (true) { - CharType ch = GET_CHAR(); + char32_t ch = GET_CHAR(); if (ch == 0) { _set_error("Unterminated String"); @@ -912,13 +912,13 @@ Error Expression::_get_token(Token &r_token) { } else if (ch == '\\') { //escaped characters... - CharType next = GET_CHAR(); + char32_t next = GET_CHAR(); if (next == 0) { _set_error("Unterminated String"); r_token.type = TK_ERROR; return ERR_PARSE_ERROR; } - CharType res = 0; + char32_t res = 0; switch (next) { case 'b': @@ -939,7 +939,7 @@ Error Expression::_get_token(Token &r_token) { case 'u': { // hex number for (int j = 0; j < 4; j++) { - CharType c = GET_CHAR(); + char32_t c = GET_CHAR(); if (c == 0) { _set_error("Unterminated String"); @@ -951,7 +951,7 @@ Error Expression::_get_token(Token &r_token) { r_token.type = TK_ERROR; return ERR_PARSE_ERROR; } - CharType v; + char32_t v; if (_is_number(c)) { v = c - '0'; } else if (c >= 'a' && c <= 'f') { @@ -992,7 +992,7 @@ Error Expression::_get_token(Token &r_token) { break; } - CharType next_char = (str_ofs >= expression.length()) ? 0 : expression[str_ofs]; + char32_t next_char = (str_ofs >= expression.length()) ? 0 : expression[str_ofs]; if (_is_number(cchar) || (cchar == '.' && _is_number(next_char))) { //a number @@ -1004,7 +1004,7 @@ Error Expression::_get_token(Token &r_token) { #define READING_DONE 4 int reading = READING_INT; - CharType c = cchar; + char32_t c = cchar; bool exp_sign = false; bool exp_beg = false; bool is_float = false; diff --git a/core/method_bind.h b/core/method_bind.h index ff2c771f81..942e2e0036 100644 --- a/core/method_bind.h +++ b/core/method_bind.h @@ -181,18 +181,18 @@ VARIANT_ENUM_CAST(Variant::Type); VARIANT_ENUM_CAST(Variant::Operator); template <> -struct VariantCaster<wchar_t> { - static _FORCE_INLINE_ wchar_t cast(const Variant &p_variant) { - return (wchar_t)p_variant.operator int(); +struct VariantCaster<char32_t> { + static _FORCE_INLINE_ char32_t cast(const Variant &p_variant) { + return (char32_t)p_variant.operator int(); } }; #ifdef PTRCALL_ENABLED template <> -struct PtrToArg<wchar_t> { - _FORCE_INLINE_ static wchar_t convert(const void *p_ptr) { - return wchar_t(*reinterpret_cast<const int *>(p_ptr)); +struct PtrToArg<char32_t> { + _FORCE_INLINE_ static char32_t convert(const void *p_ptr) { + return char32_t(*reinterpret_cast<const int *>(p_ptr)); } - _FORCE_INLINE_ static void encode(wchar_t p_val, const void *p_ptr) { + _FORCE_INLINE_ static void encode(char32_t p_val, const void *p_ptr) { *(int *)p_ptr = p_val; } }; diff --git a/core/os/file_access.cpp b/core/os/file_access.cpp index 20b3435911..9dbb2952f7 100644 --- a/core/os/file_access.cpp +++ b/core/os/file_access.cpp @@ -234,7 +234,7 @@ double FileAccess::get_double() const { String FileAccess::get_token() const { CharString token; - CharType c = get_8(); + char32_t c = get_8(); while (!eof_reached()) { if (c <= ' ') { @@ -299,7 +299,7 @@ public: String FileAccess::get_line() const { CharBuffer line; - CharType c = get_8(); + char32_t c = get_8(); while (!eof_reached()) { if (c == '\n' || c == '\0') { @@ -342,8 +342,8 @@ Vector<String> FileAccess::get_csv_line(const String &p_delim) const { bool in_quote = false; String current; for (int i = 0; i < l.length(); i++) { - CharType c = l[i]; - CharType s[2] = { 0, 0 }; + char32_t c = l[i]; + char32_t s[2] = { 0, 0 }; if (!in_quote && c == p_delim[0]) { strings.push_back(current); diff --git a/core/string_buffer.h b/core/string_buffer.h index f9cf31075a..a685720851 100644 --- a/core/string_buffer.h +++ b/core/string_buffer.h @@ -35,21 +35,21 @@ template <int SHORT_BUFFER_SIZE = 64> class StringBuffer { - CharType short_buffer[SHORT_BUFFER_SIZE]; + char32_t short_buffer[SHORT_BUFFER_SIZE]; String buffer; int string_length = 0; - _FORCE_INLINE_ CharType *current_buffer_ptr() { + _FORCE_INLINE_ char32_t *current_buffer_ptr() { return static_cast<String &>(buffer).empty() ? short_buffer : buffer.ptrw(); } public: - StringBuffer &append(CharType p_char); + StringBuffer &append(char32_t p_char); StringBuffer &append(const String &p_string); StringBuffer &append(const char *p_str); - StringBuffer &append(const CharType *p_str, int p_clip_to_len = -1); + StringBuffer &append(const char32_t *p_str, int p_clip_to_len = -1); - _FORCE_INLINE_ void operator+=(CharType p_char) { + _FORCE_INLINE_ void operator+=(char32_t p_char) { append(p_char); } @@ -61,7 +61,7 @@ public: append(p_str); } - _FORCE_INLINE_ void operator+=(const CharType *p_str) { + _FORCE_INLINE_ void operator+=(const char32_t *p_str) { append(p_str); } @@ -80,7 +80,7 @@ public: }; template <int SHORT_BUFFER_SIZE> -StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(CharType p_char) { +StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(char32_t p_char) { reserve(string_length + 2); current_buffer_ptr()[string_length++] = p_char; return *this; @@ -88,7 +88,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(CharTyp template <int SHORT_BUFFER_SIZE> StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const String &p_string) { - return append(p_string.c_str()); + return append(p_string.get_data()); } template <int SHORT_BUFFER_SIZE> @@ -96,7 +96,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const c int len = strlen(p_str); reserve(string_length + len + 1); - CharType *buf = current_buffer_ptr(); + char32_t *buf = current_buffer_ptr(); for (const char *c_ptr = p_str; *c_ptr; ++c_ptr) { buf[string_length++] = *c_ptr; } @@ -104,13 +104,13 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const c } template <int SHORT_BUFFER_SIZE> -StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const CharType *p_str, int p_clip_to_len) { +StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const char32_t *p_str, int p_clip_to_len) { int len = 0; while ((p_clip_to_len < 0 || len < p_clip_to_len) && p_str[len]) { ++len; } reserve(string_length + len + 1); - memcpy(&(current_buffer_ptr()[string_length]), p_str, len * sizeof(CharType)); + memcpy(&(current_buffer_ptr()[string_length]), p_str, len * sizeof(char32_t)); string_length += len; return *this; @@ -125,7 +125,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::reserve(int p_ bool need_copy = string_length > 0 && buffer.empty(); buffer.resize(next_power_of_2(p_size)); if (need_copy) { - memcpy(buffer.ptrw(), short_buffer, string_length * sizeof(CharType)); + memcpy(buffer.ptrw(), short_buffer, string_length * sizeof(char32_t)); } return *this; diff --git a/core/string_builder.cpp b/core/string_builder.cpp index c8d6498f27..dec299ffa3 100644 --- a/core/string_builder.cpp +++ b/core/string_builder.cpp @@ -61,7 +61,7 @@ String StringBuilder::as_string() const { return ""; } - CharType *buffer = memnew_arr(CharType, string_length); + char32_t *buffer = memnew_arr(char32_t, string_length); int current_position = 0; @@ -73,7 +73,7 @@ String StringBuilder::as_string() const { // Godot string const String &s = strings[godot_string_elem]; - memcpy(buffer + current_position, s.ptr(), s.length() * sizeof(CharType)); + memcpy(buffer + current_position, s.ptr(), s.length() * sizeof(char32_t)); current_position += s.length(); diff --git a/core/string_name.cpp b/core/string_name.cpp index cbf6009681..6260e3ce8c 100644 --- a/core/string_name.cpp +++ b/core/string_name.cpp @@ -317,7 +317,7 @@ StringName StringName::search(const char *p_name) { return StringName(); //does not exist } -StringName StringName::search(const CharType *p_name) { +StringName StringName::search(const char32_t *p_name) { ERR_FAIL_COND_V(!configured, StringName()); ERR_FAIL_COND_V(!p_name, StringName()); diff --git a/core/string_name.h b/core/string_name.h index 886ddd0ee7..4f90479bda 100644 --- a/core/string_name.h +++ b/core/string_name.h @@ -122,7 +122,7 @@ public: } static StringName search(const char *p_name); - static StringName search(const CharType *p_name); + static StringName search(const char32_t *p_name); static StringName search(const String &p_name); struct AlphCompare { diff --git a/core/type_info.h b/core/type_info.h index e3d2b5bd53..3c7f59bb84 100644 --- a/core/type_info.h +++ b/core/type_info.h @@ -132,7 +132,8 @@ MAKE_TYPE_INFO_WITH_META(uint32_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_ MAKE_TYPE_INFO_WITH_META(int32_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_INT32) MAKE_TYPE_INFO_WITH_META(uint64_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_UINT64) MAKE_TYPE_INFO_WITH_META(int64_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_INT64) -MAKE_TYPE_INFO(wchar_t, Variant::INT) +MAKE_TYPE_INFO(char16_t, Variant::INT) +MAKE_TYPE_INFO(char32_t, Variant::INT) MAKE_TYPE_INFO_WITH_META(float, Variant::FLOAT, GodotTypeInfo::METADATA_REAL_IS_FLOAT) MAKE_TYPE_INFO_WITH_META(double, Variant::FLOAT, GodotTypeInfo::METADATA_REAL_IS_DOUBLE) diff --git a/core/ustring.cpp b/core/ustring.cpp index 9d2d938eaf..d5afbc2b47 100644 --- a/core/ustring.cpp +++ b/core/ustring.cpp @@ -39,7 +39,6 @@ #include "core/ucaps.h" #include "core/variant.h" -#include <wchar.h> #include <cstdint> #ifndef NO_USE_STDLIB @@ -62,9 +61,10 @@ #define IS_HEX_DIGIT(m_d) (((m_d) >= '0' && (m_d) <= '9') || ((m_d) >= 'a' && (m_d) <= 'f') || ((m_d) >= 'A' && (m_d) <= 'F')) const char CharString::_null = 0; -const CharType String::_null = 0; +const char16_t Char16String::_null = 0; +const char32_t String::_null = 0; -bool is_symbol(CharType c) { +bool is_symbol(char32_t c) { return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' '); } @@ -96,9 +96,11 @@ bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) { } } -/** STRING **/ +/*************************************************************************/ +/* Char16String */ +/*************************************************************************/ -bool CharString::operator<(const CharString &p_right) const { +bool Char16String::operator<(const Char16String &p_right) const { if (length() == 0) { return p_right.length() != 0; } @@ -106,7 +108,7 @@ bool CharString::operator<(const CharString &p_right) const { return is_str_less(get_data(), p_right.get_data()); } -CharString &CharString::operator+=(char p_char) { +Char16String &Char16String::operator+=(char16_t p_char) { resize(size() ? size() + 1 : 2); set(length(), 0); set(length() - 1, p_char); @@ -114,19 +116,75 @@ CharString &CharString::operator+=(char p_char) { return *this; } -const char *CharString::get_data() const { +Char16String &Char16String::operator=(const char16_t *p_cstr) { + copy_from(p_cstr); + return *this; +} + +const char16_t *Char16String::get_data() const { if (size()) { return &operator[](0); } else { - return ""; + return u""; } } +void Char16String::copy_from(const char16_t *p_cstr) { + if (!p_cstr) { + resize(0); + return; + } + + const char16_t *s = p_cstr; + for (; *s; s++) { + } + size_t len = s - p_cstr; + + if (len == 0) { + resize(0); + return; + } + + Error err = resize(++len); // include terminating null char + + ERR_FAIL_COND_MSG(err != OK, "Failed to copy char16_t string."); + + memcpy(ptrw(), p_cstr, len * sizeof(char16_t)); +} + +/*************************************************************************/ +/* CharString */ +/*************************************************************************/ + +bool CharString::operator<(const CharString &p_right) const { + if (length() == 0) { + return p_right.length() != 0; + } + + return is_str_less(get_data(), p_right.get_data()); +} + +CharString &CharString::operator+=(char p_char) { + resize(size() ? size() + 1 : 2); + set(length(), 0); + set(length() - 1, p_char); + + return *this; +} + CharString &CharString::operator=(const char *p_cstr) { copy_from(p_cstr); return *this; } +const char *CharString::get_data() const { + if (size()) { + return &operator[](0); + } else { + return ""; + } +} + void CharString::copy_from(const char *p_cstr) { if (!p_cstr) { resize(0); @@ -147,7 +205,44 @@ void CharString::copy_from(const char *p_cstr) { memcpy(ptrw(), p_cstr, len); } +/*************************************************************************/ +/* String */ +/*************************************************************************/ + +//TODO: move to TextServer +//kind of poor should be rewritten properly +String String::word_wrap(int p_chars_per_line) const { + int from = 0; + int last_space = 0; + String ret; + for (int i = 0; i < length(); i++) { + if (i - from >= p_chars_per_line) { + if (last_space == -1) { + ret += substr(from, i - from + 1) + "\n"; + } else { + ret += substr(from, last_space - from) + "\n"; + i = last_space; //rewind + } + from = i + 1; + last_space = -1; + } else if (operator[](i) == ' ' || operator[](i) == '\t') { + last_space = i; + } else if (operator[](i) == '\n') { + ret += substr(from, i - from) + "\n"; + from = i + 1; + last_space = -1; + } + } + + if (from < length()) { + ret += substr(from, length()); + } + + return ret; +} + void String::copy_from(const char *p_cstr) { + // copy Latin-1 encoded c-string directly if (!p_cstr) { resize(0); return; @@ -166,21 +261,22 @@ void String::copy_from(const char *p_cstr) { resize(len + 1); // include 0 - CharType *dst = this->ptrw(); + char32_t *dst = this->ptrw(); for (int i = 0; i < len + 1; i++) { dst[i] = p_cstr[i]; } } -void String::copy_from(const CharType *p_cstr, const int p_clip_to) { +void String::copy_from(const char *p_cstr, const int p_clip_to) { + // copy Latin-1 encoded c-string directly if (!p_cstr) { resize(0); return; } int len = 0; - const CharType *ptr = p_cstr; + const char *ptr = p_cstr; while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) { len++; } @@ -190,55 +286,117 @@ void String::copy_from(const CharType *p_cstr, const int p_clip_to) { return; } - copy_from_unchecked(p_cstr, len); -} - -// assumes the following have already been validated: -// p_char != nullptr -// p_length > 0 -// p_length <= p_char strlen -void String::copy_from_unchecked(const CharType *p_char, const int p_length) { - resize(p_length + 1); - set(p_length, 0); + resize(len + 1); // include 0 - CharType *dst = ptrw(); + char32_t *dst = this->ptrw(); - for (int i = 0; i < p_length; i++) { - dst[i] = p_char[i]; + for (int i = 0; i < len; i++) { + dst[i] = p_cstr[i]; } + dst[len] = 0; } -void String::copy_from(const CharType &p_char) { +void String::copy_from(const wchar_t *p_cstr) { +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit, parse as UTF-16 + parse_utf16((const char16_t *)p_cstr); +#else + // wchar_t is 32-bit, copy directly + copy_from((const char32_t *)p_cstr); +#endif +} + +void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) { +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit, parse as UTF-16 + parse_utf16((const char16_t *)p_cstr, p_clip_to); +#else + // wchar_t is 32-bit, copy directly + copy_from((const char32_t *)p_cstr, p_clip_to); +#endif +} + +void String::copy_from(const char32_t &p_char) { resize(2); - set(0, p_char); + if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); + set(0, 0xfffd); + } else { + set(0, p_char); + } set(1, 0); } -bool String::operator==(const String &p_str) const { - if (length() != p_str.length()) { - return false; +void String::copy_from(const char32_t *p_cstr) { + if (!p_cstr) { + resize(0); + return; } - if (empty()) { - return true; + + int len = 0; + const char32_t *ptr = p_cstr; + while (*(ptr++) != 0) { + len++; } - int l = length(); + if (len == 0) { + resize(0); + return; + } - const CharType *src = c_str(); - const CharType *dst = p_str.c_str(); + copy_from_unchecked(p_cstr, len); +} - /* Compare char by char */ - for (int i = 0; i < l; i++) { - if (src[i] != dst[i]) { - return false; +void String::copy_from(const char32_t *p_cstr, const int p_clip_to) { + if (!p_cstr) { + resize(0); + return; + } + + int len = 0; + const char32_t *ptr = p_cstr; + while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) { + len++; + } + + if (len == 0) { + resize(0); + return; + } + + copy_from_unchecked(p_cstr, len); +} + +// assumes the following have already been validated: +// p_char != nullptr +// p_length > 0 +// p_length <= p_char strlen +void String::copy_from_unchecked(const char32_t *p_char, const int p_length) { + resize(p_length + 1); + set(p_length, 0); + + char32_t *dst = ptrw(); + + for (int i = 0; i < p_length; i++) { + if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + "."); + dst[i] = 0xfffd; + } else { + dst[i] = p_char[i]; } } +} - return true; +void String::operator=(const char *p_str) { + copy_from(p_str); } -bool String::operator!=(const String &p_str) const { - return !(*this == p_str); +void String::operator=(const char32_t *p_str) { + copy_from(p_str); +} + +void String::operator=(const wchar_t *p_str) { + copy_from(p_str); } String String::operator+(const String &p_str) const { @@ -247,6 +405,28 @@ String String::operator+(const String &p_str) const { return res; } +String operator+(const char *p_chr, const String &p_str) { + String tmp = p_chr; + tmp += p_str; + return tmp; +} + +String operator+(const wchar_t *p_chr, const String &p_str) { +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit + String tmp = String::utf16((const char16_t *)p_chr); +#else + // wchar_t is 32-bi + String tmp = (const char32_t *)p_chr; +#endif + tmp += p_str; + return tmp; +} + +String operator+(char32_t p_chr, const String &p_str) { + return (String::chr(p_chr) + p_str); +} + String &String::operator+=(const String &p_str) { if (empty()) { *this = p_str; @@ -261,8 +441,8 @@ String &String::operator+=(const String &p_str) { resize(length() + p_str.size()); - const CharType *src = p_str.c_str(); - CharType *dst = ptrw(); + const char32_t *src = p_str.get_data(); + char32_t *dst = ptrw(); set(length(), 0); @@ -273,19 +453,6 @@ String &String::operator+=(const String &p_str) { return *this; } -String &String::operator+=(const CharType *p_str) { - *this += String(p_str); - return *this; -} - -String &String::operator+=(CharType p_char) { - resize(size() ? size() + 1 : 2); - set(length(), 0); - set(length() - 1, p_char); - - return *this; -} - String &String::operator+=(const char *p_str) { if (!p_str || p_str[0] == 0) { return *this; @@ -301,7 +468,7 @@ String &String::operator+=(const char *p_str) { resize(from + src_len + 1); - CharType *dst = ptrw(); + char32_t *dst = ptrw(); set(length(), 0); @@ -312,16 +479,43 @@ String &String::operator+=(const char *p_str) { return *this; } -void String::operator=(const char *p_str) { - copy_from(p_str); +String &String::operator+=(const wchar_t *p_str) { +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit + *this += String::utf16((const char16_t *)p_str); +#else + // wchar_t is 32-bit + *this += String((const char32_t *)p_str); +#endif + return *this; } -void String::operator=(const CharType *p_str) { - copy_from(p_str); +String &String::operator+=(const char32_t *p_str) { + *this += String(p_str); + return *this; } -bool String::operator==(const StrRange &p_str_range) const { - int len = p_str_range.len; +String &String::operator+=(char32_t p_char) { + resize(size() ? size() + 1 : 2); + set(length(), 0); + if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + "."); + set(length() - 1, 0xfffd); + } else { + set(length() - 1, p_char); + } + + return *this; +} + +bool String::operator==(const char *p_str) const { + // compare Latin-1 encoded c-string + int len = 0; + const char *aux = p_str; + + while (*(aux++) != 0) { + len++; + } if (length() != len) { return false; @@ -330,12 +524,13 @@ bool String::operator==(const StrRange &p_str_range) const { return true; } - const CharType *c_str = p_str_range.c_str; - const CharType *dst = &operator[](0); + int l = length(); - /* Compare char by char */ - for (int i = 0; i < len; i++) { - if (c_str[i] != dst[i]) { + const char32_t *dst = get_data(); + + // Compare char by char + for (int i = 0; i < l; i++) { + if ((char32_t)p_str[i] != dst[i]) { return false; } } @@ -343,9 +538,19 @@ bool String::operator==(const StrRange &p_str_range) const { return true; } -bool String::operator==(const char *p_str) const { +bool String::operator==(const wchar_t *p_str) const { +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit, parse as UTF-16 + return *this == String::utf16((const char16_t *)p_str); +#else + // wchar_t is 32-bit, compare char by char + return *this == (const char32_t *)p_str; +#endif +} + +bool String::operator==(const char32_t *p_str) const { int len = 0; - const char *aux = p_str; + const char32_t *aux = p_str; while (*(aux++) != 0) { len++; @@ -360,7 +565,7 @@ bool String::operator==(const char *p_str) const { int l = length(); - const CharType *dst = c_str(); + const char32_t *dst = get_data(); /* Compare char by char */ for (int i = 0; i < l; i++) { @@ -372,14 +577,32 @@ bool String::operator==(const char *p_str) const { return true; } -bool String::operator==(const CharType *p_str) const { - int len = 0; - const CharType *aux = p_str; +bool String::operator==(const String &p_str) const { + if (length() != p_str.length()) { + return false; + } + if (empty()) { + return true; + } - while (*(aux++) != 0) { - len++; + int l = length(); + + const char32_t *src = get_data(); + const char32_t *dst = p_str.get_data(); + + /* Compare char by char */ + for (int i = 0; i < l; i++) { + if (src[i] != dst[i]) { + return false; + } } + return true; +} + +bool String::operator==(const StrRange &p_str_range) const { + int len = p_str_range.len; + if (length() != len) { return false; } @@ -387,13 +610,12 @@ bool String::operator==(const CharType *p_str) const { return true; } - int l = length(); - - const CharType *dst = c_str(); + const char32_t *c_str = p_str_range.c_str; + const char32_t *dst = &operator[](0); /* Compare char by char */ - for (int i = 0; i < l; i++) { - if (p_str[i] != dst[i]) { + for (int i = 0; i < len; i++) { + if (c_str[i] != dst[i]) { return false; } } @@ -401,30 +623,68 @@ bool String::operator==(const CharType *p_str) const { return true; } +bool operator==(const char *p_chr, const String &p_str) { + return p_str == p_chr; +} + +bool operator==(const wchar_t *p_chr, const String &p_str) { +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit + return p_str == String::utf16((const char16_t *)p_chr); +#else + // wchar_t is 32-bi + return p_str == String((const char32_t *)p_chr); +#endif +} + bool String::operator!=(const char *p_str) const { return (!(*this == p_str)); } -bool String::operator!=(const CharType *p_str) const { +bool String::operator!=(const wchar_t *p_str) const { return (!(*this == p_str)); } -bool String::operator<(const CharType *p_str) const { +bool String::operator!=(const char32_t *p_str) const { + return (!(*this == p_str)); +} + +bool String::operator!=(const String &p_str) const { + return !((*this == p_str)); +} + +bool String::operator<=(const String &p_str) const { + return (*this < p_str) || (*this == p_str); +} + +bool String::operator<(const char *p_str) const { if (empty() && p_str[0] == 0) { return false; } if (empty()) { return true; } - - return is_str_less(c_str(), p_str); + return is_str_less(get_data(), p_str); } -bool String::operator<=(const String &p_str) const { - return (*this < p_str) || (*this == p_str); +bool String::operator<(const wchar_t *p_str) const { + if (empty() && p_str[0] == 0) { + return false; + } + if (empty()) { + return true; + } + +#ifdef WINDOWS_ENABLED + // wchar_t is 16-bit + return is_str_less(get_data(), String::utf16((const char16_t *)p_str).get_data()); +#else + // wchar_t is 32-bit + return is_str_less(get_data(), (const char32_t *)p_str); +#endif } -bool String::operator<(const char *p_str) const { +bool String::operator<(const char32_t *p_str) const { if (empty() && p_str[0] == 0) { return false; } @@ -432,11 +692,11 @@ bool String::operator<(const char *p_str) const { return true; } - return is_str_less(c_str(), p_str); + return is_str_less(get_data(), p_str); } bool String::operator<(const String &p_str) const { - return operator<(p_str.c_str()); + return operator<(p_str.get_data()); } signed char String::nocasecmp_to(const String &p_str) const { @@ -450,8 +710,8 @@ signed char String::nocasecmp_to(const String &p_str) const { return 1; } - const CharType *that_str = p_str.c_str(); - const CharType *this_str = c_str(); + const char32_t *that_str = p_str.get_data(); + const char32_t *this_str = get_data(); while (true) { if (*that_str == 0 && *this_str == 0) { @@ -482,8 +742,8 @@ signed char String::casecmp_to(const String &p_str) const { return 1; } - const CharType *that_str = p_str.c_str(); - const CharType *this_str = c_str(); + const char32_t *that_str = p_str.get_data(); + const char32_t *this_str = get_data(); while (true) { if (*that_str == 0 && *this_str == 0) { @@ -504,8 +764,8 @@ signed char String::casecmp_to(const String &p_str) const { } signed char String::naturalnocasecmp_to(const String &p_str) const { - const CharType *this_str = c_str(); - const CharType *that_str = p_str.c_str(); + const char32_t *this_str = get_data(); + const char32_t *that_str = p_str.get_data(); if (this_str && that_str) { while (*this_str == '.' || *that_str == '.') { @@ -571,6 +831,11 @@ signed char String::naturalnocasecmp_to(const String &p_str) const { return 0; } +const char32_t *String::get_data() const { + static const char32_t zero = 0; + return size() ? &operator[](0) : &zero; +} + void String::erase(int p_pos, int p_chars) { *this = left(p_pos) + substr(p_pos + p_chars, length() - ((p_pos + p_chars))); } @@ -593,7 +858,7 @@ String String::capitalize() const { } String String::camelcase_to_underscore(bool lowercase) const { - const CharType *cstr = c_str(); + const char32_t *cstr = get_data(); String new_string; const char A = 'A', Z = 'Z'; const char a = 'a', z = 'z'; @@ -705,7 +970,7 @@ String String::get_slice(String p_splitter, int p_slice) const { return ""; //no find! } -String String::get_slicec(CharType p_splitter, int p_slice) const { +String String::get_slicec(char32_t p_splitter, int p_slice) const { if (empty()) { return String(); } @@ -714,7 +979,7 @@ String String::get_slicec(CharType p_splitter, int p_slice) const { return String(); } - const CharType *c = this->ptr(); + const char32_t *c = this->ptr(); int i = 0; int prev = 0; int count = 0; @@ -851,7 +1116,7 @@ Vector<float> String::split_floats(const String &p_splitter, bool p_allow_empty) end = len; } if (p_allow_empty || (end > from)) { - ret.push_back(String::to_float(&c_str()[from])); + ret.push_back(String::to_float(&get_data()[from])); } if (end == len) { @@ -880,7 +1145,7 @@ Vector<float> String::split_floats_mk(const Vector<String> &p_splitters, bool p_ } if (p_allow_empty || (end > from)) { - ret.push_back(String::to_float(&c_str()[from])); + ret.push_back(String::to_float(&get_data()[from])); } if (end == len) { @@ -904,7 +1169,7 @@ Vector<int> String::split_ints(const String &p_splitter, bool p_allow_empty) con end = len; } if (p_allow_empty || (end > from)) { - ret.push_back(String::to_int(&c_str()[from], end - from)); + ret.push_back(String::to_int(&get_data()[from], end - from)); } if (end == len) { @@ -933,7 +1198,7 @@ Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allo } if (p_allow_empty || (end > from)) { - ret.push_back(String::to_int(&c_str()[from], end - from)); + ret.push_back(String::to_int(&get_data()[from], end - from)); } if (end == len) { @@ -946,7 +1211,7 @@ Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allo return ret; } -String String::join(Vector<String> parts) { +String String::join(Vector<String> parts) const { String ret; for (int i = 0; i < parts.size(); ++i) { if (i > 0) { @@ -957,11 +1222,11 @@ String String::join(Vector<String> parts) { return ret; } -CharType String::char_uppercase(CharType p_char) { +char32_t String::char_uppercase(char32_t p_char) { return _find_upper(p_char); } -CharType String::char_lowercase(CharType p_char) { +char32_t String::char_lowercase(char32_t p_char) { return _find_lower(p_char); } @@ -969,8 +1234,8 @@ String String::to_upper() const { String upper = *this; for (int i = 0; i < upper.size(); i++) { - const CharType s = upper[i]; - const CharType t = _find_upper(s); + const char32_t s = upper[i]; + const char32_t t = _find_upper(s); if (s != t) { // avoid copy on write upper[i] = t; } @@ -983,8 +1248,8 @@ String String::to_lower() const { String lower = *this; for (int i = 0; i < lower.size(); i++) { - const CharType s = lower[i]; - const CharType t = _find_lower(s); + const char32_t s = lower[i]; + const char32_t t = _find_lower(s); if (s != t) { // avoid copy on write lower[i] = t; } @@ -993,34 +1258,8 @@ String String::to_lower() const { return lower; } -const CharType *String::c_str() const { - static const CharType zero = 0; - - return size() ? &operator[](0) : &zero; -} - -String String::md5(const uint8_t *p_md5) { - return String::hex_encode_buffer(p_md5, 16); -} - -String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { - static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - - String ret; - char v[2] = { 0, 0 }; - - for (int i = 0; i < p_len; i++) { - v[0] = hex[p_buffer[i] >> 4]; - ret += v; - v[0] = hex[p_buffer[i] & 0xF]; - ret += v; - } - - return ret; -} - -String String::chr(CharType p_char) { - CharType c[2] = { p_char, 0 }; +String String::chr(char32_t p_char) { + char32_t c[2] = { p_char, 0 }; return String(c); } @@ -1028,6 +1267,14 @@ String String::num(double p_num, int p_decimals) { if (Math::is_nan(p_num)) { return "nan"; } + + if (Math::is_inf(p_num)) { + if (signbit(p_num)) { + return "-inf"; + } else { + return "inf"; + } + } #ifndef NO_USE_STDLIB if (p_decimals > 16) { @@ -1106,7 +1353,7 @@ String String::num(double p_num, int p_decimals) { /* decimal part */ if (p_decimals > 0 || (p_decimals == -1 && (int)p_num != p_num)) { - double dec = p_num - (float)((int)p_num); + double dec = p_num - (double)((int)p_num); int digit = 0; if (p_decimals > MAX_DIGITS) @@ -1125,7 +1372,7 @@ String String::num(double p_num, int p_decimals) { if (digit == MAX_DIGITS) //no point in going to infinite break; - if ((dec - (float)((int)dec)) < 1e-6) + if ((dec - (double)((int)dec)) < 1e-6) break; } @@ -1159,7 +1406,7 @@ String String::num(double p_num, int p_decimals) { s = "0"; else { while (intn) { - CharType num = '0' + (intn % 10); + char32_t num = '0' + (intn % 10); intn /= 10; s = num + s; } @@ -1188,7 +1435,7 @@ String String::num_int64(int64_t p_num, int base, bool capitalize_hex) { } String s; s.resize(chars + 1); - CharType *c = s.ptrw(); + char32_t *c = s.ptrw(); c[chars] = 0; n = p_num; do { @@ -1221,7 +1468,7 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { String s; s.resize(chars + 1); - CharType *c = s.ptrw(); + char32_t *c = s.ptrw(); c[chars] = 0; n = p_num; do { @@ -1240,6 +1487,18 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) { } String String::num_real(double p_num) { + if (Math::is_nan(p_num)) { + return "nan"; + } + + if (Math::is_inf(p_num)) { + if (signbit(p_num)) { + return "-inf"; + } else { + return "inf"; + } + } + String s; String sd; /* integer part */ @@ -1251,7 +1510,7 @@ String String::num_real(double p_num) { /* decimal part */ if ((int)p_num != p_num) { - double dec = p_num - (float)((int)p_num); + double dec = p_num - (double)((int)p_num); int digit = 0; int decimals = MAX_DIGITS; @@ -1265,7 +1524,7 @@ String String::num_real(double p_num) { dec_max = dec_max * 10 + 9; digit++; - if ((dec - (float)((int)dec)) < 1e-6) { + if ((dec - (double)((int)dec)) < 1e-6) { break; } @@ -1302,7 +1561,7 @@ String String::num_real(double p_num) { s = "0"; } else { while (intn) { - CharType num = '0' + (intn % 10); + char32_t num = '0' + (intn % 10); intn /= 10; s = num + s; } @@ -1319,6 +1578,14 @@ String String::num_scientific(double p_num) { if (Math::is_nan(p_num)) { return "nan"; } + + if (Math::is_inf(p_num)) { + if (signbit(p_num)) { + return "-inf"; + } else { + return "inf"; + } + } #ifndef NO_USE_STDLIB char buf[256]; @@ -1348,6 +1615,26 @@ String String::num_scientific(double p_num) { #endif } +String String::md5(const uint8_t *p_md5) { + return String::hex_encode_buffer(p_md5, 16); +} + +String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { + static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + + String ret; + char v[2] = { 0, 0 }; + + for (int i = 0; i < p_len; i++) { + v[0] = hex[p_buffer[i] >> 4]; + ret += v; + v[0] = hex[p_buffer[i] & 0xF]; + ret += v; + } + + return ret; +} + CharString String::ascii(bool p_allow_extended) const { if (!length()) { return CharString(); @@ -1357,7 +1644,13 @@ CharString String::ascii(bool p_allow_extended) const { cs.resize(size()); for (int i = 0; i < size(); i++) { - cs[i] = operator[](i); + char32_t c = operator[](i); + if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) { + cs[i] = c; + } else { + print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character."); + cs[i] = 0x20; + } } return cs; @@ -1371,7 +1664,7 @@ String String::utf8(const char *p_utf8, int p_len) { } bool String::parse_utf8(const char *p_utf8, int p_len) { -#define _UNICERROR(m_err) print_line("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); +#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?"); if (!p_utf8) { return true; @@ -1384,9 +1677,9 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { /* HANDLE BOM (Byte Order Mark) */ if (p_len < 0 || p_len >= 3) { - bool has_bom = uint8_t(p_utf8[0]) == 0xEF && uint8_t(p_utf8[1]) == 0xBB && uint8_t(p_utf8[2]) == 0xBF; + bool has_bom = uint8_t(p_utf8[0]) == 0xef && uint8_t(p_utf8[1]) == 0xbb && uint8_t(p_utf8[2]) == 0xbf; if (has_bom) { - //just skip it + //8-bit encoding, byte order has no meaning in UTF-8, just skip it if (p_len >= 0) { p_len -= 3; } @@ -1405,24 +1698,19 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { /* Determine the number of characters in sequence */ if ((c & 0x80) == 0) { skip = 0; - } else if ((c & 0xE0) == 0xC0) { + } else if ((c & 0xe0) == 0xc0) { skip = 1; - } else if ((c & 0xF0) == 0xE0) { + } else if ((c & 0xf0) == 0xe0) { skip = 2; - } else if ((c & 0xF8) == 0xF0) { + } else if ((c & 0xf8) == 0xf0) { skip = 3; - } else if ((c & 0xFC) == 0xF8) { - skip = 4; - } else if ((c & 0xFE) == 0xFC) { - skip = 5; } else { - _UNICERROR("invalid skip"); + _UNICERROR("invalid skip at " + num_int64(cstr_size)); return true; //invalid utf8 } - if (skip == 1 && (c & 0x1E) == 0) { - //printf("overlong rejected\n"); - _UNICERROR("overlong rejected"); + if (skip == 1 && (c & 0x1e) == 0) { + _UNICERROR("overlong rejected at " + num_int64(cstr_size)); return true; //reject overlong } @@ -1448,7 +1736,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } resize(str_size + 1); - CharType *dst = ptrw(); + char32_t *dst = ptrw(); dst[str_size] = 0; while (cstr_size) { @@ -1457,19 +1745,14 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { /* Determine the number of characters in sequence */ if ((*p_utf8 & 0x80) == 0) { len = 1; - } else if ((*p_utf8 & 0xE0) == 0xC0) { + } else if ((*p_utf8 & 0xe0) == 0xc0) { len = 2; - } else if ((*p_utf8 & 0xF0) == 0xE0) { + } else if ((*p_utf8 & 0xf0) == 0xe0) { len = 3; - } else if ((*p_utf8 & 0xF8) == 0xF0) { + } else if ((*p_utf8 & 0xf8) == 0xf0) { len = 4; - } else if ((*p_utf8 & 0xFC) == 0xF8) { - len = 5; - } else if ((*p_utf8 & 0xFE) == 0xFC) { - len = 6; } else { _UNICERROR("invalid len"); - return true; //invalid UTF8 } @@ -1479,7 +1762,6 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } if (len == 2 && (*p_utf8 & 0x1E) == 0) { - //printf("overlong rejected\n"); _UNICERROR("no space left"); return true; //reject overlong } @@ -1491,24 +1773,23 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { if (len == 1) { unichar = *p_utf8; } else { - unichar = (0xFF >> (len + 1)) & *p_utf8; + unichar = (0xff >> (len + 1)) & *p_utf8; for (int i = 1; i < len; i++) { - if ((p_utf8[i] & 0xC0) != 0x80) { + if ((p_utf8[i] & 0xc0) != 0x80) { _UNICERROR("invalid utf8"); return true; //invalid utf8 } - if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7F) >> (7 - len)) == 0) { + if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) { _UNICERROR("invalid utf8 overlong"); return true; //no overlong } - unichar = (unichar << 6) | (p_utf8[i] & 0x3F); + unichar = (unichar << 6) | (p_utf8[i] & 0x3f); } } - - //printf("char %i, len %i\n",unichar,len); - if (sizeof(wchar_t) == 2 && unichar > 0xFFFF) { - unichar = ' '; //too long for windows + if (unichar >= 0xd800 && unichar <= 0xdfff) { + _UNICERROR("invalid code point"); + return CharString(); } *(dst++) = unichar; @@ -1517,6 +1798,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) { } return false; +#undef _UNICERROR } CharString String::utf8() const { @@ -1525,7 +1807,7 @@ CharString String::utf8() const { return CharString(); } - const CharType *d = &operator[](0); + const char32_t *d = &operator[](0); int fl = 0; for (int i = 0; i < l; i++) { uint32_t c = d[i]; @@ -1535,13 +1817,15 @@ CharString String::utf8() const { fl += 2; } else if (c <= 0xffff) { // 16 bits fl += 3; - } else if (c <= 0x001fffff) { // 21 bits + } else if (c <= 0x0010ffff) { // 21 bits fl += 4; - - } else if (c <= 0x03ffffff) { // 26 bits - fl += 5; - } else if (c <= 0x7fffffff) { // 31 bits - fl += 6; + } else { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); + return CharString(); + } + if (c >= 0xd800 && c <= 0xdfff) { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); + return CharString(); } } @@ -1561,35 +1845,17 @@ CharString String::utf8() const { if (c <= 0x7f) { // 7 bits. APPEND_CHAR(c); } else if (c <= 0x7ff) { // 11 bits - APPEND_CHAR(uint32_t(0xc0 | ((c >> 6) & 0x1f))); // Top 5 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. } else if (c <= 0xffff) { // 16 bits - APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. - } else if (c <= 0x001fffff) { // 21 bits - + } else { // 21 bits APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits. APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. - } else if (c <= 0x03ffffff) { // 26 bits - - APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits. - APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. - } else if (c <= 0x7fffffff) { // 31 bits - - APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit. - APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits. - APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits. } } #undef APPEND_CHAR @@ -1598,21 +1864,191 @@ CharString String::utf8() const { return utf8s; } -/* -String::String(CharType p_char) { +String String::utf16(const char16_t *p_utf16, int p_len) { + String ret; + ret.parse_utf16(p_utf16, p_len); + + return ret; +} + +bool String::parse_utf16(const char16_t *p_utf16, int p_len) { +#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?"); + + if (!p_utf16) { + return true; + } + + String aux; + + int cstr_size = 0; + int str_size = 0; + + /* HANDLE BOM (Byte Order Mark) */ + bool byteswap = false; // assume correct endianness if no BOM found + if (p_len < 0 || p_len >= 1) { + bool has_bom = false; + if (uint16_t(p_utf16[0]) == 0xfeff) { // correct BOM, read as is + has_bom = true; + byteswap = false; + } else if (uint16_t(p_utf16[0]) == 0xfffe) { // backwards BOM, swap bytes + has_bom = true; + byteswap = true; + } + if (has_bom) { + if (p_len >= 0) { + p_len -= 1; + } + p_utf16 += 1; + } + } + + { + const char16_t *ptrtmp = p_utf16; + const char16_t *ptrtmp_limit = &p_utf16[p_len]; + int skip = 0; + while (ptrtmp != ptrtmp_limit && *ptrtmp) { + uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp; + if (skip == 0) { + if ((c & 0xfffffc00) == 0xd800) { + skip = 1; // lead surrogate + } else if ((c & 0xfffffc00) == 0xdc00) { + _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); + return true; // invalid UTF16 + } else { + skip = 0; + } + str_size++; + } else { + if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate + --skip; + } else { + _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size)); + return true; // invalid UTF16 + } + } + + cstr_size++; + ptrtmp++; + } + + if (skip) { + _UNICERROR("no space left"); + return true; // not enough space + } + } + + if (str_size == 0) { + clear(); + return false; + } + + resize(str_size + 1); + char32_t *dst = ptrw(); + dst[str_size] = 0; + + while (cstr_size) { + int len = 0; + uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16; + + if ((c & 0xfffffc00) == 0xd800) { + len = 2; + } else { + len = 1; + } + + if (len > cstr_size) { + _UNICERROR("no space left"); + return true; //not enough space + } + + uint32_t unichar = 0; + if (len == 1) { + unichar = c; + } else { + uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1]; + unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + } + + *(dst++) = unichar; + cstr_size -= len; + p_utf16 += len; + } - shared=nullptr; - copy_from(p_char); + return false; +#undef _UNICERROR } +Char16String String::utf16() const { + int l = length(); + if (!l) { + return Char16String(); + } -*/ + const char32_t *d = &operator[](0); + int fl = 0; + for (int i = 0; i < l; i++) { + uint32_t c = d[i]; + if (c <= 0xffff) { // 16 bits. + fl += 1; + } else if (c <= 0x10ffff) { // 32 bits. + fl += 2; + } else { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); + return Char16String(); + } + if (c >= 0xd800 && c <= 0xdfff) { + print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + "."); + return Char16String(); + } + } + + Char16String utf16s; + if (fl == 0) { + return utf16s; + } + + utf16s.resize(fl + 1); + uint16_t *cdst = (uint16_t *)utf16s.get_data(); + +#define APPEND_CHAR(m_c) *(cdst++) = m_c + + for (int i = 0; i < l; i++) { + uint32_t c = d[i]; + + if (c <= 0xffff) { // 16 bits. + APPEND_CHAR(c); + } else { // 32 bits. + APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate. + APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate. + } + } +#undef APPEND_CHAR + *cdst = 0; //trailing zero + + return utf16s; +} String::String(const char *p_str) { copy_from(p_str); } -String::String(const CharType *p_str, int p_clip_to_len) { +String::String(const wchar_t *p_str) { + copy_from(p_str); +} + +String::String(const char32_t *p_str) { + copy_from(p_str); +} + +String::String(const char *p_str, int p_clip_to_len) { + copy_from(p_str, p_clip_to_len); +} + +String::String(const wchar_t *p_str, int p_clip_to_len) { + copy_from(p_str, p_clip_to_len); +} + +String::String(const char32_t *p_str, int p_clip_to_len) { copy_from(p_str, p_clip_to_len); } @@ -1620,7 +2056,6 @@ String::String(const StrRange &p_range) { if (!p_range.c_str) { return; } - copy_from(p_range.c_str, p_range.len); } @@ -1629,7 +2064,7 @@ int64_t String::hex_to_int(bool p_with_prefix) const { return 0; } - const CharType *s = ptr(); + const char32_t *s = ptr(); int64_t sign = s[0] == '-' ? -1 : 1; @@ -1647,7 +2082,7 @@ int64_t String::hex_to_int(bool p_with_prefix) const { int64_t hex = 0; while (*s) { - CharType c = LOWERCASE(*s); + char32_t c = LOWERCASE(*s); int64_t n; if (c >= '0' && c <= '9') { n = c - '0'; @@ -1672,7 +2107,7 @@ int64_t String::bin_to_int(bool p_with_prefix) const { return 0; } - const CharType *s = ptr(); + const char32_t *s = ptr(); int64_t sign = s[0] == '-' ? -1 : 1; @@ -1690,7 +2125,7 @@ int64_t String::bin_to_int(bool p_with_prefix) const { int64_t binary = 0; while (*s) { - CharType c = LOWERCASE(*s); + char32_t c = LOWERCASE(*s); int64_t n; if (c == '0' || c == '1') { n = c - '0'; @@ -1719,7 +2154,7 @@ int64_t String::to_int() const { int64_t sign = 1; for (int i = 0; i < to; i++) { - CharType c = operator[](i); + char32_t c = operator[](i); if (c >= '0' && c <= '9') { bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small.")); @@ -1765,6 +2200,37 @@ int64_t String::to_int(const char *p_str, int p_len) { return integer * sign; } +int64_t String::to_int(const wchar_t *p_str, int p_len) { + int to = 0; + if (p_len >= 0) { + to = p_len; + } else { + while (p_str[to] != 0 && p_str[to] != '.') { + to++; + } + } + + int64_t integer = 0; + int64_t sign = 1; + + for (int i = 0; i < to; i++) { + wchar_t c = p_str[i]; + if (c >= '0' && c <= '9') { + bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8'))); + ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small.")); + integer *= 10; + integer += c - '0'; + + } else if (c == '-' && integer == 0) { + sign = -sign; + } else if (c != ' ') { + break; + } + } + + return integer * sign; +} + bool String::is_numeric() const { if (length() == 0) { return false; @@ -1776,14 +2242,13 @@ bool String::is_numeric() const { } bool dot = false; for (int i = s; i < length(); i++) { - CharType c = operator[](i); + char32_t c = operator[](i); if (c == '.') { if (dot) { return false; } dot = true; - } - if (c < '0' || c > '9') { + } else if (c < '0' || c > '9') { return false; } } @@ -1945,11 +2410,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point } expSign = false; } - if (!IS_DIGIT(CharType(*p))) { + if (!IS_DIGIT(char32_t(*p))) { p = pExp; goto done; } - while (IS_DIGIT(CharType(*p))) { + while (IS_DIGIT(char32_t(*p))) { exp = exp * 10 + (*p - '0'); p += 1; } @@ -2007,19 +2472,18 @@ done: #define READING_DONE 4 double String::to_float(const char *p_str) { -#ifndef NO_USE_STDLIB - return built_in_strtod<char>(p_str); -//return atof(p_str); DOES NOT WORK ON ANDROID(??) -#else return built_in_strtod<char>(p_str); -#endif } -double String::to_float(const CharType *p_str, const CharType **r_end) { - return built_in_strtod<CharType>(p_str, (CharType **)r_end); +double String::to_float(const char32_t *p_str, const char32_t **r_end) { + return built_in_strtod<char32_t>(p_str, (char32_t **)r_end); } -int64_t String::to_int(const CharType *p_str, int p_len, bool p_clamp) { +double String::to_float(const wchar_t *p_str, const wchar_t **r_end) { + return built_in_strtod<wchar_t>(p_str, (wchar_t **)r_end); +} + +int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) { if (p_len == 0 || !p_str[0]) { return 0; } @@ -2029,11 +2493,11 @@ int64_t String::to_int(const CharType *p_str, int p_len, bool p_clamp) { int64_t sign = 1; int reading = READING_SIGN; - const CharType *str = p_str; - const CharType *limit = &p_str[p_len]; + const char32_t *str = p_str; + const char32_t *limit = &p_str[p_len]; while (*str && reading != READING_DONE && str != limit) { - CharType c = *(str++); + char32_t c = *(str++); switch (reading) { case READING_SIGN: { if (c >= '0' && c <= '9') { @@ -2087,26 +2551,7 @@ double String::to_float() const { if (empty()) { return 0; } -#ifndef NO_USE_STDLIB - return built_in_strtod<CharType>(c_str()); -//return wcstod(c_str(),nullptr ); DOES NOT WORK ON ANDROID :( -#else - return built_in_strtod<CharType>(c_str()); -#endif -} - -bool operator==(const char *p_chr, const String &p_str) { - return p_str == p_chr; -} - -String operator+(const char *p_chr, const String &p_str) { - String tmp = p_chr; - tmp += p_str; - return tmp; -} - -String operator+(CharType p_chr, const String &p_str) { - return (String::chr(p_chr) + p_str); + return built_in_strtod<char32_t>(get_data()); } uint32_t String::hash(const char *p_cstr) { @@ -2129,7 +2574,27 @@ uint32_t String::hash(const char *p_cstr, int p_len) { return hashv; } -uint32_t String::hash(const CharType *p_cstr, int p_len) { +uint32_t String::hash(const wchar_t *p_cstr, int p_len) { + uint32_t hashv = 5381; + for (int i = 0; i < p_len; i++) { + hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */ + } + + return hashv; +} + +uint32_t String::hash(const wchar_t *p_cstr) { + uint32_t hashv = 5381; + uint32_t c; + + while ((c = *p_cstr++)) { + hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */ + } + + return hashv; +} + +uint32_t String::hash(const char32_t *p_cstr, int p_len) { uint32_t hashv = 5381; for (int i = 0; i < p_len; i++) { hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */ @@ -2138,7 +2603,7 @@ uint32_t String::hash(const CharType *p_cstr, int p_len) { return hashv; } -uint32_t String::hash(const CharType *p_cstr) { +uint32_t String::hash(const char32_t *p_cstr) { uint32_t hashv = 5381; uint32_t c; @@ -2152,7 +2617,7 @@ uint32_t String::hash(const CharType *p_cstr) { uint32_t String::hash() const { /* simple djb2 hashing */ - const CharType *chr = c_str(); + const char32_t *chr = get_data(); uint32_t hashv = 5381; uint32_t c; @@ -2166,7 +2631,7 @@ uint32_t String::hash() const { uint64_t String::hash64() const { /* simple djb2 hashing */ - const CharType *chr = c_str(); + const char32_t *chr = get_data(); uint64_t hashv = 5381; uint64_t c; @@ -2278,7 +2743,7 @@ String String::substr(int p_from, int p_chars) const { } String s = String(); - s.copy_from_unchecked(&c_str()[p_from], p_chars); + s.copy_from_unchecked(&get_data()[p_from], p_chars); return s; } @@ -2295,8 +2760,8 @@ int String::find(const String &p_str, int p_from) const { return -1; // won't find anything! } - const CharType *src = c_str(); - const CharType *str = p_str.c_str(); + const char32_t *src = get_data(); + const char32_t *str = p_str.get_data(); for (int i = p_from; i <= (len - src_len); i++) { bool found = true; @@ -2333,7 +2798,7 @@ int String::find(const char *p_str, int p_from) const { return -1; // won't find anything! } - const CharType *src = c_str(); + const char32_t *src = get_data(); int src_len = 0; while (p_str[src_len] != '\0') { @@ -2341,7 +2806,7 @@ int String::find(const char *p_str, int p_from) const { } if (src_len == 1) { - const char needle = p_str[0]; + const char32_t needle = p_str[0]; for (int i = p_from; i < len; i++) { if (src[i] == needle) { @@ -2360,7 +2825,7 @@ int String::find(const char *p_str, int p_from) const { return -1; } - if (src[read_pos] != p_str[j]) { + if (src[read_pos] != (char32_t)p_str[j]) { found = false; break; } @@ -2375,7 +2840,7 @@ int String::find(const char *p_str, int p_from) const { return -1; } -int String::find_char(const CharType &p_char, int p_from) const { +int String::find_char(const char32_t &p_char, int p_from) const { return _cowdata.find(p_char, p_from); } @@ -2396,7 +2861,7 @@ int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const { return -1; // won't find anything! } - const CharType *src = c_str(); + const char32_t *src = get_data(); for (int i = p_from; i < len; i++) { bool found = true; @@ -2405,7 +2870,7 @@ int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const { if (r_key) { *r_key = k; } - const CharType *cmp = keys[k].c_str(); + const char32_t *cmp = keys[k].get_data(); int l = keys[k].length(); for (int j = 0; j < l; j++) { @@ -2445,7 +2910,7 @@ int String::findn(const String &p_str, int p_from) const { return -1; // won't find anything! } - const CharType *srcd = c_str(); + const char32_t *srcd = get_data(); for (int i = p_from; i <= (length() - src_len); i++) { bool found = true; @@ -2457,8 +2922,8 @@ int String::findn(const String &p_str, int p_from) const { return -1; } - CharType src = _find_lower(srcd[read_pos]); - CharType dst = _find_lower(p_str[j]); + char32_t src = _find_lower(srcd[read_pos]); + char32_t dst = _find_lower(p_str[j]); if (src != dst) { found = false; @@ -2495,7 +2960,7 @@ int String::rfind(const String &p_str, int p_from) const { return -1; // won't find anything! } - const CharType *src = c_str(); + const char32_t *src = get_data(); for (int i = p_from; i >= 0; i--) { bool found = true; @@ -2542,7 +3007,7 @@ int String::rfindn(const String &p_str, int p_from) const { return -1; // won't find anything! } - const CharType *src = c_str(); + const char32_t *src = get_data(); for (int i = p_from; i >= 0; i--) { bool found = true; @@ -2554,8 +3019,8 @@ int String::rfindn(const String &p_str, int p_from) const { return -1; } - CharType srcc = _find_lower(src[read_pos]); - CharType dstc = _find_lower(p_str[j]); + char32_t srcc = _find_lower(src[read_pos]); + char32_t dstc = _find_lower(p_str[j]); if (srcc != dstc) { found = false; @@ -2589,8 +3054,8 @@ bool String::begins_with(const String &p_string) const { return true; } - const CharType *src = &p_string[0]; - const CharType *str = &operator[](0); + const char32_t *src = &p_string[0]; + const char32_t *str = &operator[](0); int i = 0; for (; i < l; i++) { @@ -2609,11 +3074,11 @@ bool String::begins_with(const char *p_string) const { return false; } - const CharType *str = &operator[](0); + const char32_t *str = &operator[](0); int i = 0; while (*p_string && i < l) { - if (*p_string != str[i]) { + if ((char32_t)*p_string != str[i]) { return false; } i++; @@ -2657,7 +3122,7 @@ int String::_count(const String &p_string, int p_from, int p_to, bool p_case_ins } if (p_from == 0 && p_to == len) { str = String(); - str.copy_from_unchecked(&c_str()[0], len); + str.copy_from_unchecked(&get_data()[0], len); } else { str = substr(p_from, p_to - p_from); } @@ -2695,14 +3160,14 @@ bool String::_base_is_subsequence_of(const String &p_string, bool case_insensiti return false; } - const CharType *src = &operator[](0); - const CharType *tgt = &p_string[0]; + const char32_t *src = &operator[](0); + const char32_t *tgt = &p_string[0]; for (; *src && *tgt; tgt++) { bool match = false; if (case_insensitive) { - CharType srcc = _find_lower(*src); - CharType tgtc = _find_lower(*tgt); + char32_t srcc = _find_lower(*src); + char32_t tgtc = _find_lower(*tgt); match = srcc == tgtc; } else { match = *src == *tgt; @@ -2748,8 +3213,8 @@ float String::similarity(const String &p_string) const { int src_size = src_bigrams.size(); int tgt_size = tgt_bigrams.size(); - float sum = src_size + tgt_size; - float inter = 0; + double sum = src_size + tgt_size; + double inter = 0; for (int i = 0; i < src_size; i++) { for (int j = 0; j < tgt_size; j++) { if (src_bigrams[i] == tgt_bigrams[j]) { @@ -2762,7 +3227,7 @@ float String::similarity(const String &p_string) const { return (2.0f * inter) / sum; } -static bool _wildcard_match(const CharType *p_pattern, const CharType *p_string, bool p_case_sensitive) { +static bool _wildcard_match(const char32_t *p_pattern, const char32_t *p_string, bool p_case_sensitive) { switch (*p_pattern) { case '\0': return !*p_string; @@ -2781,14 +3246,14 @@ bool String::match(const String &p_wildcard) const { return false; } - return _wildcard_match(p_wildcard.c_str(), c_str(), true); + return _wildcard_match(p_wildcard.get_data(), get_data(), true); } bool String::matchn(const String &p_wildcard) const { if (!p_wildcard.length() || !length()) { return false; } - return _wildcard_match(p_wildcard.c_str(), c_str(), false); + return _wildcard_match(p_wildcard.get_data(), get_data(), false); } String String::format(const Variant &values, String placeholder) const { @@ -2938,9 +3403,10 @@ String String::repeat(int p_count) const { ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number."); String new_string; - const CharType *src = this->c_str(); + const char32_t *src = this->get_data(); new_string.resize(length() * p_count + 1); + new_string[length() * p_count] = 0; for (int i = 0; i < p_count; i++) { for (int j = 0; j < length(); j++) { @@ -2975,7 +3441,7 @@ String String::right(int p_pos) const { return substr(p_pos, (length() - p_pos)); } -CharType String::ord_at(int p_idx) const { +char32_t String::ord_at(int p_idx) const { ERR_FAIL_INDEX_V(p_idx, length(), 0); return operator[](p_idx); } @@ -2989,7 +3455,7 @@ String String::dedent() const { int indent_stop = -1; for (int i = 0; i < length(); i++) { - CharType c = operator[](i); + char32_t c = operator[](i); if (c == '\n') { if (has_text) { new_string += substr(indent_stop, i - indent_stop); @@ -3218,7 +3684,7 @@ bool String::is_valid_identifier() const { return false; } - const wchar_t *str = &operator[](0); + const char32_t *str = &operator[](0); for (int i = 0; i < len; i++) { if (i == 0) { @@ -3237,36 +3703,14 @@ bool String::is_valid_identifier() const { return true; } -//kind of poor should be rewritten properly - -String String::word_wrap(int p_chars_per_line) const { - int from = 0; - int last_space = 0; - String ret; - for (int i = 0; i < length(); i++) { - if (i - from >= p_chars_per_line) { - if (last_space == -1) { - ret += substr(from, i - from + 1) + "\n"; - } else { - ret += substr(from, last_space - from) + "\n"; - i = last_space; //rewind - } - from = i + 1; - last_space = -1; - } else if (operator[](i) == ' ' || operator[](i) == '\t') { - last_space = i; - } else if (operator[](i) == '\n') { - ret += substr(from, i - from) + "\n"; - from = i + 1; - last_space = -1; - } - } - - if (from < length()) { - ret += substr(from, length()); +bool String::is_valid_string() const { + int l = length(); + const char32_t *src = get_data(); + bool valid = true; + for (int i = 0; i < l; i++) { + valid = valid && (src[i] < 0xd800 || (src[i] > 0xdfff && src[i] <= 0x10ffff)); } - - return ret; + return valid; } String String::http_escape() const { @@ -3297,9 +3741,9 @@ String String::http_unescape() const { String res; for (int i = 0; i < length(); ++i) { if (ord_at(i) == '%' && i + 2 < length()) { - CharType ord1 = ord_at(i + 1); + char32_t ord1 = ord_at(i + 1); if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) { - CharType ord2 = ord_at(i + 2); + char32_t ord2 = ord_at(i + 2); if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) { char bytes[3] = { (char)ord1, (char)ord2, 0 }; res += (char)strtol(bytes, nullptr, 16); @@ -3389,18 +3833,18 @@ for (int i=1;i<32;i++) { return str; } -static _FORCE_INLINE_ int _xml_unescape(const CharType *p_src, int p_src_len, CharType *p_dst) { +static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, char32_t *p_dst) { int len = 0; while (p_src_len) { if (*p_src == '&') { int eat = 0; if (p_src_len >= 4 && p_src[1] == '#') { - CharType c = 0; + char32_t c = 0; for (int i = 2; i < p_src_len; i++) { eat = i + 1; - CharType ct = p_src[i]; + char32_t ct = p_src[i]; if (ct == ';') { break; } else if (ct >= '0' && ct <= '9') { @@ -3476,12 +3920,12 @@ static _FORCE_INLINE_ int _xml_unescape(const CharType *p_src, int p_src_len, Ch String String::xml_unescape() const { String str; int l = length(); - int len = _xml_unescape(c_str(), l, nullptr); + int len = _xml_unescape(get_data(), l, nullptr); if (len == 0) { return String(); } str.resize(len + 1); - _xml_unescape(c_str(), l, str.ptrw()); + _xml_unescape(get_data(), l, str.ptrw()); str[len] = 0; return str; } @@ -3602,7 +4046,7 @@ bool String::is_valid_hex_number(bool p_with_prefix) const { } for (int i = from; i < len; i++) { - CharType c = operator[](i); + char32_t c = operator[](i); if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { continue; } @@ -3917,7 +4361,7 @@ String String::percent_decode() const { String String::property_name_encode() const { // Escape and quote strings with extended ASCII or further Unicode characters // as well as '"', '=' or ' ' (32) - const CharType *cstr = c_str(); + const char32_t *cstr = get_data(); for (int i = 0; cstr[i]; i++) { if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) { return "\"" + c_escape_multiline() + "\""; @@ -3984,7 +4428,7 @@ String String::lpad(int min_length, const String &character) const { // In case of an error, the string returned is the error description and "error" is true. String String::sprintf(const Array &values, bool *error) const { String formatted; - CharType *self = (CharType *)c_str(); + char32_t *self = (char32_t *)get_data(); bool in_format = false; int value_index = 0; int min_chars = 0; @@ -3997,7 +4441,7 @@ String String::sprintf(const Array &values, bool *error) const { *error = true; for (; *self; self++) { - const CharType c = *self; + const char32_t c = *self; if (in_format) { // We have % - lets see what else we get. switch (c) { @@ -4134,9 +4578,11 @@ String String::sprintf(const Array &values, bool *error) const { if (values[value_index].is_num()) { int value = values[value_index]; if (value < 0) { - return "unsigned byte integer is lower than maximum"; - } else if (value > 255) { - return "unsigned byte integer is greater than maximum"; + return "unsigned integer is lower than minimum"; + } else if (value >= 0xd800 && value <= 0xdfff) { + return "unsigned integer is invalid Unicode character"; + } else if (value > 0x10ffff) { + return "unsigned integer is greater than maximum"; } str = chr(values[value_index]); } else if (values[value_index].get_type() == Variant::STRING) { diff --git a/core/ustring.h b/core/ustring.h index 7a1c1a5232..65eeae6643 100644 --- a/core/ustring.h +++ b/core/ustring.h @@ -36,8 +36,13 @@ #include "core/typedefs.h" #include "core/vector.h" +/*************************************************************************/ +/* CharProxy */ +/*************************************************************************/ + template <class T> class CharProxy { + friend class Char16String; friend class CharString; friend class String; @@ -71,6 +76,54 @@ public: } }; +/*************************************************************************/ +/* Char16String */ +/*************************************************************************/ + +class Char16String { + CowData<char16_t> _cowdata; + static const char16_t _null; + +public: + _FORCE_INLINE_ char16_t *ptrw() { return _cowdata.ptrw(); } + _FORCE_INLINE_ const char16_t *ptr() const { return _cowdata.ptr(); } + _FORCE_INLINE_ int size() const { return _cowdata.size(); } + Error resize(int p_size) { return _cowdata.resize(p_size); } + + _FORCE_INLINE_ char16_t get(int p_index) const { return _cowdata.get(p_index); } + _FORCE_INLINE_ void set(int p_index, const char16_t &p_elem) { _cowdata.set(p_index, p_elem); } + _FORCE_INLINE_ const char16_t &operator[](int p_index) const { + if (unlikely(p_index == _cowdata.size())) { + return _null; + } + + return _cowdata.get(p_index); + } + _FORCE_INLINE_ CharProxy<char16_t> operator[](int p_index) { return CharProxy<char16_t>(p_index, _cowdata); } + + _FORCE_INLINE_ Char16String() {} + _FORCE_INLINE_ Char16String(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); } + _FORCE_INLINE_ Char16String operator=(const Char16String &p_str) { + _cowdata._ref(p_str._cowdata); + return *this; + } + _FORCE_INLINE_ Char16String(const char16_t *p_cstr) { copy_from(p_cstr); } + + Char16String &operator=(const char16_t *p_cstr); + bool operator<(const Char16String &p_right) const; + Char16String &operator+=(char16_t p_char); + int length() const { return size() ? size() - 1 : 0; } + const char16_t *get_data() const; + operator const char16_t *() const { return get_data(); }; + +protected: + void copy_from(const char16_t *p_cstr); +}; + +/*************************************************************************/ +/* CharString */ +/*************************************************************************/ + class CharString { CowData<char> _cowdata; static const char _null; @@ -111,26 +164,35 @@ protected: void copy_from(const char *p_cstr); }; -typedef wchar_t CharType; +/*************************************************************************/ +/* String */ +/*************************************************************************/ struct StrRange { - const CharType *c_str; + const char32_t *c_str; int len; - StrRange(const CharType *p_c_str = nullptr, int p_len = 0) { + StrRange(const char32_t *p_c_str = nullptr, int p_len = 0) { c_str = p_c_str; len = p_len; } }; class String { - CowData<CharType> _cowdata; - static const CharType _null; + CowData<char32_t> _cowdata; + static const char32_t _null; void copy_from(const char *p_cstr); - void copy_from(const CharType *p_cstr, const int p_clip_to = -1); - void copy_from(const CharType &p_char); - void copy_from_unchecked(const CharType *p_char, const int p_length); + void copy_from(const char *p_cstr, const int p_clip_to); + void copy_from(const wchar_t *p_cstr); + void copy_from(const wchar_t *p_cstr, const int p_clip_to); + void copy_from(const char32_t *p_cstr); + void copy_from(const char32_t *p_cstr, const int p_clip_to); + + void copy_from(const char32_t &p_char); + + void copy_from_unchecked(const char32_t *p_char, const int p_length); + bool _base_is_subsequence_of(const String &p_string, bool case_insensitive) const; int _count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const; @@ -140,48 +202,56 @@ public: npos = -1 ///<for "some" compatibility with std::string (npos is a huge value in std::string) }; - _FORCE_INLINE_ CharType *ptrw() { return _cowdata.ptrw(); } - _FORCE_INLINE_ const CharType *ptr() const { return _cowdata.ptr(); } + _FORCE_INLINE_ char32_t *ptrw() { return _cowdata.ptrw(); } + _FORCE_INLINE_ const char32_t *ptr() const { return _cowdata.ptr(); } void remove(int p_index) { _cowdata.remove(p_index); } _FORCE_INLINE_ void clear() { resize(0); } - _FORCE_INLINE_ CharType get(int p_index) const { return _cowdata.get(p_index); } - _FORCE_INLINE_ void set(int p_index, const CharType &p_elem) { _cowdata.set(p_index, p_elem); } + _FORCE_INLINE_ char32_t get(int p_index) const { return _cowdata.get(p_index); } + _FORCE_INLINE_ void set(int p_index, const char32_t &p_elem) { _cowdata.set(p_index, p_elem); } _FORCE_INLINE_ int size() const { return _cowdata.size(); } Error resize(int p_size) { return _cowdata.resize(p_size); } - _FORCE_INLINE_ const CharType &operator[](int p_index) const { + _FORCE_INLINE_ const char32_t &operator[](int p_index) const { if (unlikely(p_index == _cowdata.size())) { return _null; } return _cowdata.get(p_index); } - _FORCE_INLINE_ CharProxy<CharType> operator[](int p_index) { return CharProxy<CharType>(p_index, _cowdata); } + _FORCE_INLINE_ CharProxy<char32_t> operator[](int p_index) { return CharProxy<char32_t>(p_index, _cowdata); } bool operator==(const String &p_str) const; bool operator!=(const String &p_str) const; String operator+(const String &p_str) const; - //String operator+(CharType p_char) const; String &operator+=(const String &); - String &operator+=(CharType p_char); + String &operator+=(char32_t p_char); String &operator+=(const char *p_str); - String &operator+=(const CharType *p_str); + String &operator+=(const wchar_t *p_str); + String &operator+=(const char32_t *p_str); /* Compatibility Operators */ void operator=(const char *p_str); - void operator=(const CharType *p_str); + void operator=(const wchar_t *p_str); + void operator=(const char32_t *p_str); + bool operator==(const char *p_str) const; - bool operator==(const CharType *p_str) const; + bool operator==(const wchar_t *p_str) const; + bool operator==(const char32_t *p_str) const; bool operator==(const StrRange &p_str_range) const; + bool operator!=(const char *p_str) const; - bool operator!=(const CharType *p_str) const; - bool operator<(const CharType *p_str) const; + bool operator!=(const wchar_t *p_str) const; + bool operator!=(const char32_t *p_str) const; + + bool operator<(const char32_t *p_str) const; bool operator<(const char *p_str) const; + bool operator<(const wchar_t *p_str) const; + bool operator<(const String &p_str) const; bool operator<=(const String &p_str) const; @@ -189,7 +259,7 @@ public: signed char nocasecmp_to(const String &p_str) const; signed char naturalnocasecmp_to(const String &p_str) const; - const CharType *c_str() const; + const char32_t *get_data() const; /* standard size stuff */ _FORCE_INLINE_ int length() const { @@ -197,11 +267,13 @@ public: return s ? (s - 1) : 0; // length does not include zero } + bool is_valid_string() const; + /* complex helpers */ String substr(int p_from, int p_chars = -1) const; int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed int find(const char *p_str, int p_from = 0) const; ///< return <0 if failed - int find_char(const CharType &p_char, int p_from = 0) const; ///< return <0 if failed + int find_char(const char32_t &p_char, int p_from = 0) const; ///< return <0 if failed int findn(const String &p_str, int p_from = 0) const; ///< return <0 if failed, case insensitive int rfind(const String &p_str, int p_from = -1) const; ///< return <0 if failed int rfindn(const String &p_str, int p_from = -1) const; ///< return <0 if failed, case insensitive @@ -238,26 +310,31 @@ public: static String num_real(double p_num); static String num_int64(int64_t p_num, int base = 10, bool capitalize_hex = false); static String num_uint64(uint64_t p_num, int base = 10, bool capitalize_hex = false); - static String chr(CharType p_char); + static String chr(char32_t p_char); static String md5(const uint8_t *p_md5); static String hex_encode_buffer(const uint8_t *p_buffer, int p_len); bool is_numeric() const; - double to_float() const; + double to_float() const; int64_t hex_to_int(bool p_with_prefix = true) const; int64_t bin_to_int(bool p_with_prefix = true) const; int64_t to_int() const; + static int64_t to_int(const char *p_str, int p_len = -1); + static int64_t to_int(const wchar_t *p_str, int p_len = -1); + static int64_t to_int(const char32_t *p_str, int p_len = -1, bool p_clamp = false); + static double to_float(const char *p_str); - static double to_float(const CharType *p_str, const CharType **r_end = nullptr); - static int64_t to_int(const CharType *p_str, int p_len = -1, bool p_clamp = false); + static double to_float(const wchar_t *p_str, const wchar_t **r_end = nullptr); + static double to_float(const char32_t *p_str, const char32_t **r_end = nullptr); + String capitalize() const; String camelcase_to_underscore(bool lowercase = true) const; String get_with_code_lines() const; int get_slice_count(String p_splitter) const; String get_slice(String p_splitter, int p_slice) const; - String get_slicec(CharType p_splitter, int p_slice) const; + String get_slicec(char32_t p_splitter, int p_slice) const; Vector<String> split(const String &p_splitter, bool p_allow_empty = true, int p_maxsplit = 0) const; Vector<String> rsplit(const String &p_splitter, bool p_allow_empty = true, int p_maxsplit = 0) const; @@ -267,10 +344,10 @@ public: Vector<int> split_ints(const String &p_splitter, bool p_allow_empty = true) const; Vector<int> split_ints_mk(const Vector<String> &p_splitters, bool p_allow_empty = true) const; - String join(Vector<String> parts); + String join(Vector<String> parts) const; - static CharType char_uppercase(CharType p_char); - static CharType char_lowercase(CharType p_char); + static char32_t char_uppercase(char32_t p_char); + static char32_t char_lowercase(char32_t p_char); String to_upper() const; String to_lower() const; @@ -287,7 +364,7 @@ public: String get_extension() const; String get_basename() const; String plus_file(const String &p_file) const; - CharType ord_at(int p_idx) const; + char32_t ord_at(int p_idx) const; void erase(int p_pos, int p_chars); @@ -296,8 +373,14 @@ public: bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error static String utf8(const char *p_utf8, int p_len = -1); - static uint32_t hash(const CharType *p_cstr, int p_len); /* hash the string */ - static uint32_t hash(const CharType *p_cstr); /* hash the string */ + Char16String utf16() const; + bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error + static String utf16(const char16_t *p_utf16, int p_len = -1); + + static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */ + static uint32_t hash(const char32_t *p_cstr); /* hash the string */ + static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */ + static uint32_t hash(const wchar_t *p_cstr); /* hash the string */ static uint32_t hash(const char *p_cstr, int p_len); /* hash the string */ static uint32_t hash(const char *p_cstr); /* hash the string */ uint32_t hash() const; /* hash the string */ @@ -348,7 +431,7 @@ public: /** * The constructors must not depend on other overloads */ - /* String(CharType p_char);*/ + /* String(char32_t p_char);*/ _FORCE_INLINE_ String() {} _FORCE_INLINE_ String(const String &p_str) { _cowdata._ref(p_str._cowdata); } @@ -358,14 +441,20 @@ public: } String(const char *p_str); - String(const CharType *p_str, int p_clip_to_len = -1); + String(const wchar_t *p_str); + String(const char32_t *p_str); + String(const char *p_str, int p_clip_to_len); + String(const wchar_t *p_str, int p_clip_to_len); + String(const char32_t *p_str, int p_clip_to_len); String(const StrRange &p_range); }; bool operator==(const char *p_chr, const String &p_str); +bool operator==(const wchar_t *p_chr, const String &p_str); String operator+(const char *p_chr, const String &p_str); -String operator+(CharType p_chr, const String &p_str); +String operator+(const wchar_t *p_chr, const String &p_str); +String operator+(char32_t p_chr, const String &p_str); String itos(int64_t p_val); String uitos(uint64_t p_val); @@ -387,15 +476,18 @@ struct NaturalNoCaseComparator { template <typename L, typename R> _FORCE_INLINE_ bool is_str_less(const L *l_ptr, const R *r_ptr) { while (true) { - if (*l_ptr == 0 && *r_ptr == 0) { + const char32_t l = *l_ptr; + const char32_t r = *r_ptr; + + if (l == 0 && r == 0) { return false; - } else if (*l_ptr == 0) { + } else if (l == 0) { return true; - } else if (*r_ptr == 0) { + } else if (r == 0) { return false; - } else if (*l_ptr < *r_ptr) { + } else if (l < r) { return true; - } else if (*l_ptr > *r_ptr) { + } else if (l > r) { return false; } @@ -432,7 +524,7 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St String RTR(const String &p_text, const String &p_context = ""); String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = ""); -bool is_symbol(CharType c); +bool is_symbol(char32_t c); bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end); #endif // USTRING_H diff --git a/core/variant.cpp b/core/variant.cpp index c19ce79e64..181ced0f32 100644 --- a/core/variant.cpp +++ b/core/variant.cpp @@ -1558,7 +1558,7 @@ Variant::operator unsigned char() const { } } -Variant::operator CharType() const { +Variant::operator char32_t() const { return operator unsigned int(); } @@ -2445,7 +2445,7 @@ Variant::Variant(const char *const p_cstring) { memnew_placement(_data._mem, String((const char *)p_cstring)); } -Variant::Variant(const CharType *p_wstring) { +Variant::Variant(const char32_t *p_wstring) { type = STRING; memnew_placement(_data._mem, String(p_wstring)); } diff --git a/core/variant.h b/core/variant.h index 27a709b473..112003a7ae 100644 --- a/core/variant.h +++ b/core/variant.h @@ -246,7 +246,7 @@ public: operator ObjectID() const; - operator CharType() const; + operator char32_t() const; operator float() const; operator double() const; operator String() const; @@ -323,7 +323,7 @@ public: Variant(const String &p_string); Variant(const StringName &p_string); Variant(const char *const p_cstring); - Variant(const CharType *p_wstring); + Variant(const char32_t *p_wstring); Variant(const Vector2 &p_vector2); Variant(const Vector2i &p_vector2i); Variant(const Rect2 &p_rect2); diff --git a/core/variant_call.cpp b/core/variant_call.cpp index 91af127d32..7a1fdbaafe 100644 --- a/core/variant_call.cpp +++ b/core/variant_call.cpp @@ -239,6 +239,7 @@ struct _VariantCall { VCALL_LOCALMEM1R(String, casecmp_to); VCALL_LOCALMEM1R(String, nocasecmp_to); + VCALL_LOCALMEM1R(String, naturalnocasecmp_to); VCALL_LOCALMEM0R(String, length); VCALL_LOCALMEM3R(String, count); VCALL_LOCALMEM3R(String, countn); @@ -311,6 +312,8 @@ struct _VariantCall { VCALL_LOCALMEM0R(String, to_int); VCALL_LOCALMEM0R(String, to_float); VCALL_LOCALMEM0R(String, hex_to_int); + VCALL_LOCALMEM2R(String, lpad); + VCALL_LOCALMEM2R(String, rpad); VCALL_LOCALMEM1R(String, pad_decimals); VCALL_LOCALMEM1R(String, pad_zeros); VCALL_LOCALMEM1R(String, trim_prefix); @@ -350,6 +353,39 @@ struct _VariantCall { r_ret = retval; } + static void _call_String_to_utf16(Variant &r_ret, Variant &p_self, const Variant **p_args) { + String *s = reinterpret_cast<String *>(p_self._data._mem); + if (s->empty()) { + r_ret = PackedByteArray(); + return; + } + Char16String charstr = s->utf16(); + + PackedByteArray retval; + size_t len = charstr.length() * 2; + retval.resize(len); + uint8_t *w = retval.ptrw(); + copymem(w, (const void *)charstr.ptr(), len); + + r_ret = retval; + } + + static void _call_String_to_utf32(Variant &r_ret, Variant &p_self, const Variant **p_args) { + String *s = reinterpret_cast<String *>(p_self._data._mem); + if (s->empty()) { + r_ret = PackedByteArray(); + return; + } + + PackedByteArray retval; + size_t len = s->length() * 4; + retval.resize(len); + uint8_t *w = retval.ptrw(); + copymem(w, (const void *)s->ptr(), len); + + r_ret = retval; + } + VCALL_LOCALMEM1R(Vector2, distance_to); VCALL_LOCALMEM1R(Vector2, distance_squared_to); VCALL_LOCALMEM0R(Vector2, length); @@ -618,6 +654,26 @@ struct _VariantCall { r_ret = s; } + static void _call_PackedByteArray_get_string_from_utf16(Variant &r_ret, Variant &p_self, const Variant **p_args) { + PackedByteArray *ba = reinterpret_cast<PackedByteArray *>(p_self._data._mem); + String s; + if (ba->size() > 0) { + const uint8_t *r = ba->ptr(); + s.parse_utf16((const char16_t *)r, ba->size() / 2); + } + r_ret = s; + } + + static void _call_PackedByteArray_get_string_from_utf32(Variant &r_ret, Variant &p_self, const Variant **p_args) { + PackedByteArray *ba = reinterpret_cast<PackedByteArray *>(p_self._data._mem); + String s; + if (ba->size() > 0) { + const uint8_t *r = ba->ptr(); + s = String((const char32_t *)r, ba->size() / 4); + } + r_ret = s; + } + static void _call_PackedByteArray_compress(Variant &r_ret, Variant &p_self, const Variant **p_args) { PackedByteArray *ba = reinterpret_cast<PackedByteArray *>(p_self._data._mem); PackedByteArray compressed; @@ -1789,6 +1845,7 @@ void register_variant_methods() { /* STRING */ ADDFUNC1R(STRING, INT, String, casecmp_to, STRING, "to", varray()); ADDFUNC1R(STRING, INT, String, nocasecmp_to, STRING, "to", varray()); + ADDFUNC1R(STRING, INT, String, naturalnocasecmp_to, STRING, "to", varray()); ADDFUNC0R(STRING, INT, String, length, varray()); ADDFUNC2R(STRING, STRING, String, substr, INT, "from", INT, "len", varray(-1)); @@ -1867,6 +1924,8 @@ void register_variant_methods() { ADDFUNC0R(STRING, INT, String, to_int, varray()); ADDFUNC0R(STRING, FLOAT, String, to_float, varray()); ADDFUNC0R(STRING, INT, String, hex_to_int, varray()); + ADDFUNC2R(STRING, STRING, String, lpad, INT, "min_length", STRING, "character", varray(" ")); + ADDFUNC2R(STRING, STRING, String, rpad, INT, "min_length", STRING, "character", varray(" ")); ADDFUNC1R(STRING, STRING, String, pad_decimals, INT, "digits", varray()); ADDFUNC1R(STRING, STRING, String, pad_zeros, INT, "digits", varray()); ADDFUNC1R(STRING, STRING, String, trim_prefix, STRING, "prefix", varray()); @@ -1874,6 +1933,8 @@ void register_variant_methods() { ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_ascii, varray()); ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_utf8, varray()); + ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_utf16, varray()); + ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_utf32, varray()); ADDFUNC0R(VECTOR2, FLOAT, Vector2, angle, varray()); ADDFUNC1R(VECTOR2, FLOAT, Vector2, angle_to, VECTOR2, "to", varray()); @@ -2109,6 +2170,8 @@ void register_variant_methods() { ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_ascii, varray()); ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_utf8, varray()); + ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_utf16, varray()); + ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_utf32, varray()); ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, hex_encode, varray()); ADDFUNC1R(PACKED_BYTE_ARRAY, PACKED_BYTE_ARRAY, PackedByteArray, compress, INT, "compression_mode", varray(0)); ADDFUNC2R(PACKED_BYTE_ARRAY, PACKED_BYTE_ARRAY, PackedByteArray, decompress, INT, "buffer_size", INT, "compression_mode", varray(0)); diff --git a/core/variant_op.cpp b/core/variant_op.cpp index 0cb2fe29a1..95b488230d 100644 --- a/core/variant_op.cpp +++ b/core/variant_op.cpp @@ -4215,7 +4215,7 @@ void Variant::interpolate(const Variant &a, const Variant &b, float c, Variant & int split = csize / 2; for (int i = 0; i < csize; i++) { - CharType chr = ' '; + char32_t chr = ' '; if (i < split) { if (i < sa.length()) { diff --git a/core/variant_parser.cpp b/core/variant_parser.cpp index 74f4f32c0e..3c4fed68fb 100644 --- a/core/variant_parser.cpp +++ b/core/variant_parser.cpp @@ -35,7 +35,7 @@ #include "core/os/keyboard.h" #include "core/string_buffer.h" -CharType VariantParser::StreamFile::get_char() { +char32_t VariantParser::StreamFile::get_char() { return f->get_8(); } @@ -47,7 +47,7 @@ bool VariantParser::StreamFile::is_eof() const { return f->eof_reached(); } -CharType VariantParser::StreamString::get_char() { +char32_t VariantParser::StreamString::get_char() { if (pos > s.length()) { return 0; } else if (pos == s.length()) { @@ -94,7 +94,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri bool string_name = false; while (true) { - CharType cchar; + char32_t cchar; if (p_stream->saved) { cchar = p_stream->saved; p_stream->saved = 0; @@ -145,7 +145,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri } case ';': { while (true) { - CharType ch = p_stream->get_char(); + char32_t ch = p_stream->get_char(); if (p_stream->is_eof()) { r_token.type = TK_EOF; return OK; @@ -173,7 +173,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri StringBuffer<> color_str; color_str += '#'; while (true) { - CharType ch = p_stream->get_char(); + char32_t ch = p_stream->get_char(); if (p_stream->is_eof()) { r_token.type = TK_EOF; return OK; @@ -204,7 +204,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri case '"': { String str; while (true) { - CharType ch = p_stream->get_char(); + char32_t ch = p_stream->get_char(); if (ch == 0) { r_err_str = "Unterminated String"; @@ -214,13 +214,13 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri break; } else if (ch == '\\') { //escaped characters... - CharType next = p_stream->get_char(); + char32_t next = p_stream->get_char(); if (next == 0) { r_err_str = "Unterminated String"; r_token.type = TK_ERROR; return ERR_PARSE_ERROR; } - CharType res = 0; + char32_t res = 0; switch (next) { case 'b': @@ -241,7 +241,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri case 'u': { //hex number for (int j = 0; j < 4; j++) { - CharType c = p_stream->get_char(); + char32_t c = p_stream->get_char(); if (c == 0) { r_err_str = "Unterminated String"; r_token.type = TK_ERROR; @@ -252,7 +252,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri r_token.type = TK_ERROR; return ERR_PARSE_ERROR; } - CharType v; + char32_t v; if (c >= '0' && c <= '9') { v = c - '0'; } else if (c >= 'a' && c <= 'f') { @@ -321,7 +321,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri cchar = p_stream->get_char(); } - CharType c = cchar; + char32_t c = cchar; bool exp_sign = false; bool exp_beg = false; bool is_float = false; @@ -421,7 +421,7 @@ Error VariantParser::_parse_enginecfg(Stream *p_stream, Vector<String> &strings, String accum; while (true) { - CharType c = p_stream->get_char(); + char32_t c = p_stream->get_char(); if (p_stream->is_eof()) { r_err_str = "Unexpected EOF while parsing old-style project.godot construct"; @@ -1206,7 +1206,7 @@ Error VariantParser::_parse_tag(Token &token, Stream *p_stream, int &line, Strin r_tag.fields.clear(); while (true) { - CharType c = p_stream->get_char(); + char32_t c = p_stream->get_char(); if (p_stream->is_eof()) { r_err_str = "Unexpected EOF while parsing simple tag"; return ERR_PARSE_ERROR; @@ -1305,7 +1305,7 @@ Error VariantParser::parse_tag_assign_eof(Stream *p_stream, int &line, String &r String what; while (true) { - CharType c; + char32_t c; if (p_stream->saved) { c = p_stream->saved; p_stream->saved = 0; @@ -1320,7 +1320,7 @@ Error VariantParser::parse_tag_assign_eof(Stream *p_stream, int &line, String &r if (c == ';') { //comment while (true) { - CharType ch = p_stream->get_char(); + char32_t ch = p_stream->get_char(); if (p_stream->is_eof()) { return ERR_FILE_EOF; } diff --git a/core/variant_parser.h b/core/variant_parser.h index b55d7b2df0..12329e2db6 100644 --- a/core/variant_parser.h +++ b/core/variant_parser.h @@ -38,11 +38,11 @@ class VariantParser { public: struct Stream { - virtual CharType get_char() = 0; + virtual char32_t get_char() = 0; virtual bool is_utf8() const = 0; virtual bool is_eof() const = 0; - CharType saved = 0; + char32_t saved = 0; Stream() {} virtual ~Stream() {} @@ -51,7 +51,7 @@ public: struct StreamFile : public Stream { FileAccess *f = nullptr; - virtual CharType get_char(); + virtual char32_t get_char(); virtual bool is_utf8() const; virtual bool is_eof() const; @@ -62,7 +62,7 @@ public: String s; int pos = 0; - virtual CharType get_char(); + virtual char32_t get_char(); virtual bool is_utf8() const; virtual bool is_eof() const; |