summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author: bruvzg <7645683+bruvzg@users.noreply.github.com> 2020-07-27 13:43:20 +0300
committer: bruvzg <7645683+bruvzg@users.noreply.github.com> 2020-09-03 19:56:24 +0300
commit: 80b8eff6aa41ba79175a5152ba5b2b9b16f6de3f (patch)
tree: 39ed96f7b9062e2f4ae1e20560fdb1f2f04c4d67 /core
parent: 0864f12f0de50ffecbc9964cdf4edbae75e27be5 (diff)
[Complex Text Layouts] Change `String` to use UTF-32 encoding on all platforms.
Diffstat (limited to 'core')
-rw-r--r-- core/color.cpp 4
-rw-r--r-- core/cowdata.h 2
-rw-r--r-- core/hashfuncs.h 2
-rw-r--r-- core/io/file_access_pack.cpp 2
-rw-r--r-- core/io/file_access_zip.cpp 4
-rw-r--r-- core/io/ip_address.cpp 4
-rw-r--r-- core/io/json.cpp 20
-rw-r--r-- core/io/json.h 8
-rw-r--r-- core/io/xml_parser.cpp 4
-rw-r--r-- core/math/expression.cpp 20
-rw-r--r-- core/method_bind.h 14
-rw-r--r-- core/os/file_access.cpp 8
-rw-r--r-- core/string_buffer.h 24
-rw-r--r-- core/string_builder.cpp 4
-rw-r--r-- core/string_name.cpp 2
-rw-r--r-- core/string_name.h 2
-rw-r--r-- core/type_info.h 3
-rw-r--r-- core/ustring.cpp 1114
-rw-r--r-- core/ustring.h 178
-rw-r--r-- core/variant.cpp 4
-rw-r--r-- core/variant.h 4
-rw-r--r-- core/variant_call.cpp 63
-rw-r--r-- core/variant_op.cpp 2
-rw-r--r-- core/variant_parser.cpp 30
-rw-r--r-- core/variant_parser.h 8
25 files changed, 1068 insertions, 462 deletions
diff --git a/core/color.cpp b/core/color.cpp
index c85cd9100d..c61ee0e64a 100644
--- a/core/color.cpp
+++ b/core/color.cpp
@@ -390,7 +390,7 @@ String _to_hex(float p_val) {
String ret;
for (int i = 0; i < 2; i++) {
- CharType c[2] = { 0, 0 };
+ char32_t c[2] = { 0, 0 };
int lv = v & 0xF;
if (lv < 10) {
c[0] = '0' + lv;
@@ -399,7 +399,7 @@ String _to_hex(float p_val) {
}
v >>= 4;
- String cs = (const CharType *)c;
+ String cs = (const char32_t *)c;
ret = cs + ret;
}
diff --git a/core/cowdata.h b/core/cowdata.h
index 82daefb5bd..79676e6d80 100644
--- a/core/cowdata.h
+++ b/core/cowdata.h
@@ -40,6 +40,7 @@
template <class T>
class Vector;
class String;
+class Char16String;
class CharString;
template <class T, class V>
class VMap;
@@ -49,6 +50,7 @@ class CowData {
template <class TV>
friend class Vector;
friend class String;
+ friend class Char16String;
friend class CharString;
template <class TV, class VV>
friend class VMap;
diff --git a/core/hashfuncs.h b/core/hashfuncs.h
index d984f6c524..f4048843fc 100644
--- a/core/hashfuncs.h
+++ b/core/hashfuncs.h
@@ -146,6 +146,8 @@ struct HashMapHasherDefault {
static _FORCE_INLINE_ uint32_t hash(const uint8_t p_int) { return p_int; }
static _FORCE_INLINE_ uint32_t hash(const int8_t p_int) { return (uint32_t)p_int; }
static _FORCE_INLINE_ uint32_t hash(const wchar_t p_wchar) { return (uint32_t)p_wchar; }
+ static _FORCE_INLINE_ uint32_t hash(const char16_t p_uchar) { return (uint32_t)p_uchar; }
+ static _FORCE_INLINE_ uint32_t hash(const char32_t p_uchar) { return (uint32_t)p_uchar; }
static _FORCE_INLINE_ uint32_t hash(const RID &p_rid) { return hash_one_uint64(p_rid.get_id()); }
static _FORCE_INLINE_ uint32_t hash(const StringName &p_string_name) { return p_string_name.hash(); }
diff --git a/core/io/file_access_pack.cpp b/core/io/file_access_pack.cpp
index 024ec3b2b5..cc8d68be83 100644
--- a/core/io/file_access_pack.cpp
+++ b/core/io/file_access_pack.cpp
@@ -46,7 +46,7 @@ Error PackedData::add_pack(const String &p_path, bool p_replace_files, size_t p_
void PackedData::add_path(const String &pkg_path, const String &path, uint64_t ofs, uint64_t size, const uint8_t *p_md5, PackSource *p_src, bool p_replace_files) {
PathMD5 pmd5(path.md5_buffer());
- //printf("adding path %ls, %lli, %lli\n", path.c_str(), pmd5.a, pmd5.b);
+ //printf("adding path %s, %lli, %lli\n", path.utf8().get_data(), pmd5.a, pmd5.b);
bool exists = files.has(pmd5);
diff --git a/core/io/file_access_zip.cpp b/core/io/file_access_zip.cpp
index 974bb65a18..d75ca2fdc6 100644
--- a/core/io/file_access_zip.cpp
+++ b/core/io/file_access_zip.cpp
@@ -148,7 +148,7 @@ unzFile ZipArchive::get_file_handle(String p_file) const {
}
bool ZipArchive::try_open_pack(const String &p_path, bool p_replace_files, size_t p_offset = 0) {
- //printf("opening zip pack %ls, %i, %i\n", p_name.c_str(), p_name.extension().nocasecmp_to("zip"), p_name.extension().nocasecmp_to("pcz"));
+ //printf("opening zip pack %s, %i, %i\n", p_name.utf8().get_data(), p_name.extension().nocasecmp_to("zip"), p_name.extension().nocasecmp_to("pcz"));
// load with offset feature only supported for PCK files
ERR_FAIL_COND_V_MSG(p_offset != 0, false, "Invalid PCK data. Note that loading files with a non-zero offset isn't supported with ZIP archives.");
@@ -201,7 +201,7 @@ bool ZipArchive::try_open_pack(const String &p_path, bool p_replace_files, size_
uint8_t md5[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
PackedData::get_singleton()->add_path(p_path, fname, 1, 0, md5, this, p_replace_files);
- //printf("packed data add path %ls, %ls\n", p_name.c_str(), fname.c_str());
+ //printf("packed data add path %s, %s\n", p_name.utf8().get_data(), fname.utf8().get_data());
if ((i + 1) < gi.number_entry) {
unzGoToNextFile(zfile);
diff --git a/core/io/ip_address.cpp b/core/io/ip_address.cpp
index c7a0ae5605..d0fb63b958 100644
--- a/core/io/ip_address.cpp
+++ b/core/io/ip_address.cpp
@@ -71,7 +71,7 @@ static void _parse_hex(const String &p_string, int p_start, uint8_t *p_dst) {
}
int n = 0;
- CharType c = p_string[i];
+ char32_t c = p_string[i];
if (c >= '0' && c <= '9') {
n = c - '0';
} else if (c >= 'a' && c <= 'f') {
@@ -101,7 +101,7 @@ void IP_Address::_parse_ipv6(const String &p_string) {
int parts_idx = 0;
for (int i = 0; i < p_string.length(); i++) {
- CharType c = p_string[i];
+ char32_t c = p_string[i];
if (c == ':') {
if (i == 0) {
continue; // next must be a ":"
diff --git a/core/io/json.cpp b/core/io/json.cpp
index 8bdd6385cb..1b89d966fd 100644
--- a/core/io/json.cpp
+++ b/core/io/json.cpp
@@ -125,7 +125,7 @@ String JSON::print(const Variant &p_var, const String &p_indent, bool p_sort_key
return _print_var(p_var, p_indent, 0, p_sort_keys);
}
-Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str) {
+Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str) {
while (p_len > 0) {
switch (p_str[index]) {
case '\n': {
@@ -180,12 +180,12 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to
} else if (p_str[index] == '\\') {
//escaped characters...
index++;
- CharType next = p_str[index];
+ char32_t next = p_str[index];
if (next == 0) {
r_err_str = "Unterminated String";
return ERR_PARSE_ERROR;
}
- CharType res = 0;
+ char32_t res = 0;
switch (next) {
case 'b':
@@ -206,7 +206,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to
case 'u': {
// hex number
for (int j = 0; j < 4; j++) {
- CharType c = p_str[index + j + 1];
+ char32_t c = p_str[index + j + 1];
if (c == 0) {
r_err_str = "Unterminated String";
return ERR_PARSE_ERROR;
@@ -215,7 +215,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to
r_err_str = "Malformed hex constant in string";
return ERR_PARSE_ERROR;
}
- CharType v;
+ char32_t v;
if (c >= '0' && c <= '9') {
v = c - '0';
} else if (c >= 'a' && c <= 'f') {
@@ -264,7 +264,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to
if (p_str[index] == '-' || (p_str[index] >= '0' && p_str[index] <= '9')) {
//a number
- const CharType *rptr;
+ const char32_t *rptr;
double number = String::to_float(&p_str[index], &rptr);
index += (rptr - &p_str[index]);
r_token.type = TK_NUMBER;
@@ -293,7 +293,7 @@ Error JSON::_get_token(const CharType *p_str, int &index, int p_len, Token &r_to
return ERR_PARSE_ERROR;
}
-Error JSON::_parse_value(Variant &value, Token &token, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str) {
+Error JSON::_parse_value(Variant &value, Token &token, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str) {
if (token.type == TK_CURLY_BRACKET_OPEN) {
Dictionary d;
Error err = _parse_object(d, p_str, index, p_len, line, r_err_str);
@@ -337,7 +337,7 @@ Error JSON::_parse_value(Variant &value, Token &token, const CharType *p_str, in
}
}
-Error JSON::_parse_array(Array &array, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str) {
+Error JSON::_parse_array(Array &array, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str) {
Token token;
bool need_comma = false;
@@ -375,7 +375,7 @@ Error JSON::_parse_array(Array &array, const CharType *p_str, int &index, int p_
return ERR_PARSE_ERROR;
}
-Error JSON::_parse_object(Dictionary &object, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str) {
+Error JSON::_parse_object(Dictionary &object, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str) {
bool at_key = true;
String key;
Token token;
@@ -439,7 +439,7 @@ Error JSON::_parse_object(Dictionary &object, const CharType *p_str, int &index,
}
Error JSON::parse(const String &p_json, Variant &r_ret, String &r_err_str, int &r_err_line) {
- const CharType *str = p_json.ptr();
+ const char32_t *str = p_json.ptr();
int idx = 0;
int len = p_json.length();
Token token;
diff --git a/core/io/json.h b/core/io/json.h
index 4fc5630a93..9122228163 100644
--- a/core/io/json.h
+++ b/core/io/json.h
@@ -65,10 +65,10 @@ class JSON {
static String _print_var(const Variant &p_var, const String &p_indent, int p_cur_indent, bool p_sort_keys);
- static Error _get_token(const CharType *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str);
- static Error _parse_value(Variant &value, Token &token, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str);
- static Error _parse_array(Array &array, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str);
- static Error _parse_object(Dictionary &object, const CharType *p_str, int &index, int p_len, int &line, String &r_err_str);
+ static Error _get_token(const char32_t *p_str, int &index, int p_len, Token &r_token, int &line, String &r_err_str);
+ static Error _parse_value(Variant &value, Token &token, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str);
+ static Error _parse_array(Array &array, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str);
+ static Error _parse_object(Dictionary &object, const char32_t *p_str, int &index, int p_len, int &line, String &r_err_str);
public:
static String print(const Variant &p_var, const String &p_indent = "", bool p_sort_keys = true);
diff --git a/core/io/xml_parser.cpp b/core/io/xml_parser.cpp
index b11267b60f..fc75ac7d1e 100644
--- a/core/io/xml_parser.cpp
+++ b/core/io/xml_parser.cpp
@@ -36,7 +36,7 @@
VARIANT_ENUM_CAST(XMLParser::NodeType);
-static bool _equalsn(const CharType *str1, const CharType *str2, int len) {
+static bool _equalsn(const char32_t *str1, const char32_t *str2, int len) {
int i;
for (i = 0; i < len && str1[i] && str2[i]; ++i) {
if (str1[i] != str2[i]) {
@@ -64,7 +64,7 @@ String XMLParser::_replace_special_characters(const String &origstr) {
int specialChar = -1;
for (int i = 0; i < (int)special_characters.size(); ++i) {
- const CharType *p = &origstr[pos] + 1;
+ const char32_t *p = &origstr[pos] + 1;
if (_equalsn(&special_characters[i][1], p, special_characters[i].length() - 1)) {
specialChar = i;
diff --git a/core/math/expression.cpp b/core/math/expression.cpp
index 735a30f6cc..1040f9e0e4 100644
--- a/core/math/expression.cpp
+++ b/core/math/expression.cpp
@@ -596,7 +596,7 @@ void Expression::exec_func(BuiltinFunc p_func, const Variant **p_inputs, Variant
} break;
case TEXT_CHAR: {
- CharType result[2] = { *p_inputs[0], 0 };
+ char32_t result[2] = { *p_inputs[0], 0 };
*r_return = String(result);
@@ -739,7 +739,7 @@ void Expression::exec_func(BuiltinFunc p_func, const Variant **p_inputs, Variant
////////
-static bool _is_number(CharType c) {
+static bool _is_number(char32_t c) {
return (c >= '0' && c <= '9');
}
@@ -747,7 +747,7 @@ Error Expression::_get_token(Token &r_token) {
while (true) {
#define GET_CHAR() (str_ofs >= expression.length() ? 0 : expression[str_ofs++])
- CharType cchar = GET_CHAR();
+ char32_t cchar = GET_CHAR();
switch (cchar) {
case 0: {
@@ -900,7 +900,7 @@ Error Expression::_get_token(Token &r_token) {
case '"': {
String str;
while (true) {
- CharType ch = GET_CHAR();
+ char32_t ch = GET_CHAR();
if (ch == 0) {
_set_error("Unterminated String");
@@ -912,13 +912,13 @@ Error Expression::_get_token(Token &r_token) {
} else if (ch == '\\') {
//escaped characters...
- CharType next = GET_CHAR();
+ char32_t next = GET_CHAR();
if (next == 0) {
_set_error("Unterminated String");
r_token.type = TK_ERROR;
return ERR_PARSE_ERROR;
}
- CharType res = 0;
+ char32_t res = 0;
switch (next) {
case 'b':
@@ -939,7 +939,7 @@ Error Expression::_get_token(Token &r_token) {
case 'u': {
// hex number
for (int j = 0; j < 4; j++) {
- CharType c = GET_CHAR();
+ char32_t c = GET_CHAR();
if (c == 0) {
_set_error("Unterminated String");
@@ -951,7 +951,7 @@ Error Expression::_get_token(Token &r_token) {
r_token.type = TK_ERROR;
return ERR_PARSE_ERROR;
}
- CharType v;
+ char32_t v;
if (_is_number(c)) {
v = c - '0';
} else if (c >= 'a' && c <= 'f') {
@@ -992,7 +992,7 @@ Error Expression::_get_token(Token &r_token) {
break;
}
- CharType next_char = (str_ofs >= expression.length()) ? 0 : expression[str_ofs];
+ char32_t next_char = (str_ofs >= expression.length()) ? 0 : expression[str_ofs];
if (_is_number(cchar) || (cchar == '.' && _is_number(next_char))) {
//a number
@@ -1004,7 +1004,7 @@ Error Expression::_get_token(Token &r_token) {
#define READING_DONE 4
int reading = READING_INT;
- CharType c = cchar;
+ char32_t c = cchar;
bool exp_sign = false;
bool exp_beg = false;
bool is_float = false;
diff --git a/core/method_bind.h b/core/method_bind.h
index ff2c771f81..942e2e0036 100644
--- a/core/method_bind.h
+++ b/core/method_bind.h
@@ -181,18 +181,18 @@ VARIANT_ENUM_CAST(Variant::Type);
VARIANT_ENUM_CAST(Variant::Operator);
template <>
-struct VariantCaster<wchar_t> {
- static _FORCE_INLINE_ wchar_t cast(const Variant &p_variant) {
- return (wchar_t)p_variant.operator int();
+struct VariantCaster<char32_t> {
+ static _FORCE_INLINE_ char32_t cast(const Variant &p_variant) {
+ return (char32_t)p_variant.operator int();
}
};
#ifdef PTRCALL_ENABLED
template <>
-struct PtrToArg<wchar_t> {
- _FORCE_INLINE_ static wchar_t convert(const void *p_ptr) {
- return wchar_t(*reinterpret_cast<const int *>(p_ptr));
+struct PtrToArg<char32_t> {
+ _FORCE_INLINE_ static char32_t convert(const void *p_ptr) {
+ return char32_t(*reinterpret_cast<const int *>(p_ptr));
}
- _FORCE_INLINE_ static void encode(wchar_t p_val, const void *p_ptr) {
+ _FORCE_INLINE_ static void encode(char32_t p_val, const void *p_ptr) {
*(int *)p_ptr = p_val;
}
};
diff --git a/core/os/file_access.cpp b/core/os/file_access.cpp
index 20b3435911..9dbb2952f7 100644
--- a/core/os/file_access.cpp
+++ b/core/os/file_access.cpp
@@ -234,7 +234,7 @@ double FileAccess::get_double() const {
String FileAccess::get_token() const {
CharString token;
- CharType c = get_8();
+ char32_t c = get_8();
while (!eof_reached()) {
if (c <= ' ') {
@@ -299,7 +299,7 @@ public:
String FileAccess::get_line() const {
CharBuffer line;
- CharType c = get_8();
+ char32_t c = get_8();
while (!eof_reached()) {
if (c == '\n' || c == '\0') {
@@ -342,8 +342,8 @@ Vector<String> FileAccess::get_csv_line(const String &p_delim) const {
bool in_quote = false;
String current;
for (int i = 0; i < l.length(); i++) {
- CharType c = l[i];
- CharType s[2] = { 0, 0 };
+ char32_t c = l[i];
+ char32_t s[2] = { 0, 0 };
if (!in_quote && c == p_delim[0]) {
strings.push_back(current);
diff --git a/core/string_buffer.h b/core/string_buffer.h
index f9cf31075a..a685720851 100644
--- a/core/string_buffer.h
+++ b/core/string_buffer.h
@@ -35,21 +35,21 @@
template <int SHORT_BUFFER_SIZE = 64>
class StringBuffer {
- CharType short_buffer[SHORT_BUFFER_SIZE];
+ char32_t short_buffer[SHORT_BUFFER_SIZE];
String buffer;
int string_length = 0;
- _FORCE_INLINE_ CharType *current_buffer_ptr() {
+ _FORCE_INLINE_ char32_t *current_buffer_ptr() {
return static_cast<String &>(buffer).empty() ? short_buffer : buffer.ptrw();
}
public:
- StringBuffer &append(CharType p_char);
+ StringBuffer &append(char32_t p_char);
StringBuffer &append(const String &p_string);
StringBuffer &append(const char *p_str);
- StringBuffer &append(const CharType *p_str, int p_clip_to_len = -1);
+ StringBuffer &append(const char32_t *p_str, int p_clip_to_len = -1);
- _FORCE_INLINE_ void operator+=(CharType p_char) {
+ _FORCE_INLINE_ void operator+=(char32_t p_char) {
append(p_char);
}
@@ -61,7 +61,7 @@ public:
append(p_str);
}
- _FORCE_INLINE_ void operator+=(const CharType *p_str) {
+ _FORCE_INLINE_ void operator+=(const char32_t *p_str) {
append(p_str);
}
@@ -80,7 +80,7 @@ public:
};
template <int SHORT_BUFFER_SIZE>
-StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(CharType p_char) {
+StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(char32_t p_char) {
reserve(string_length + 2);
current_buffer_ptr()[string_length++] = p_char;
return *this;
@@ -88,7 +88,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(CharTyp
template <int SHORT_BUFFER_SIZE>
StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const String &p_string) {
- return append(p_string.c_str());
+ return append(p_string.get_data());
}
template <int SHORT_BUFFER_SIZE>
@@ -96,7 +96,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const c
int len = strlen(p_str);
reserve(string_length + len + 1);
- CharType *buf = current_buffer_ptr();
+ char32_t *buf = current_buffer_ptr();
for (const char *c_ptr = p_str; *c_ptr; ++c_ptr) {
buf[string_length++] = *c_ptr;
}
@@ -104,13 +104,13 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const c
}
template <int SHORT_BUFFER_SIZE>
-StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const CharType *p_str, int p_clip_to_len) {
+StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::append(const char32_t *p_str, int p_clip_to_len) {
int len = 0;
while ((p_clip_to_len < 0 || len < p_clip_to_len) && p_str[len]) {
++len;
}
reserve(string_length + len + 1);
- memcpy(&(current_buffer_ptr()[string_length]), p_str, len * sizeof(CharType));
+ memcpy(&(current_buffer_ptr()[string_length]), p_str, len * sizeof(char32_t));
string_length += len;
return *this;
@@ -125,7 +125,7 @@ StringBuffer<SHORT_BUFFER_SIZE> &StringBuffer<SHORT_BUFFER_SIZE>::reserve(int p_
bool need_copy = string_length > 0 && buffer.empty();
buffer.resize(next_power_of_2(p_size));
if (need_copy) {
- memcpy(buffer.ptrw(), short_buffer, string_length * sizeof(CharType));
+ memcpy(buffer.ptrw(), short_buffer, string_length * sizeof(char32_t));
}
return *this;
diff --git a/core/string_builder.cpp b/core/string_builder.cpp
index c8d6498f27..dec299ffa3 100644
--- a/core/string_builder.cpp
+++ b/core/string_builder.cpp
@@ -61,7 +61,7 @@ String StringBuilder::as_string() const {
return "";
}
- CharType *buffer = memnew_arr(CharType, string_length);
+ char32_t *buffer = memnew_arr(char32_t, string_length);
int current_position = 0;
@@ -73,7 +73,7 @@ String StringBuilder::as_string() const {
// Godot string
const String &s = strings[godot_string_elem];
- memcpy(buffer + current_position, s.ptr(), s.length() * sizeof(CharType));
+ memcpy(buffer + current_position, s.ptr(), s.length() * sizeof(char32_t));
current_position += s.length();
diff --git a/core/string_name.cpp b/core/string_name.cpp
index cbf6009681..6260e3ce8c 100644
--- a/core/string_name.cpp
+++ b/core/string_name.cpp
@@ -317,7 +317,7 @@ StringName StringName::search(const char *p_name) {
return StringName(); //does not exist
}
-StringName StringName::search(const CharType *p_name) {
+StringName StringName::search(const char32_t *p_name) {
ERR_FAIL_COND_V(!configured, StringName());
ERR_FAIL_COND_V(!p_name, StringName());
diff --git a/core/string_name.h b/core/string_name.h
index 886ddd0ee7..4f90479bda 100644
--- a/core/string_name.h
+++ b/core/string_name.h
@@ -122,7 +122,7 @@ public:
}
static StringName search(const char *p_name);
- static StringName search(const CharType *p_name);
+ static StringName search(const char32_t *p_name);
static StringName search(const String &p_name);
struct AlphCompare {
diff --git a/core/type_info.h b/core/type_info.h
index e3d2b5bd53..3c7f59bb84 100644
--- a/core/type_info.h
+++ b/core/type_info.h
@@ -132,7 +132,8 @@ MAKE_TYPE_INFO_WITH_META(uint32_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_
MAKE_TYPE_INFO_WITH_META(int32_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_INT32)
MAKE_TYPE_INFO_WITH_META(uint64_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_UINT64)
MAKE_TYPE_INFO_WITH_META(int64_t, Variant::INT, GodotTypeInfo::METADATA_INT_IS_INT64)
-MAKE_TYPE_INFO(wchar_t, Variant::INT)
+MAKE_TYPE_INFO(char16_t, Variant::INT)
+MAKE_TYPE_INFO(char32_t, Variant::INT)
MAKE_TYPE_INFO_WITH_META(float, Variant::FLOAT, GodotTypeInfo::METADATA_REAL_IS_FLOAT)
MAKE_TYPE_INFO_WITH_META(double, Variant::FLOAT, GodotTypeInfo::METADATA_REAL_IS_DOUBLE)
diff --git a/core/ustring.cpp b/core/ustring.cpp
index 9d2d938eaf..d5afbc2b47 100644
--- a/core/ustring.cpp
+++ b/core/ustring.cpp
@@ -39,7 +39,6 @@
#include "core/ucaps.h"
#include "core/variant.h"
-#include <wchar.h>
#include <cstdint>
#ifndef NO_USE_STDLIB
@@ -62,9 +61,10 @@
#define IS_HEX_DIGIT(m_d) (((m_d) >= '0' && (m_d) <= '9') || ((m_d) >= 'a' && (m_d) <= 'f') || ((m_d) >= 'A' && (m_d) <= 'F'))
const char CharString::_null = 0;
-const CharType String::_null = 0;
+const char16_t Char16String::_null = 0;
+const char32_t String::_null = 0;
-bool is_symbol(CharType c) {
+bool is_symbol(char32_t c) {
return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
}
@@ -96,9 +96,11 @@ bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
}
}
-/** STRING **/
+/*************************************************************************/
+/* Char16String */
+/*************************************************************************/
-bool CharString::operator<(const CharString &p_right) const {
+bool Char16String::operator<(const Char16String &p_right) const {
if (length() == 0) {
return p_right.length() != 0;
}
@@ -106,7 +108,7 @@ bool CharString::operator<(const CharString &p_right) const {
return is_str_less(get_data(), p_right.get_data());
}
-CharString &CharString::operator+=(char p_char) {
+Char16String &Char16String::operator+=(char16_t p_char) {
resize(size() ? size() + 1 : 2);
set(length(), 0);
set(length() - 1, p_char);
@@ -114,19 +116,75 @@ CharString &CharString::operator+=(char p_char) {
return *this;
}
-const char *CharString::get_data() const {
+Char16String &Char16String::operator=(const char16_t *p_cstr) {
+ copy_from(p_cstr);
+ return *this;
+}
+
+const char16_t *Char16String::get_data() const {
if (size()) {
return &operator[](0);
} else {
- return "";
+ return u"";
}
}
+void Char16String::copy_from(const char16_t *p_cstr) {
+ if (!p_cstr) {
+ resize(0);
+ return;
+ }
+
+ const char16_t *s = p_cstr;
+ for (; *s; s++) {
+ }
+ size_t len = s - p_cstr;
+
+ if (len == 0) {
+ resize(0);
+ return;
+ }
+
+ Error err = resize(++len); // include terminating null char
+
+ ERR_FAIL_COND_MSG(err != OK, "Failed to copy char16_t string.");
+
+ memcpy(ptrw(), p_cstr, len * sizeof(char16_t));
+}
+
+/*************************************************************************/
+/* CharString */
+/*************************************************************************/
+
+bool CharString::operator<(const CharString &p_right) const {
+ if (length() == 0) {
+ return p_right.length() != 0;
+ }
+
+ return is_str_less(get_data(), p_right.get_data());
+}
+
+CharString &CharString::operator+=(char p_char) {
+ resize(size() ? size() + 1 : 2);
+ set(length(), 0);
+ set(length() - 1, p_char);
+
+ return *this;
+}
+
CharString &CharString::operator=(const char *p_cstr) {
copy_from(p_cstr);
return *this;
}
+const char *CharString::get_data() const {
+ if (size()) {
+ return &operator[](0);
+ } else {
+ return "";
+ }
+}
+
void CharString::copy_from(const char *p_cstr) {
if (!p_cstr) {
resize(0);
@@ -147,7 +205,44 @@ void CharString::copy_from(const char *p_cstr) {
memcpy(ptrw(), p_cstr, len);
}
+/*************************************************************************/
+/* String */
+/*************************************************************************/
+
+//TODO: move to TextServer
+//kind of poor should be rewritten properly
+String String::word_wrap(int p_chars_per_line) const {
+ int from = 0;
+ int last_space = 0;
+ String ret;
+ for (int i = 0; i < length(); i++) {
+ if (i - from >= p_chars_per_line) {
+ if (last_space == -1) {
+ ret += substr(from, i - from + 1) + "\n";
+ } else {
+ ret += substr(from, last_space - from) + "\n";
+ i = last_space; //rewind
+ }
+ from = i + 1;
+ last_space = -1;
+ } else if (operator[](i) == ' ' || operator[](i) == '\t') {
+ last_space = i;
+ } else if (operator[](i) == '\n') {
+ ret += substr(from, i - from) + "\n";
+ from = i + 1;
+ last_space = -1;
+ }
+ }
+
+ if (from < length()) {
+ ret += substr(from, length());
+ }
+
+ return ret;
+}
+
void String::copy_from(const char *p_cstr) {
+ // copy Latin-1 encoded c-string directly
if (!p_cstr) {
resize(0);
return;
@@ -166,21 +261,22 @@ void String::copy_from(const char *p_cstr) {
resize(len + 1); // include 0
- CharType *dst = this->ptrw();
+ char32_t *dst = this->ptrw();
for (int i = 0; i < len + 1; i++) {
dst[i] = p_cstr[i];
}
}
-void String::copy_from(const CharType *p_cstr, const int p_clip_to) {
+void String::copy_from(const char *p_cstr, const int p_clip_to) {
+ // copy Latin-1 encoded c-string directly
if (!p_cstr) {
resize(0);
return;
}
int len = 0;
- const CharType *ptr = p_cstr;
+ const char *ptr = p_cstr;
while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
len++;
}
@@ -190,55 +286,117 @@ void String::copy_from(const CharType *p_cstr, const int p_clip_to) {
return;
}
- copy_from_unchecked(p_cstr, len);
-}
-
-// assumes the following have already been validated:
-// p_char != nullptr
-// p_length > 0
-// p_length <= p_char strlen
-void String::copy_from_unchecked(const CharType *p_char, const int p_length) {
- resize(p_length + 1);
- set(p_length, 0);
+ resize(len + 1); // include 0
- CharType *dst = ptrw();
+ char32_t *dst = this->ptrw();
- for (int i = 0; i < p_length; i++) {
- dst[i] = p_char[i];
+ for (int i = 0; i < len; i++) {
+ dst[i] = p_cstr[i];
}
+ dst[len] = 0;
}
-void String::copy_from(const CharType &p_char) {
+void String::copy_from(const wchar_t *p_cstr) {
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit, parse as UTF-16
+ parse_utf16((const char16_t *)p_cstr);
+#else
+ // wchar_t is 32-bit, copy directly
+ copy_from((const char32_t *)p_cstr);
+#endif
+}
+
+void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit, parse as UTF-16
+ parse_utf16((const char16_t *)p_cstr, p_clip_to);
+#else
+ // wchar_t is 32-bit, copy directly
+ copy_from((const char32_t *)p_cstr, p_clip_to);
+#endif
+}
+
+void String::copy_from(const char32_t &p_char) {
resize(2);
- set(0, p_char);
+ if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
+ set(0, 0xfffd);
+ } else {
+ set(0, p_char);
+ }
set(1, 0);
}
-bool String::operator==(const String &p_str) const {
- if (length() != p_str.length()) {
- return false;
+void String::copy_from(const char32_t *p_cstr) {
+ if (!p_cstr) {
+ resize(0);
+ return;
}
- if (empty()) {
- return true;
+
+ int len = 0;
+ const char32_t *ptr = p_cstr;
+ while (*(ptr++) != 0) {
+ len++;
}
- int l = length();
+ if (len == 0) {
+ resize(0);
+ return;
+ }
- const CharType *src = c_str();
- const CharType *dst = p_str.c_str();
+ copy_from_unchecked(p_cstr, len);
+}
- /* Compare char by char */
- for (int i = 0; i < l; i++) {
- if (src[i] != dst[i]) {
- return false;
+void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
+ if (!p_cstr) {
+ resize(0);
+ return;
+ }
+
+ int len = 0;
+ const char32_t *ptr = p_cstr;
+ while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
+ len++;
+ }
+
+ if (len == 0) {
+ resize(0);
+ return;
+ }
+
+ copy_from_unchecked(p_cstr, len);
+}
+
+// assumes the following have already been validated:
+// p_char != nullptr
+// p_length > 0
+// p_length <= p_char strlen
+void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
+ resize(p_length + 1);
+ set(p_length, 0);
+
+ char32_t *dst = ptrw();
+
+ for (int i = 0; i < p_length; i++) {
+ if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
+ dst[i] = 0xfffd;
+ } else {
+ dst[i] = p_char[i];
}
}
+}
- return true;
+void String::operator=(const char *p_str) {
+ copy_from(p_str);
}
-bool String::operator!=(const String &p_str) const {
- return !(*this == p_str);
+void String::operator=(const char32_t *p_str) {
+ copy_from(p_str);
+}
+
+void String::operator=(const wchar_t *p_str) {
+ copy_from(p_str);
}
String String::operator+(const String &p_str) const {
@@ -247,6 +405,28 @@ String String::operator+(const String &p_str) const {
return res;
}
+String operator+(const char *p_chr, const String &p_str) {
+ String tmp = p_chr;
+ tmp += p_str;
+ return tmp;
+}
+
+String operator+(const wchar_t *p_chr, const String &p_str) {
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit
+ String tmp = String::utf16((const char16_t *)p_chr);
+#else
+ // wchar_t is 32-bit
+ String tmp = (const char32_t *)p_chr;
+#endif
+ tmp += p_str;
+ return tmp;
+}
+
+String operator+(char32_t p_chr, const String &p_str) {
+ return (String::chr(p_chr) + p_str);
+}
+
String &String::operator+=(const String &p_str) {
if (empty()) {
*this = p_str;
@@ -261,8 +441,8 @@ String &String::operator+=(const String &p_str) {
resize(length() + p_str.size());
- const CharType *src = p_str.c_str();
- CharType *dst = ptrw();
+ const char32_t *src = p_str.get_data();
+ char32_t *dst = ptrw();
set(length(), 0);
@@ -273,19 +453,6 @@ String &String::operator+=(const String &p_str) {
return *this;
}
-String &String::operator+=(const CharType *p_str) {
- *this += String(p_str);
- return *this;
-}
-
-String &String::operator+=(CharType p_char) {
- resize(size() ? size() + 1 : 2);
- set(length(), 0);
- set(length() - 1, p_char);
-
- return *this;
-}
-
String &String::operator+=(const char *p_str) {
if (!p_str || p_str[0] == 0) {
return *this;
@@ -301,7 +468,7 @@ String &String::operator+=(const char *p_str) {
resize(from + src_len + 1);
- CharType *dst = ptrw();
+ char32_t *dst = ptrw();
set(length(), 0);
@@ -312,16 +479,43 @@ String &String::operator+=(const char *p_str) {
return *this;
}
-void String::operator=(const char *p_str) {
- copy_from(p_str);
+String &String::operator+=(const wchar_t *p_str) {
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit
+ *this += String::utf16((const char16_t *)p_str);
+#else
+ // wchar_t is 32-bit
+ *this += String((const char32_t *)p_str);
+#endif
+ return *this;
}
-void String::operator=(const CharType *p_str) {
- copy_from(p_str);
+String &String::operator+=(const char32_t *p_str) {
+ *this += String(p_str);
+ return *this;
}
-bool String::operator==(const StrRange &p_str_range) const {
- int len = p_str_range.len;
+String &String::operator+=(char32_t p_char) {
+ resize(size() ? size() + 1 : 2);
+ set(length(), 0);
+ if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
+ set(length() - 1, 0xfffd);
+ } else {
+ set(length() - 1, p_char);
+ }
+
+ return *this;
+}
+
+bool String::operator==(const char *p_str) const {
+ // compare Latin-1 encoded c-string
+ int len = 0;
+ const char *aux = p_str;
+
+ while (*(aux++) != 0) {
+ len++;
+ }
if (length() != len) {
return false;
@@ -330,12 +524,13 @@ bool String::operator==(const StrRange &p_str_range) const {
return true;
}
- const CharType *c_str = p_str_range.c_str;
- const CharType *dst = &operator[](0);
+ int l = length();
- /* Compare char by char */
- for (int i = 0; i < len; i++) {
- if (c_str[i] != dst[i]) {
+ const char32_t *dst = get_data();
+
+ // Compare char by char
+ for (int i = 0; i < l; i++) {
+ if ((char32_t)p_str[i] != dst[i]) {
return false;
}
}
@@ -343,9 +538,19 @@ bool String::operator==(const StrRange &p_str_range) const {
return true;
}
-bool String::operator==(const char *p_str) const {
+bool String::operator==(const wchar_t *p_str) const {
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit, parse as UTF-16
+ return *this == String::utf16((const char16_t *)p_str);
+#else
+ // wchar_t is 32-bit, compare char by char
+ return *this == (const char32_t *)p_str;
+#endif
+}
+
+bool String::operator==(const char32_t *p_str) const {
int len = 0;
- const char *aux = p_str;
+ const char32_t *aux = p_str;
while (*(aux++) != 0) {
len++;
@@ -360,7 +565,7 @@ bool String::operator==(const char *p_str) const {
int l = length();
- const CharType *dst = c_str();
+ const char32_t *dst = get_data();
/* Compare char by char */
for (int i = 0; i < l; i++) {
@@ -372,14 +577,32 @@ bool String::operator==(const char *p_str) const {
return true;
}
-bool String::operator==(const CharType *p_str) const {
- int len = 0;
- const CharType *aux = p_str;
+bool String::operator==(const String &p_str) const {
+ if (length() != p_str.length()) {
+ return false;
+ }
+ if (empty()) {
+ return true;
+ }
- while (*(aux++) != 0) {
- len++;
+ int l = length();
+
+ const char32_t *src = get_data();
+ const char32_t *dst = p_str.get_data();
+
+ /* Compare char by char */
+ for (int i = 0; i < l; i++) {
+ if (src[i] != dst[i]) {
+ return false;
+ }
}
+ return true;
+}
+
+bool String::operator==(const StrRange &p_str_range) const {
+ int len = p_str_range.len;
+
if (length() != len) {
return false;
}
@@ -387,13 +610,12 @@ bool String::operator==(const CharType *p_str) const {
return true;
}
- int l = length();
-
- const CharType *dst = c_str();
+ const char32_t *c_str = p_str_range.c_str;
+ const char32_t *dst = &operator[](0);
/* Compare char by char */
- for (int i = 0; i < l; i++) {
- if (p_str[i] != dst[i]) {
+ for (int i = 0; i < len; i++) {
+ if (c_str[i] != dst[i]) {
return false;
}
}
@@ -401,30 +623,68 @@ bool String::operator==(const CharType *p_str) const {
return true;
}
+bool operator==(const char *p_chr, const String &p_str) {
+ return p_str == p_chr;
+}
+
+bool operator==(const wchar_t *p_chr, const String &p_str) {
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit
+ return p_str == String::utf16((const char16_t *)p_chr);
+#else
+ // wchar_t is 32-bit
+ return p_str == String((const char32_t *)p_chr);
+#endif
+}
+
bool String::operator!=(const char *p_str) const {
return (!(*this == p_str));
}
-bool String::operator!=(const CharType *p_str) const {
+bool String::operator!=(const wchar_t *p_str) const {
return (!(*this == p_str));
}
-bool String::operator<(const CharType *p_str) const {
+bool String::operator!=(const char32_t *p_str) const {
+ return (!(*this == p_str));
+}
+
+bool String::operator!=(const String &p_str) const {
+ return !((*this == p_str));
+}
+
+bool String::operator<=(const String &p_str) const {
+ return (*this < p_str) || (*this == p_str);
+}
+
+bool String::operator<(const char *p_str) const {
if (empty() && p_str[0] == 0) {
return false;
}
if (empty()) {
return true;
}
-
- return is_str_less(c_str(), p_str);
+ return is_str_less(get_data(), p_str);
}
-bool String::operator<=(const String &p_str) const {
- return (*this < p_str) || (*this == p_str);
+bool String::operator<(const wchar_t *p_str) const {
+ if (empty() && p_str[0] == 0) {
+ return false;
+ }
+ if (empty()) {
+ return true;
+ }
+
+#ifdef WINDOWS_ENABLED
+ // wchar_t is 16-bit
+ return is_str_less(get_data(), String::utf16((const char16_t *)p_str).get_data());
+#else
+ // wchar_t is 32-bit
+ return is_str_less(get_data(), (const char32_t *)p_str);
+#endif
}
-bool String::operator<(const char *p_str) const {
+bool String::operator<(const char32_t *p_str) const {
if (empty() && p_str[0] == 0) {
return false;
}
@@ -432,11 +692,11 @@ bool String::operator<(const char *p_str) const {
return true;
}
- return is_str_less(c_str(), p_str);
+ return is_str_less(get_data(), p_str);
}
bool String::operator<(const String &p_str) const {
- return operator<(p_str.c_str());
+ return operator<(p_str.get_data());
}
signed char String::nocasecmp_to(const String &p_str) const {
@@ -450,8 +710,8 @@ signed char String::nocasecmp_to(const String &p_str) const {
return 1;
}
- const CharType *that_str = p_str.c_str();
- const CharType *this_str = c_str();
+ const char32_t *that_str = p_str.get_data();
+ const char32_t *this_str = get_data();
while (true) {
if (*that_str == 0 && *this_str == 0) {
@@ -482,8 +742,8 @@ signed char String::casecmp_to(const String &p_str) const {
return 1;
}
- const CharType *that_str = p_str.c_str();
- const CharType *this_str = c_str();
+ const char32_t *that_str = p_str.get_data();
+ const char32_t *this_str = get_data();
while (true) {
if (*that_str == 0 && *this_str == 0) {
@@ -504,8 +764,8 @@ signed char String::casecmp_to(const String &p_str) const {
}
signed char String::naturalnocasecmp_to(const String &p_str) const {
- const CharType *this_str = c_str();
- const CharType *that_str = p_str.c_str();
+ const char32_t *this_str = get_data();
+ const char32_t *that_str = p_str.get_data();
if (this_str && that_str) {
while (*this_str == '.' || *that_str == '.') {
@@ -571,6 +831,11 @@ signed char String::naturalnocasecmp_to(const String &p_str) const {
return 0;
}
+const char32_t *String::get_data() const {
+ static const char32_t zero = 0;
+ return size() ? &operator[](0) : &zero;
+}
+
void String::erase(int p_pos, int p_chars) {
*this = left(p_pos) + substr(p_pos + p_chars, length() - ((p_pos + p_chars)));
}
@@ -593,7 +858,7 @@ String String::capitalize() const {
}
String String::camelcase_to_underscore(bool lowercase) const {
- const CharType *cstr = c_str();
+ const char32_t *cstr = get_data();
String new_string;
const char A = 'A', Z = 'Z';
const char a = 'a', z = 'z';
@@ -705,7 +970,7 @@ String String::get_slice(String p_splitter, int p_slice) const {
return ""; //no find!
}
-String String::get_slicec(CharType p_splitter, int p_slice) const {
+String String::get_slicec(char32_t p_splitter, int p_slice) const {
if (empty()) {
return String();
}
@@ -714,7 +979,7 @@ String String::get_slicec(CharType p_splitter, int p_slice) const {
return String();
}
- const CharType *c = this->ptr();
+ const char32_t *c = this->ptr();
int i = 0;
int prev = 0;
int count = 0;
@@ -851,7 +1116,7 @@ Vector<float> String::split_floats(const String &p_splitter, bool p_allow_empty)
end = len;
}
if (p_allow_empty || (end > from)) {
- ret.push_back(String::to_float(&c_str()[from]));
+ ret.push_back(String::to_float(&get_data()[from]));
}
if (end == len) {
@@ -880,7 +1145,7 @@ Vector<float> String::split_floats_mk(const Vector<String> &p_splitters, bool p_
}
if (p_allow_empty || (end > from)) {
- ret.push_back(String::to_float(&c_str()[from]));
+ ret.push_back(String::to_float(&get_data()[from]));
}
if (end == len) {
@@ -904,7 +1169,7 @@ Vector<int> String::split_ints(const String &p_splitter, bool p_allow_empty) con
end = len;
}
if (p_allow_empty || (end > from)) {
- ret.push_back(String::to_int(&c_str()[from], end - from));
+ ret.push_back(String::to_int(&get_data()[from], end - from));
}
if (end == len) {
@@ -933,7 +1198,7 @@ Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allo
}
if (p_allow_empty || (end > from)) {
- ret.push_back(String::to_int(&c_str()[from], end - from));
+ ret.push_back(String::to_int(&get_data()[from], end - from));
}
if (end == len) {
@@ -946,7 +1211,7 @@ Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allo
return ret;
}
-String String::join(Vector<String> parts) {
+String String::join(Vector<String> parts) const {
String ret;
for (int i = 0; i < parts.size(); ++i) {
if (i > 0) {
@@ -957,11 +1222,11 @@ String String::join(Vector<String> parts) {
return ret;
}
-CharType String::char_uppercase(CharType p_char) {
+char32_t String::char_uppercase(char32_t p_char) {
return _find_upper(p_char);
}
-CharType String::char_lowercase(CharType p_char) {
+char32_t String::char_lowercase(char32_t p_char) {
return _find_lower(p_char);
}
@@ -969,8 +1234,8 @@ String String::to_upper() const {
String upper = *this;
for (int i = 0; i < upper.size(); i++) {
- const CharType s = upper[i];
- const CharType t = _find_upper(s);
+ const char32_t s = upper[i];
+ const char32_t t = _find_upper(s);
if (s != t) { // avoid copy on write
upper[i] = t;
}
@@ -983,8 +1248,8 @@ String String::to_lower() const {
String lower = *this;
for (int i = 0; i < lower.size(); i++) {
- const CharType s = lower[i];
- const CharType t = _find_lower(s);
+ const char32_t s = lower[i];
+ const char32_t t = _find_lower(s);
if (s != t) { // avoid copy on write
lower[i] = t;
}
@@ -993,34 +1258,8 @@ String String::to_lower() const {
return lower;
}
-const CharType *String::c_str() const {
- static const CharType zero = 0;
-
- return size() ? &operator[](0) : &zero;
-}
-
-String String::md5(const uint8_t *p_md5) {
- return String::hex_encode_buffer(p_md5, 16);
-}
-
-String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
- static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
-
- String ret;
- char v[2] = { 0, 0 };
-
- for (int i = 0; i < p_len; i++) {
- v[0] = hex[p_buffer[i] >> 4];
- ret += v;
- v[0] = hex[p_buffer[i] & 0xF];
- ret += v;
- }
-
- return ret;
-}
-
-String String::chr(CharType p_char) {
- CharType c[2] = { p_char, 0 };
+String String::chr(char32_t p_char) {
+ char32_t c[2] = { p_char, 0 };
return String(c);
}
@@ -1028,6 +1267,14 @@ String String::num(double p_num, int p_decimals) {
if (Math::is_nan(p_num)) {
return "nan";
}
+
+ if (Math::is_inf(p_num)) {
+ if (signbit(p_num)) {
+ return "-inf";
+ } else {
+ return "inf";
+ }
+ }
#ifndef NO_USE_STDLIB
if (p_decimals > 16) {
@@ -1106,7 +1353,7 @@ String String::num(double p_num, int p_decimals) {
/* decimal part */
if (p_decimals > 0 || (p_decimals == -1 && (int)p_num != p_num)) {
- double dec = p_num - (float)((int)p_num);
+ double dec = p_num - (double)((int)p_num);
int digit = 0;
if (p_decimals > MAX_DIGITS)
@@ -1125,7 +1372,7 @@ String String::num(double p_num, int p_decimals) {
if (digit == MAX_DIGITS) //no point in going to infinite
break;
- if ((dec - (float)((int)dec)) < 1e-6)
+ if ((dec - (double)((int)dec)) < 1e-6)
break;
}
@@ -1159,7 +1406,7 @@ String String::num(double p_num, int p_decimals) {
s = "0";
else {
while (intn) {
- CharType num = '0' + (intn % 10);
+ char32_t num = '0' + (intn % 10);
intn /= 10;
s = num + s;
}
@@ -1188,7 +1435,7 @@ String String::num_int64(int64_t p_num, int base, bool capitalize_hex) {
}
String s;
s.resize(chars + 1);
- CharType *c = s.ptrw();
+ char32_t *c = s.ptrw();
c[chars] = 0;
n = p_num;
do {
@@ -1221,7 +1468,7 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
String s;
s.resize(chars + 1);
- CharType *c = s.ptrw();
+ char32_t *c = s.ptrw();
c[chars] = 0;
n = p_num;
do {
@@ -1240,6 +1487,18 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
}
String String::num_real(double p_num) {
+ if (Math::is_nan(p_num)) {
+ return "nan";
+ }
+
+ if (Math::is_inf(p_num)) {
+ if (signbit(p_num)) {
+ return "-inf";
+ } else {
+ return "inf";
+ }
+ }
+
String s;
String sd;
/* integer part */
@@ -1251,7 +1510,7 @@ String String::num_real(double p_num) {
/* decimal part */
if ((int)p_num != p_num) {
- double dec = p_num - (float)((int)p_num);
+ double dec = p_num - (double)((int)p_num);
int digit = 0;
int decimals = MAX_DIGITS;
@@ -1265,7 +1524,7 @@ String String::num_real(double p_num) {
dec_max = dec_max * 10 + 9;
digit++;
- if ((dec - (float)((int)dec)) < 1e-6) {
+ if ((dec - (double)((int)dec)) < 1e-6) {
break;
}
@@ -1302,7 +1561,7 @@ String String::num_real(double p_num) {
s = "0";
} else {
while (intn) {
- CharType num = '0' + (intn % 10);
+ char32_t num = '0' + (intn % 10);
intn /= 10;
s = num + s;
}
@@ -1319,6 +1578,14 @@ String String::num_scientific(double p_num) {
if (Math::is_nan(p_num)) {
return "nan";
}
+
+ if (Math::is_inf(p_num)) {
+ if (signbit(p_num)) {
+ return "-inf";
+ } else {
+ return "inf";
+ }
+ }
#ifndef NO_USE_STDLIB
char buf[256];
@@ -1348,6 +1615,26 @@ String String::num_scientific(double p_num) {
#endif
}
+String String::md5(const uint8_t *p_md5) {
+ return String::hex_encode_buffer(p_md5, 16);
+}
+
+String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
+ static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
+
+ String ret;
+ char v[2] = { 0, 0 };
+
+ for (int i = 0; i < p_len; i++) {
+ v[0] = hex[p_buffer[i] >> 4];
+ ret += v;
+ v[0] = hex[p_buffer[i] & 0xF];
+ ret += v;
+ }
+
+ return ret;
+}
+
CharString String::ascii(bool p_allow_extended) const {
if (!length()) {
return CharString();
@@ -1357,7 +1644,13 @@ CharString String::ascii(bool p_allow_extended) const {
cs.resize(size());
for (int i = 0; i < size(); i++) {
- cs[i] = operator[](i);
+ char32_t c = operator[](i);
+ if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
+ cs[i] = c;
+ } else {
+ print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
+ cs[i] = 0x20;
+ }
}
return cs;
@@ -1371,7 +1664,7 @@ String String::utf8(const char *p_utf8, int p_len) {
}
bool String::parse_utf8(const char *p_utf8, int p_len) {
-#define _UNICERROR(m_err) print_line("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
+#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
if (!p_utf8) {
return true;
@@ -1384,9 +1677,9 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
/* HANDLE BOM (Byte Order Mark) */
if (p_len < 0 || p_len >= 3) {
- bool has_bom = uint8_t(p_utf8[0]) == 0xEF && uint8_t(p_utf8[1]) == 0xBB && uint8_t(p_utf8[2]) == 0xBF;
+ bool has_bom = uint8_t(p_utf8[0]) == 0xef && uint8_t(p_utf8[1]) == 0xbb && uint8_t(p_utf8[2]) == 0xbf;
if (has_bom) {
- //just skip it
+ //8-bit encoding, byte order has no meaning in UTF-8, just skip it
if (p_len >= 0) {
p_len -= 3;
}
@@ -1405,24 +1698,19 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
/* Determine the number of characters in sequence */
if ((c & 0x80) == 0) {
skip = 0;
- } else if ((c & 0xE0) == 0xC0) {
+ } else if ((c & 0xe0) == 0xc0) {
skip = 1;
- } else if ((c & 0xF0) == 0xE0) {
+ } else if ((c & 0xf0) == 0xe0) {
skip = 2;
- } else if ((c & 0xF8) == 0xF0) {
+ } else if ((c & 0xf8) == 0xf0) {
skip = 3;
- } else if ((c & 0xFC) == 0xF8) {
- skip = 4;
- } else if ((c & 0xFE) == 0xFC) {
- skip = 5;
} else {
- _UNICERROR("invalid skip");
+ _UNICERROR("invalid skip at " + num_int64(cstr_size));
return true; //invalid utf8
}
- if (skip == 1 && (c & 0x1E) == 0) {
- //printf("overlong rejected\n");
- _UNICERROR("overlong rejected");
+ if (skip == 1 && (c & 0x1e) == 0) {
+ _UNICERROR("overlong rejected at " + num_int64(cstr_size));
return true; //reject overlong
}
@@ -1448,7 +1736,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
resize(str_size + 1);
- CharType *dst = ptrw();
+ char32_t *dst = ptrw();
dst[str_size] = 0;
while (cstr_size) {
@@ -1457,19 +1745,14 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
/* Determine the number of characters in sequence */
if ((*p_utf8 & 0x80) == 0) {
len = 1;
- } else if ((*p_utf8 & 0xE0) == 0xC0) {
+ } else if ((*p_utf8 & 0xe0) == 0xc0) {
len = 2;
- } else if ((*p_utf8 & 0xF0) == 0xE0) {
+ } else if ((*p_utf8 & 0xf0) == 0xe0) {
len = 3;
- } else if ((*p_utf8 & 0xF8) == 0xF0) {
+ } else if ((*p_utf8 & 0xf8) == 0xf0) {
len = 4;
- } else if ((*p_utf8 & 0xFC) == 0xF8) {
- len = 5;
- } else if ((*p_utf8 & 0xFE) == 0xFC) {
- len = 6;
} else {
_UNICERROR("invalid len");
-
return true; //invalid UTF8
}
@@ -1479,7 +1762,6 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
if (len == 2 && (*p_utf8 & 0x1E) == 0) {
- //printf("overlong rejected\n");
_UNICERROR("no space left");
return true; //reject overlong
}
@@ -1491,24 +1773,23 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
if (len == 1) {
unichar = *p_utf8;
} else {
- unichar = (0xFF >> (len + 1)) & *p_utf8;
+ unichar = (0xff >> (len + 1)) & *p_utf8;
for (int i = 1; i < len; i++) {
- if ((p_utf8[i] & 0xC0) != 0x80) {
+ if ((p_utf8[i] & 0xc0) != 0x80) {
_UNICERROR("invalid utf8");
return true; //invalid utf8
}
- if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7F) >> (7 - len)) == 0) {
+ if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
_UNICERROR("invalid utf8 overlong");
return true; //no overlong
}
- unichar = (unichar << 6) | (p_utf8[i] & 0x3F);
+ unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
}
}
-
- //printf("char %i, len %i\n",unichar,len);
- if (sizeof(wchar_t) == 2 && unichar > 0xFFFF) {
- unichar = ' '; //too long for windows
+ if (unichar >= 0xd800 && unichar <= 0xdfff) {
+ _UNICERROR("invalid code point");
+ return CharString();
}
*(dst++) = unichar;
@@ -1517,6 +1798,7 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
}
return false;
+#undef _UNICERROR
}
CharString String::utf8() const {
@@ -1525,7 +1807,7 @@ CharString String::utf8() const {
return CharString();
}
- const CharType *d = &operator[](0);
+ const char32_t *d = &operator[](0);
int fl = 0;
for (int i = 0; i < l; i++) {
uint32_t c = d[i];
@@ -1535,13 +1817,15 @@ CharString String::utf8() const {
fl += 2;
} else if (c <= 0xffff) { // 16 bits
fl += 3;
- } else if (c <= 0x001fffff) { // 21 bits
+ } else if (c <= 0x0010ffff) { // 21 bits
fl += 4;
-
- } else if (c <= 0x03ffffff) { // 26 bits
- fl += 5;
- } else if (c <= 0x7fffffff) { // 31 bits
- fl += 6;
+ } else {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
+ return CharString();
+ }
+ if (c >= 0xd800 && c <= 0xdfff) {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
+ return CharString();
}
}
@@ -1561,35 +1845,17 @@ CharString String::utf8() const {
if (c <= 0x7f) { // 7 bits.
APPEND_CHAR(c);
} else if (c <= 0x7ff) { // 11 bits
-
APPEND_CHAR(uint32_t(0xc0 | ((c >> 6) & 0x1f))); // Top 5 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
} else if (c <= 0xffff) { // 16 bits
-
APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
- } else if (c <= 0x001fffff) { // 21 bits
-
+ } else { // 21 bits
APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
- } else if (c <= 0x03ffffff) { // 26 bits
-
- APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
- } else if (c <= 0x7fffffff) { // 31 bits
-
- APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
- APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
}
}
#undef APPEND_CHAR
@@ -1598,21 +1864,191 @@ CharString String::utf8() const {
return utf8s;
}
-/*
-String::String(CharType p_char) {
+String String::utf16(const char16_t *p_utf16, int p_len) {
+ String ret;
+ ret.parse_utf16(p_utf16, p_len);
+
+ return ret;
+}
+
+bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
+#define _UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
+
+ if (!p_utf16) {
+ return true;
+ }
+
+ String aux;
+
+ int cstr_size = 0;
+ int str_size = 0;
+
+ /* HANDLE BOM (Byte Order Mark) */
+ bool byteswap = false; // assume correct endianness if no BOM found
+ if (p_len < 0 || p_len >= 1) {
+ bool has_bom = false;
+ if (uint16_t(p_utf16[0]) == 0xfeff) { // correct BOM, read as is
+ has_bom = true;
+ byteswap = false;
+ } else if (uint16_t(p_utf16[0]) == 0xfffe) { // backwards BOM, swap bytes
+ has_bom = true;
+ byteswap = true;
+ }
+ if (has_bom) {
+ if (p_len >= 0) {
+ p_len -= 1;
+ }
+ p_utf16 += 1;
+ }
+ }
+
+ {
+ const char16_t *ptrtmp = p_utf16;
+ const char16_t *ptrtmp_limit = &p_utf16[p_len];
+ int skip = 0;
+ while (ptrtmp != ptrtmp_limit && *ptrtmp) {
+ uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
+ if (skip == 0) {
+ if ((c & 0xfffffc00) == 0xd800) {
+ skip = 1; // lead surrogate
+ } else if ((c & 0xfffffc00) == 0xdc00) {
+ _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
+ return true; // invalid UTF16
+ } else {
+ skip = 0;
+ }
+ str_size++;
+ } else {
+ if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
+ --skip;
+ } else {
+ _UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
+ return true; // invalid UTF16
+ }
+ }
+
+ cstr_size++;
+ ptrtmp++;
+ }
+
+ if (skip) {
+ _UNICERROR("no space left");
+ return true; // not enough space
+ }
+ }
+
+ if (str_size == 0) {
+ clear();
+ return false;
+ }
+
+ resize(str_size + 1);
+ char32_t *dst = ptrw();
+ dst[str_size] = 0;
+
+ while (cstr_size) {
+ int len = 0;
+ uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
+
+ if ((c & 0xfffffc00) == 0xd800) {
+ len = 2;
+ } else {
+ len = 1;
+ }
+
+ if (len > cstr_size) {
+ _UNICERROR("no space left");
+ return true; //not enough space
+ }
+
+ uint32_t unichar = 0;
+ if (len == 1) {
+ unichar = c;
+ } else {
+ uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
+ unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+ }
+
+ *(dst++) = unichar;
+ cstr_size -= len;
+ p_utf16 += len;
+ }
- shared=nullptr;
- copy_from(p_char);
+ return false;
+#undef _UNICERROR
}
+Char16String String::utf16() const {
+ int l = length();
+ if (!l) {
+ return Char16String();
+ }
-*/
+ const char32_t *d = &operator[](0);
+ int fl = 0;
+ for (int i = 0; i < l; i++) {
+ uint32_t c = d[i];
+ if (c <= 0xffff) { // 16 bits.
+ fl += 1;
+ } else if (c <= 0x10ffff) { // 32 bits.
+ fl += 2;
+ } else {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
+ return Char16String();
+ }
+ if (c >= 0xd800 && c <= 0xdfff) {
+ print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
+ return Char16String();
+ }
+ }
+
+ Char16String utf16s;
+ if (fl == 0) {
+ return utf16s;
+ }
+
+ utf16s.resize(fl + 1);
+ uint16_t *cdst = (uint16_t *)utf16s.get_data();
+
+#define APPEND_CHAR(m_c) *(cdst++) = m_c
+
+ for (int i = 0; i < l; i++) {
+ uint32_t c = d[i];
+
+ if (c <= 0xffff) { // 16 bits.
+ APPEND_CHAR(c);
+ } else { // 32 bits.
+ APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
+ APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
+ }
+ }
+#undef APPEND_CHAR
+ *cdst = 0; //trailing zero
+
+ return utf16s;
+}
String::String(const char *p_str) {
copy_from(p_str);
}
-String::String(const CharType *p_str, int p_clip_to_len) {
+String::String(const wchar_t *p_str) {
+ copy_from(p_str);
+}
+
+String::String(const char32_t *p_str) {
+ copy_from(p_str);
+}
+
+String::String(const char *p_str, int p_clip_to_len) {
+ copy_from(p_str, p_clip_to_len);
+}
+
+String::String(const wchar_t *p_str, int p_clip_to_len) {
+ copy_from(p_str, p_clip_to_len);
+}
+
+String::String(const char32_t *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}
@@ -1620,7 +2056,6 @@ String::String(const StrRange &p_range) {
if (!p_range.c_str) {
return;
}
-
copy_from(p_range.c_str, p_range.len);
}
@@ -1629,7 +2064,7 @@ int64_t String::hex_to_int(bool p_with_prefix) const {
return 0;
}
- const CharType *s = ptr();
+ const char32_t *s = ptr();
int64_t sign = s[0] == '-' ? -1 : 1;
@@ -1647,7 +2082,7 @@ int64_t String::hex_to_int(bool p_with_prefix) const {
int64_t hex = 0;
while (*s) {
- CharType c = LOWERCASE(*s);
+ char32_t c = LOWERCASE(*s);
int64_t n;
if (c >= '0' && c <= '9') {
n = c - '0';
@@ -1672,7 +2107,7 @@ int64_t String::bin_to_int(bool p_with_prefix) const {
return 0;
}
- const CharType *s = ptr();
+ const char32_t *s = ptr();
int64_t sign = s[0] == '-' ? -1 : 1;
@@ -1690,7 +2125,7 @@ int64_t String::bin_to_int(bool p_with_prefix) const {
int64_t binary = 0;
while (*s) {
- CharType c = LOWERCASE(*s);
+ char32_t c = LOWERCASE(*s);
int64_t n;
if (c == '0' || c == '1') {
n = c - '0';
@@ -1719,7 +2154,7 @@ int64_t String::to_int() const {
int64_t sign = 1;
for (int i = 0; i < to; i++) {
- CharType c = operator[](i);
+ char32_t c = operator[](i);
if (c >= '0' && c <= '9') {
bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as 64-bit integer, provided value is " + (sign == 1 ? "too big." : "too small."));
@@ -1765,6 +2200,37 @@ int64_t String::to_int(const char *p_str, int p_len) {
return integer * sign;
}
+int64_t String::to_int(const wchar_t *p_str, int p_len) {
+ int to = 0;
+ if (p_len >= 0) {
+ to = p_len;
+ } else {
+ while (p_str[to] != 0 && p_str[to] != '.') {
+ to++;
+ }
+ }
+
+ int64_t integer = 0;
+ int64_t sign = 1;
+
+ for (int i = 0; i < to; i++) {
+ wchar_t c = p_str[i];
+ if (c >= '0' && c <= '9') {
+ bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
+ ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as integer, provided value is " + (sign == 1 ? "too big." : "too small."));
+ integer *= 10;
+ integer += c - '0';
+
+ } else if (c == '-' && integer == 0) {
+ sign = -sign;
+ } else if (c != ' ') {
+ break;
+ }
+ }
+
+ return integer * sign;
+}
+
bool String::is_numeric() const {
if (length() == 0) {
return false;
@@ -1776,14 +2242,13 @@ bool String::is_numeric() const {
}
bool dot = false;
for (int i = s; i < length(); i++) {
- CharType c = operator[](i);
+ char32_t c = operator[](i);
if (c == '.') {
if (dot) {
return false;
}
dot = true;
- }
- if (c < '0' || c > '9') {
+ } else if (c < '0' || c > '9') {
return false;
}
}
@@ -1945,11 +2410,11 @@ static double built_in_strtod(const C *string, /* A decimal ASCII floating-point
}
expSign = false;
}
- if (!IS_DIGIT(CharType(*p))) {
+ if (!IS_DIGIT(char32_t(*p))) {
p = pExp;
goto done;
}
- while (IS_DIGIT(CharType(*p))) {
+ while (IS_DIGIT(char32_t(*p))) {
exp = exp * 10 + (*p - '0');
p += 1;
}
@@ -2007,19 +2472,18 @@ done:
#define READING_DONE 4
double String::to_float(const char *p_str) {
-#ifndef NO_USE_STDLIB
- return built_in_strtod<char>(p_str);
-//return atof(p_str); DOES NOT WORK ON ANDROID(??)
-#else
return built_in_strtod<char>(p_str);
-#endif
}
-double String::to_float(const CharType *p_str, const CharType **r_end) {
- return built_in_strtod<CharType>(p_str, (CharType **)r_end);
+double String::to_float(const char32_t *p_str, const char32_t **r_end) {
+ return built_in_strtod<char32_t>(p_str, (char32_t **)r_end);
}
-int64_t String::to_int(const CharType *p_str, int p_len, bool p_clamp) {
+double String::to_float(const wchar_t *p_str, const wchar_t **r_end) {
+ return built_in_strtod<wchar_t>(p_str, (wchar_t **)r_end);
+}
+
+int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
if (p_len == 0 || !p_str[0]) {
return 0;
}
@@ -2029,11 +2493,11 @@ int64_t String::to_int(const CharType *p_str, int p_len, bool p_clamp) {
int64_t sign = 1;
int reading = READING_SIGN;
- const CharType *str = p_str;
- const CharType *limit = &p_str[p_len];
+ const char32_t *str = p_str;
+ const char32_t *limit = &p_str[p_len];
while (*str && reading != READING_DONE && str != limit) {
- CharType c = *(str++);
+ char32_t c = *(str++);
switch (reading) {
case READING_SIGN: {
if (c >= '0' && c <= '9') {
@@ -2087,26 +2551,7 @@ double String::to_float() const {
if (empty()) {
return 0;
}
-#ifndef NO_USE_STDLIB
- return built_in_strtod<CharType>(c_str());
-//return wcstod(c_str(),nullptr ); DOES NOT WORK ON ANDROID :(
-#else
- return built_in_strtod<CharType>(c_str());
-#endif
-}
-
-bool operator==(const char *p_chr, const String &p_str) {
- return p_str == p_chr;
-}
-
-String operator+(const char *p_chr, const String &p_str) {
- String tmp = p_chr;
- tmp += p_str;
- return tmp;
-}
-
-String operator+(CharType p_chr, const String &p_str) {
- return (String::chr(p_chr) + p_str);
+ return built_in_strtod<char32_t>(get_data());
}
uint32_t String::hash(const char *p_cstr) {
@@ -2129,7 +2574,27 @@ uint32_t String::hash(const char *p_cstr, int p_len) {
return hashv;
}
-uint32_t String::hash(const CharType *p_cstr, int p_len) {
+uint32_t String::hash(const wchar_t *p_cstr, int p_len) {
+ uint32_t hashv = 5381;
+ for (int i = 0; i < p_len; i++) {
+ hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
+ }
+
+ return hashv;
+}
+
+uint32_t String::hash(const wchar_t *p_cstr) {
+ uint32_t hashv = 5381;
+ uint32_t c;
+
+ while ((c = *p_cstr++)) {
+ hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
+ }
+
+ return hashv;
+}
+
+uint32_t String::hash(const char32_t *p_cstr, int p_len) {
uint32_t hashv = 5381;
for (int i = 0; i < p_len; i++) {
hashv = ((hashv << 5) + hashv) + p_cstr[i]; /* hash * 33 + c */
@@ -2138,7 +2603,7 @@ uint32_t String::hash(const CharType *p_cstr, int p_len) {
return hashv;
}
-uint32_t String::hash(const CharType *p_cstr) {
+uint32_t String::hash(const char32_t *p_cstr) {
uint32_t hashv = 5381;
uint32_t c;
@@ -2152,7 +2617,7 @@ uint32_t String::hash(const CharType *p_cstr) {
uint32_t String::hash() const {
/* simple djb2 hashing */
- const CharType *chr = c_str();
+ const char32_t *chr = get_data();
uint32_t hashv = 5381;
uint32_t c;
@@ -2166,7 +2631,7 @@ uint32_t String::hash() const {
uint64_t String::hash64() const {
/* simple djb2 hashing */
- const CharType *chr = c_str();
+ const char32_t *chr = get_data();
uint64_t hashv = 5381;
uint64_t c;
@@ -2278,7 +2743,7 @@ String String::substr(int p_from, int p_chars) const {
}
String s = String();
- s.copy_from_unchecked(&c_str()[p_from], p_chars);
+ s.copy_from_unchecked(&get_data()[p_from], p_chars);
return s;
}
@@ -2295,8 +2760,8 @@ int String::find(const String &p_str, int p_from) const {
return -1; // won't find anything!
}
- const CharType *src = c_str();
- const CharType *str = p_str.c_str();
+ const char32_t *src = get_data();
+ const char32_t *str = p_str.get_data();
for (int i = p_from; i <= (len - src_len); i++) {
bool found = true;
@@ -2333,7 +2798,7 @@ int String::find(const char *p_str, int p_from) const {
return -1; // won't find anything!
}
- const CharType *src = c_str();
+ const char32_t *src = get_data();
int src_len = 0;
while (p_str[src_len] != '\0') {
@@ -2341,7 +2806,7 @@ int String::find(const char *p_str, int p_from) const {
}
if (src_len == 1) {
- const char needle = p_str[0];
+ const char32_t needle = p_str[0];
for (int i = p_from; i < len; i++) {
if (src[i] == needle) {
@@ -2360,7 +2825,7 @@ int String::find(const char *p_str, int p_from) const {
return -1;
}
- if (src[read_pos] != p_str[j]) {
+ if (src[read_pos] != (char32_t)p_str[j]) {
found = false;
break;
}
@@ -2375,7 +2840,7 @@ int String::find(const char *p_str, int p_from) const {
return -1;
}
-int String::find_char(const CharType &p_char, int p_from) const {
+int String::find_char(const char32_t &p_char, int p_from) const {
return _cowdata.find(p_char, p_from);
}
@@ -2396,7 +2861,7 @@ int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const {
return -1; // won't find anything!
}
- const CharType *src = c_str();
+ const char32_t *src = get_data();
for (int i = p_from; i < len; i++) {
bool found = true;
@@ -2405,7 +2870,7 @@ int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const {
if (r_key) {
*r_key = k;
}
- const CharType *cmp = keys[k].c_str();
+ const char32_t *cmp = keys[k].get_data();
int l = keys[k].length();
for (int j = 0; j < l; j++) {
@@ -2445,7 +2910,7 @@ int String::findn(const String &p_str, int p_from) const {
return -1; // won't find anything!
}
- const CharType *srcd = c_str();
+ const char32_t *srcd = get_data();
for (int i = p_from; i <= (length() - src_len); i++) {
bool found = true;
@@ -2457,8 +2922,8 @@ int String::findn(const String &p_str, int p_from) const {
return -1;
}
- CharType src = _find_lower(srcd[read_pos]);
- CharType dst = _find_lower(p_str[j]);
+ char32_t src = _find_lower(srcd[read_pos]);
+ char32_t dst = _find_lower(p_str[j]);
if (src != dst) {
found = false;
@@ -2495,7 +2960,7 @@ int String::rfind(const String &p_str, int p_from) const {
return -1; // won't find anything!
}
- const CharType *src = c_str();
+ const char32_t *src = get_data();
for (int i = p_from; i >= 0; i--) {
bool found = true;
@@ -2542,7 +3007,7 @@ int String::rfindn(const String &p_str, int p_from) const {
return -1; // won't find anything!
}
- const CharType *src = c_str();
+ const char32_t *src = get_data();
for (int i = p_from; i >= 0; i--) {
bool found = true;
@@ -2554,8 +3019,8 @@ int String::rfindn(const String &p_str, int p_from) const {
return -1;
}
- CharType srcc = _find_lower(src[read_pos]);
- CharType dstc = _find_lower(p_str[j]);
+ char32_t srcc = _find_lower(src[read_pos]);
+ char32_t dstc = _find_lower(p_str[j]);
if (srcc != dstc) {
found = false;
@@ -2589,8 +3054,8 @@ bool String::begins_with(const String &p_string) const {
return true;
}
- const CharType *src = &p_string[0];
- const CharType *str = &operator[](0);
+ const char32_t *src = &p_string[0];
+ const char32_t *str = &operator[](0);
int i = 0;
for (; i < l; i++) {
@@ -2609,11 +3074,11 @@ bool String::begins_with(const char *p_string) const {
return false;
}
- const CharType *str = &operator[](0);
+ const char32_t *str = &operator[](0);
int i = 0;
while (*p_string && i < l) {
- if (*p_string != str[i]) {
+ if ((char32_t)*p_string != str[i]) {
return false;
}
i++;
@@ -2657,7 +3122,7 @@ int String::_count(const String &p_string, int p_from, int p_to, bool p_case_ins
}
if (p_from == 0 && p_to == len) {
str = String();
- str.copy_from_unchecked(&c_str()[0], len);
+ str.copy_from_unchecked(&get_data()[0], len);
} else {
str = substr(p_from, p_to - p_from);
}
@@ -2695,14 +3160,14 @@ bool String::_base_is_subsequence_of(const String &p_string, bool case_insensiti
return false;
}
- const CharType *src = &operator[](0);
- const CharType *tgt = &p_string[0];
+ const char32_t *src = &operator[](0);
+ const char32_t *tgt = &p_string[0];
for (; *src && *tgt; tgt++) {
bool match = false;
if (case_insensitive) {
- CharType srcc = _find_lower(*src);
- CharType tgtc = _find_lower(*tgt);
+ char32_t srcc = _find_lower(*src);
+ char32_t tgtc = _find_lower(*tgt);
match = srcc == tgtc;
} else {
match = *src == *tgt;
@@ -2748,8 +3213,8 @@ float String::similarity(const String &p_string) const {
int src_size = src_bigrams.size();
int tgt_size = tgt_bigrams.size();
- float sum = src_size + tgt_size;
- float inter = 0;
+ double sum = src_size + tgt_size;
+ double inter = 0;
for (int i = 0; i < src_size; i++) {
for (int j = 0; j < tgt_size; j++) {
if (src_bigrams[i] == tgt_bigrams[j]) {
@@ -2762,7 +3227,7 @@ float String::similarity(const String &p_string) const {
return (2.0f * inter) / sum;
}
-static bool _wildcard_match(const CharType *p_pattern, const CharType *p_string, bool p_case_sensitive) {
+static bool _wildcard_match(const char32_t *p_pattern, const char32_t *p_string, bool p_case_sensitive) {
switch (*p_pattern) {
case '\0':
return !*p_string;
@@ -2781,14 +3246,14 @@ bool String::match(const String &p_wildcard) const {
return false;
}
- return _wildcard_match(p_wildcard.c_str(), c_str(), true);
+ return _wildcard_match(p_wildcard.get_data(), get_data(), true);
}
bool String::matchn(const String &p_wildcard) const {
if (!p_wildcard.length() || !length()) {
return false;
}
- return _wildcard_match(p_wildcard.c_str(), c_str(), false);
+ return _wildcard_match(p_wildcard.get_data(), get_data(), false);
}
String String::format(const Variant &values, String placeholder) const {
@@ -2938,9 +3403,10 @@ String String::repeat(int p_count) const {
ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");
String new_string;
- const CharType *src = this->c_str();
+ const char32_t *src = this->get_data();
new_string.resize(length() * p_count + 1);
+ new_string[length() * p_count] = 0;
for (int i = 0; i < p_count; i++) {
for (int j = 0; j < length(); j++) {
@@ -2975,7 +3441,7 @@ String String::right(int p_pos) const {
return substr(p_pos, (length() - p_pos));
}
-CharType String::ord_at(int p_idx) const {
+char32_t String::ord_at(int p_idx) const {
ERR_FAIL_INDEX_V(p_idx, length(), 0);
return operator[](p_idx);
}
@@ -2989,7 +3455,7 @@ String String::dedent() const {
int indent_stop = -1;
for (int i = 0; i < length(); i++) {
- CharType c = operator[](i);
+ char32_t c = operator[](i);
if (c == '\n') {
if (has_text) {
new_string += substr(indent_stop, i - indent_stop);
@@ -3218,7 +3684,7 @@ bool String::is_valid_identifier() const {
return false;
}
- const wchar_t *str = &operator[](0);
+ const char32_t *str = &operator[](0);
for (int i = 0; i < len; i++) {
if (i == 0) {
@@ -3237,36 +3703,14 @@ bool String::is_valid_identifier() const {
return true;
}
-//kind of poor should be rewritten properly
-
-String String::word_wrap(int p_chars_per_line) const {
- int from = 0;
- int last_space = 0;
- String ret;
- for (int i = 0; i < length(); i++) {
- if (i - from >= p_chars_per_line) {
- if (last_space == -1) {
- ret += substr(from, i - from + 1) + "\n";
- } else {
- ret += substr(from, last_space - from) + "\n";
- i = last_space; //rewind
- }
- from = i + 1;
- last_space = -1;
- } else if (operator[](i) == ' ' || operator[](i) == '\t') {
- last_space = i;
- } else if (operator[](i) == '\n') {
- ret += substr(from, i - from) + "\n";
- from = i + 1;
- last_space = -1;
- }
- }
-
- if (from < length()) {
- ret += substr(from, length());
+bool String::is_valid_string() const {
+ int l = length();
+ const char32_t *src = get_data();
+ bool valid = true;
+ for (int i = 0; i < l; i++) {
+ valid = valid && (src[i] < 0xd800 || (src[i] > 0xdfff && src[i] <= 0x10ffff));
}
-
- return ret;
+ return valid;
}
String String::http_escape() const {
@@ -3297,9 +3741,9 @@ String String::http_unescape() const {
String res;
for (int i = 0; i < length(); ++i) {
if (ord_at(i) == '%' && i + 2 < length()) {
- CharType ord1 = ord_at(i + 1);
+ char32_t ord1 = ord_at(i + 1);
if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) {
- CharType ord2 = ord_at(i + 2);
+ char32_t ord2 = ord_at(i + 2);
if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) {
char bytes[3] = { (char)ord1, (char)ord2, 0 };
res += (char)strtol(bytes, nullptr, 16);
@@ -3389,18 +3833,18 @@ for (int i=1;i<32;i++) {
return str;
}
-static _FORCE_INLINE_ int _xml_unescape(const CharType *p_src, int p_src_len, CharType *p_dst) {
+static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, char32_t *p_dst) {
int len = 0;
while (p_src_len) {
if (*p_src == '&') {
int eat = 0;
if (p_src_len >= 4 && p_src[1] == '#') {
- CharType c = 0;
+ char32_t c = 0;
for (int i = 2; i < p_src_len; i++) {
eat = i + 1;
- CharType ct = p_src[i];
+ char32_t ct = p_src[i];
if (ct == ';') {
break;
} else if (ct >= '0' && ct <= '9') {
@@ -3476,12 +3920,12 @@ static _FORCE_INLINE_ int _xml_unescape(const CharType *p_src, int p_src_len, Ch
String String::xml_unescape() const {
String str;
int l = length();
- int len = _xml_unescape(c_str(), l, nullptr);
+ int len = _xml_unescape(get_data(), l, nullptr);
if (len == 0) {
return String();
}
str.resize(len + 1);
- _xml_unescape(c_str(), l, str.ptrw());
+ _xml_unescape(get_data(), l, str.ptrw());
str[len] = 0;
return str;
}
@@ -3602,7 +4046,7 @@ bool String::is_valid_hex_number(bool p_with_prefix) const {
}
for (int i = from; i < len; i++) {
- CharType c = operator[](i);
+ char32_t c = operator[](i);
if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
continue;
}
@@ -3917,7 +4361,7 @@ String String::percent_decode() const {
String String::property_name_encode() const {
// Escape and quote strings with extended ASCII or further Unicode characters
// as well as '"', '=' or ' ' (32)
- const CharType *cstr = c_str();
+ const char32_t *cstr = get_data();
for (int i = 0; cstr[i]; i++) {
if (cstr[i] == '=' || cstr[i] == '"' || cstr[i] < 33 || cstr[i] > 126) {
return "\"" + c_escape_multiline() + "\"";
@@ -3984,7 +4428,7 @@ String String::lpad(int min_length, const String &character) const {
// In case of an error, the string returned is the error description and "error" is true.
String String::sprintf(const Array &values, bool *error) const {
String formatted;
- CharType *self = (CharType *)c_str();
+ char32_t *self = (char32_t *)get_data();
bool in_format = false;
int value_index = 0;
int min_chars = 0;
@@ -3997,7 +4441,7 @@ String String::sprintf(const Array &values, bool *error) const {
*error = true;
for (; *self; self++) {
- const CharType c = *self;
+ const char32_t c = *self;
if (in_format) { // We have % - lets see what else we get.
switch (c) {
@@ -4134,9 +4578,11 @@ String String::sprintf(const Array &values, bool *error) const {
if (values[value_index].is_num()) {
int value = values[value_index];
if (value < 0) {
- return "unsigned byte integer is lower than maximum";
- } else if (value > 255) {
- return "unsigned byte integer is greater than maximum";
+ return "unsigned integer is lower than minimum";
+ } else if (value >= 0xd800 && value <= 0xdfff) {
+ return "unsigned integer is invalid Unicode character";
+ } else if (value > 0x10ffff) {
+ return "unsigned integer is greater than maximum";
}
str = chr(values[value_index]);
} else if (values[value_index].get_type() == Variant::STRING) {
diff --git a/core/ustring.h b/core/ustring.h
index 7a1c1a5232..65eeae6643 100644
--- a/core/ustring.h
+++ b/core/ustring.h
@@ -36,8 +36,13 @@
#include "core/typedefs.h"
#include "core/vector.h"
+/*************************************************************************/
+/* CharProxy */
+/*************************************************************************/
+
template <class T>
class CharProxy {
+ friend class Char16String;
friend class CharString;
friend class String;
@@ -71,6 +76,54 @@ public:
}
};
+/*************************************************************************/
+/* Char16String */
+/*************************************************************************/
+
+class Char16String {
+ CowData<char16_t> _cowdata;
+ static const char16_t _null;
+
+public:
+ _FORCE_INLINE_ char16_t *ptrw() { return _cowdata.ptrw(); }
+ _FORCE_INLINE_ const char16_t *ptr() const { return _cowdata.ptr(); }
+ _FORCE_INLINE_ int size() const { return _cowdata.size(); }
+ Error resize(int p_size) { return _cowdata.resize(p_size); }
+
+ _FORCE_INLINE_ char16_t get(int p_index) const { return _cowdata.get(p_index); }
+ _FORCE_INLINE_ void set(int p_index, const char16_t &p_elem) { _cowdata.set(p_index, p_elem); }
+ _FORCE_INLINE_ const char16_t &operator[](int p_index) const {
+ if (unlikely(p_index == _cowdata.size())) {
+ return _null;
+ }
+
+ return _cowdata.get(p_index);
+ }
+ _FORCE_INLINE_ CharProxy<char16_t> operator[](int p_index) { return CharProxy<char16_t>(p_index, _cowdata); }
+
+ _FORCE_INLINE_ Char16String() {}
+ _FORCE_INLINE_ Char16String(const Char16String &p_str) { _cowdata._ref(p_str._cowdata); }
+ _FORCE_INLINE_ Char16String operator=(const Char16String &p_str) {
+ _cowdata._ref(p_str._cowdata);
+ return *this;
+ }
+ _FORCE_INLINE_ Char16String(const char16_t *p_cstr) { copy_from(p_cstr); }
+
+ Char16String &operator=(const char16_t *p_cstr);
+ bool operator<(const Char16String &p_right) const;
+ Char16String &operator+=(char16_t p_char);
+ int length() const { return size() ? size() - 1 : 0; }
+ const char16_t *get_data() const;
+ operator const char16_t *() const { return get_data(); };
+
+protected:
+ void copy_from(const char16_t *p_cstr);
+};
+
+/*************************************************************************/
+/* CharString */
+/*************************************************************************/
+
class CharString {
CowData<char> _cowdata;
static const char _null;
@@ -111,26 +164,35 @@ protected:
void copy_from(const char *p_cstr);
};
-typedef wchar_t CharType;
+/*************************************************************************/
+/* String */
+/*************************************************************************/
struct StrRange {
- const CharType *c_str;
+ const char32_t *c_str;
int len;
- StrRange(const CharType *p_c_str = nullptr, int p_len = 0) {
+ StrRange(const char32_t *p_c_str = nullptr, int p_len = 0) {
c_str = p_c_str;
len = p_len;
}
};
class String {
- CowData<CharType> _cowdata;
- static const CharType _null;
+ CowData<char32_t> _cowdata;
+ static const char32_t _null;
void copy_from(const char *p_cstr);
- void copy_from(const CharType *p_cstr, const int p_clip_to = -1);
- void copy_from(const CharType &p_char);
- void copy_from_unchecked(const CharType *p_char, const int p_length);
+ void copy_from(const char *p_cstr, const int p_clip_to);
+ void copy_from(const wchar_t *p_cstr);
+ void copy_from(const wchar_t *p_cstr, const int p_clip_to);
+ void copy_from(const char32_t *p_cstr);
+ void copy_from(const char32_t *p_cstr, const int p_clip_to);
+
+ void copy_from(const char32_t &p_char);
+
+ void copy_from_unchecked(const char32_t *p_char, const int p_length);
+
bool _base_is_subsequence_of(const String &p_string, bool case_insensitive) const;
int _count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const;
@@ -140,48 +202,56 @@ public:
npos = -1 ///<for "some" compatibility with std::string (npos is a huge value in std::string)
};
- _FORCE_INLINE_ CharType *ptrw() { return _cowdata.ptrw(); }
- _FORCE_INLINE_ const CharType *ptr() const { return _cowdata.ptr(); }
+ _FORCE_INLINE_ char32_t *ptrw() { return _cowdata.ptrw(); }
+ _FORCE_INLINE_ const char32_t *ptr() const { return _cowdata.ptr(); }
void remove(int p_index) { _cowdata.remove(p_index); }
_FORCE_INLINE_ void clear() { resize(0); }
- _FORCE_INLINE_ CharType get(int p_index) const { return _cowdata.get(p_index); }
- _FORCE_INLINE_ void set(int p_index, const CharType &p_elem) { _cowdata.set(p_index, p_elem); }
+ _FORCE_INLINE_ char32_t get(int p_index) const { return _cowdata.get(p_index); }
+ _FORCE_INLINE_ void set(int p_index, const char32_t &p_elem) { _cowdata.set(p_index, p_elem); }
_FORCE_INLINE_ int size() const { return _cowdata.size(); }
Error resize(int p_size) { return _cowdata.resize(p_size); }
- _FORCE_INLINE_ const CharType &operator[](int p_index) const {
+ _FORCE_INLINE_ const char32_t &operator[](int p_index) const {
if (unlikely(p_index == _cowdata.size())) {
return _null;
}
return _cowdata.get(p_index);
}
- _FORCE_INLINE_ CharProxy<CharType> operator[](int p_index) { return CharProxy<CharType>(p_index, _cowdata); }
+ _FORCE_INLINE_ CharProxy<char32_t> operator[](int p_index) { return CharProxy<char32_t>(p_index, _cowdata); }
bool operator==(const String &p_str) const;
bool operator!=(const String &p_str) const;
String operator+(const String &p_str) const;
- //String operator+(CharType p_char) const;
String &operator+=(const String &);
- String &operator+=(CharType p_char);
+ String &operator+=(char32_t p_char);
String &operator+=(const char *p_str);
- String &operator+=(const CharType *p_str);
+ String &operator+=(const wchar_t *p_str);
+ String &operator+=(const char32_t *p_str);
/* Compatibility Operators */
void operator=(const char *p_str);
- void operator=(const CharType *p_str);
+ void operator=(const wchar_t *p_str);
+ void operator=(const char32_t *p_str);
+
bool operator==(const char *p_str) const;
- bool operator==(const CharType *p_str) const;
+ bool operator==(const wchar_t *p_str) const;
+ bool operator==(const char32_t *p_str) const;
bool operator==(const StrRange &p_str_range) const;
+
bool operator!=(const char *p_str) const;
- bool operator!=(const CharType *p_str) const;
- bool operator<(const CharType *p_str) const;
+ bool operator!=(const wchar_t *p_str) const;
+ bool operator!=(const char32_t *p_str) const;
+
+ bool operator<(const char32_t *p_str) const;
bool operator<(const char *p_str) const;
+ bool operator<(const wchar_t *p_str) const;
+
bool operator<(const String &p_str) const;
bool operator<=(const String &p_str) const;
@@ -189,7 +259,7 @@ public:
signed char nocasecmp_to(const String &p_str) const;
signed char naturalnocasecmp_to(const String &p_str) const;
- const CharType *c_str() const;
+ const char32_t *get_data() const;
/* standard size stuff */
_FORCE_INLINE_ int length() const {
@@ -197,11 +267,13 @@ public:
return s ? (s - 1) : 0; // length does not include zero
}
+ bool is_valid_string() const;
+
/* complex helpers */
String substr(int p_from, int p_chars = -1) const;
int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed
int find(const char *p_str, int p_from = 0) const; ///< return <0 if failed
- int find_char(const CharType &p_char, int p_from = 0) const; ///< return <0 if failed
+ int find_char(const char32_t &p_char, int p_from = 0) const; ///< return <0 if failed
int findn(const String &p_str, int p_from = 0) const; ///< return <0 if failed, case insensitive
int rfind(const String &p_str, int p_from = -1) const; ///< return <0 if failed
int rfindn(const String &p_str, int p_from = -1) const; ///< return <0 if failed, case insensitive
@@ -238,26 +310,31 @@ public:
static String num_real(double p_num);
static String num_int64(int64_t p_num, int base = 10, bool capitalize_hex = false);
static String num_uint64(uint64_t p_num, int base = 10, bool capitalize_hex = false);
- static String chr(CharType p_char);
+ static String chr(char32_t p_char);
static String md5(const uint8_t *p_md5);
static String hex_encode_buffer(const uint8_t *p_buffer, int p_len);
bool is_numeric() const;
- double to_float() const;
+ double to_float() const;
int64_t hex_to_int(bool p_with_prefix = true) const;
int64_t bin_to_int(bool p_with_prefix = true) const;
int64_t to_int() const;
+
static int64_t to_int(const char *p_str, int p_len = -1);
+ static int64_t to_int(const wchar_t *p_str, int p_len = -1);
+ static int64_t to_int(const char32_t *p_str, int p_len = -1, bool p_clamp = false);
+
static double to_float(const char *p_str);
- static double to_float(const CharType *p_str, const CharType **r_end = nullptr);
- static int64_t to_int(const CharType *p_str, int p_len = -1, bool p_clamp = false);
+ static double to_float(const wchar_t *p_str, const wchar_t **r_end = nullptr);
+ static double to_float(const char32_t *p_str, const char32_t **r_end = nullptr);
+
String capitalize() const;
String camelcase_to_underscore(bool lowercase = true) const;
String get_with_code_lines() const;
int get_slice_count(String p_splitter) const;
String get_slice(String p_splitter, int p_slice) const;
- String get_slicec(CharType p_splitter, int p_slice) const;
+ String get_slicec(char32_t p_splitter, int p_slice) const;
Vector<String> split(const String &p_splitter, bool p_allow_empty = true, int p_maxsplit = 0) const;
Vector<String> rsplit(const String &p_splitter, bool p_allow_empty = true, int p_maxsplit = 0) const;
@@ -267,10 +344,10 @@ public:
Vector<int> split_ints(const String &p_splitter, bool p_allow_empty = true) const;
Vector<int> split_ints_mk(const Vector<String> &p_splitters, bool p_allow_empty = true) const;
- String join(Vector<String> parts);
+ String join(Vector<String> parts) const;
- static CharType char_uppercase(CharType p_char);
- static CharType char_lowercase(CharType p_char);
+ static char32_t char_uppercase(char32_t p_char);
+ static char32_t char_lowercase(char32_t p_char);
String to_upper() const;
String to_lower() const;
@@ -287,7 +364,7 @@ public:
String get_extension() const;
String get_basename() const;
String plus_file(const String &p_file) const;
- CharType ord_at(int p_idx) const;
+ char32_t ord_at(int p_idx) const;
void erase(int p_pos, int p_chars);
@@ -296,8 +373,14 @@ public:
bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error
static String utf8(const char *p_utf8, int p_len = -1);
- static uint32_t hash(const CharType *p_cstr, int p_len); /* hash the string */
- static uint32_t hash(const CharType *p_cstr); /* hash the string */
+ Char16String utf16() const;
+ bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error
+ static String utf16(const char16_t *p_utf16, int p_len = -1);
+
+ static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
+ static uint32_t hash(const char32_t *p_cstr); /* hash the string */
+ static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */
+ static uint32_t hash(const wchar_t *p_cstr); /* hash the string */
static uint32_t hash(const char *p_cstr, int p_len); /* hash the string */
static uint32_t hash(const char *p_cstr); /* hash the string */
uint32_t hash() const; /* hash the string */
@@ -348,7 +431,7 @@ public:
/**
* The constructors must not depend on other overloads
*/
- /* String(CharType p_char);*/
+ /* String(char32_t p_char);*/
_FORCE_INLINE_ String() {}
_FORCE_INLINE_ String(const String &p_str) { _cowdata._ref(p_str._cowdata); }
@@ -358,14 +441,20 @@ public:
}
String(const char *p_str);
- String(const CharType *p_str, int p_clip_to_len = -1);
+ String(const wchar_t *p_str);
+ String(const char32_t *p_str);
+ String(const char *p_str, int p_clip_to_len);
+ String(const wchar_t *p_str, int p_clip_to_len);
+ String(const char32_t *p_str, int p_clip_to_len);
String(const StrRange &p_range);
};
bool operator==(const char *p_chr, const String &p_str);
+bool operator==(const wchar_t *p_chr, const String &p_str);
String operator+(const char *p_chr, const String &p_str);
-String operator+(CharType p_chr, const String &p_str);
+String operator+(const wchar_t *p_chr, const String &p_str);
+String operator+(char32_t p_chr, const String &p_str);
String itos(int64_t p_val);
String uitos(uint64_t p_val);
@@ -387,15 +476,18 @@ struct NaturalNoCaseComparator {
template <typename L, typename R>
_FORCE_INLINE_ bool is_str_less(const L *l_ptr, const R *r_ptr) {
while (true) {
- if (*l_ptr == 0 && *r_ptr == 0) {
+ const char32_t l = *l_ptr;
+ const char32_t r = *r_ptr;
+
+ if (l == 0 && r == 0) {
return false;
- } else if (*l_ptr == 0) {
+ } else if (l == 0) {
return true;
- } else if (*r_ptr == 0) {
+ } else if (r == 0) {
return false;
- } else if (*l_ptr < *r_ptr) {
+ } else if (l < r) {
return true;
- } else if (*l_ptr > *r_ptr) {
+ } else if (l > r) {
return false;
}
@@ -432,7 +524,7 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St
String RTR(const String &p_text, const String &p_context = "");
String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = "");
-bool is_symbol(CharType c);
+bool is_symbol(char32_t c);
bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end);
#endif // USTRING_H
diff --git a/core/variant.cpp b/core/variant.cpp
index c19ce79e64..181ced0f32 100644
--- a/core/variant.cpp
+++ b/core/variant.cpp
@@ -1558,7 +1558,7 @@ Variant::operator unsigned char() const {
}
}
-Variant::operator CharType() const {
+Variant::operator char32_t() const {
return operator unsigned int();
}
@@ -2445,7 +2445,7 @@ Variant::Variant(const char *const p_cstring) {
memnew_placement(_data._mem, String((const char *)p_cstring));
}
-Variant::Variant(const CharType *p_wstring) {
+Variant::Variant(const char32_t *p_wstring) {
type = STRING;
memnew_placement(_data._mem, String(p_wstring));
}
diff --git a/core/variant.h b/core/variant.h
index 27a709b473..112003a7ae 100644
--- a/core/variant.h
+++ b/core/variant.h
@@ -246,7 +246,7 @@ public:
operator ObjectID() const;
- operator CharType() const;
+ operator char32_t() const;
operator float() const;
operator double() const;
operator String() const;
@@ -323,7 +323,7 @@ public:
Variant(const String &p_string);
Variant(const StringName &p_string);
Variant(const char *const p_cstring);
- Variant(const CharType *p_wstring);
+ Variant(const char32_t *p_wstring);
Variant(const Vector2 &p_vector2);
Variant(const Vector2i &p_vector2i);
Variant(const Rect2 &p_rect2);
diff --git a/core/variant_call.cpp b/core/variant_call.cpp
index 91af127d32..7a1fdbaafe 100644
--- a/core/variant_call.cpp
+++ b/core/variant_call.cpp
@@ -239,6 +239,7 @@ struct _VariantCall {
VCALL_LOCALMEM1R(String, casecmp_to);
VCALL_LOCALMEM1R(String, nocasecmp_to);
+ VCALL_LOCALMEM1R(String, naturalnocasecmp_to);
VCALL_LOCALMEM0R(String, length);
VCALL_LOCALMEM3R(String, count);
VCALL_LOCALMEM3R(String, countn);
@@ -311,6 +312,8 @@ struct _VariantCall {
VCALL_LOCALMEM0R(String, to_int);
VCALL_LOCALMEM0R(String, to_float);
VCALL_LOCALMEM0R(String, hex_to_int);
+ VCALL_LOCALMEM2R(String, lpad);
+ VCALL_LOCALMEM2R(String, rpad);
VCALL_LOCALMEM1R(String, pad_decimals);
VCALL_LOCALMEM1R(String, pad_zeros);
VCALL_LOCALMEM1R(String, trim_prefix);
@@ -350,6 +353,39 @@ struct _VariantCall {
r_ret = retval;
}
+ static void _call_String_to_utf16(Variant &r_ret, Variant &p_self, const Variant **p_args) {
+ String *s = reinterpret_cast<String *>(p_self._data._mem);
+ if (s->empty()) {
+ r_ret = PackedByteArray();
+ return;
+ }
+ Char16String charstr = s->utf16();
+
+ PackedByteArray retval;
+ size_t len = charstr.length() * 2;
+ retval.resize(len);
+ uint8_t *w = retval.ptrw();
+ copymem(w, (const void *)charstr.ptr(), len);
+
+ r_ret = retval;
+ }
+
+ static void _call_String_to_utf32(Variant &r_ret, Variant &p_self, const Variant **p_args) {
+ String *s = reinterpret_cast<String *>(p_self._data._mem);
+ if (s->empty()) {
+ r_ret = PackedByteArray();
+ return;
+ }
+
+ PackedByteArray retval;
+ size_t len = s->length() * 4;
+ retval.resize(len);
+ uint8_t *w = retval.ptrw();
+ copymem(w, (const void *)s->ptr(), len);
+
+ r_ret = retval;
+ }
+
VCALL_LOCALMEM1R(Vector2, distance_to);
VCALL_LOCALMEM1R(Vector2, distance_squared_to);
VCALL_LOCALMEM0R(Vector2, length);
@@ -618,6 +654,26 @@ struct _VariantCall {
r_ret = s;
}
+ static void _call_PackedByteArray_get_string_from_utf16(Variant &r_ret, Variant &p_self, const Variant **p_args) {
+ PackedByteArray *ba = reinterpret_cast<PackedByteArray *>(p_self._data._mem);
+ String s;
+ if (ba->size() > 0) {
+ const uint8_t *r = ba->ptr();
+ s.parse_utf16((const char16_t *)r, ba->size() / 2);
+ }
+ r_ret = s;
+ }
+
+ static void _call_PackedByteArray_get_string_from_utf32(Variant &r_ret, Variant &p_self, const Variant **p_args) {
+ PackedByteArray *ba = reinterpret_cast<PackedByteArray *>(p_self._data._mem);
+ String s;
+ if (ba->size() > 0) {
+ const uint8_t *r = ba->ptr();
+ s = String((const char32_t *)r, ba->size() / 4);
+ }
+ r_ret = s;
+ }
+
static void _call_PackedByteArray_compress(Variant &r_ret, Variant &p_self, const Variant **p_args) {
PackedByteArray *ba = reinterpret_cast<PackedByteArray *>(p_self._data._mem);
PackedByteArray compressed;
@@ -1789,6 +1845,7 @@ void register_variant_methods() {
/* STRING */
ADDFUNC1R(STRING, INT, String, casecmp_to, STRING, "to", varray());
ADDFUNC1R(STRING, INT, String, nocasecmp_to, STRING, "to", varray());
+ ADDFUNC1R(STRING, INT, String, naturalnocasecmp_to, STRING, "to", varray());
ADDFUNC0R(STRING, INT, String, length, varray());
ADDFUNC2R(STRING, STRING, String, substr, INT, "from", INT, "len", varray(-1));
@@ -1867,6 +1924,8 @@ void register_variant_methods() {
ADDFUNC0R(STRING, INT, String, to_int, varray());
ADDFUNC0R(STRING, FLOAT, String, to_float, varray());
ADDFUNC0R(STRING, INT, String, hex_to_int, varray());
+ ADDFUNC2R(STRING, STRING, String, lpad, INT, "min_length", STRING, "character", varray(" "));
+ ADDFUNC2R(STRING, STRING, String, rpad, INT, "min_length", STRING, "character", varray(" "));
ADDFUNC1R(STRING, STRING, String, pad_decimals, INT, "digits", varray());
ADDFUNC1R(STRING, STRING, String, pad_zeros, INT, "digits", varray());
ADDFUNC1R(STRING, STRING, String, trim_prefix, STRING, "prefix", varray());
@@ -1874,6 +1933,8 @@ void register_variant_methods() {
ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_ascii, varray());
ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_utf8, varray());
+ ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_utf16, varray());
+ ADDFUNC0R(STRING, PACKED_BYTE_ARRAY, String, to_utf32, varray());
ADDFUNC0R(VECTOR2, FLOAT, Vector2, angle, varray());
ADDFUNC1R(VECTOR2, FLOAT, Vector2, angle_to, VECTOR2, "to", varray());
@@ -2109,6 +2170,8 @@ void register_variant_methods() {
ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_ascii, varray());
ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_utf8, varray());
+ ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_utf16, varray());
+ ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, get_string_from_utf32, varray());
ADDFUNC0R(PACKED_BYTE_ARRAY, STRING, PackedByteArray, hex_encode, varray());
ADDFUNC1R(PACKED_BYTE_ARRAY, PACKED_BYTE_ARRAY, PackedByteArray, compress, INT, "compression_mode", varray(0));
ADDFUNC2R(PACKED_BYTE_ARRAY, PACKED_BYTE_ARRAY, PackedByteArray, decompress, INT, "buffer_size", INT, "compression_mode", varray(0));
diff --git a/core/variant_op.cpp b/core/variant_op.cpp
index 0cb2fe29a1..95b488230d 100644
--- a/core/variant_op.cpp
+++ b/core/variant_op.cpp
@@ -4215,7 +4215,7 @@ void Variant::interpolate(const Variant &a, const Variant &b, float c, Variant &
int split = csize / 2;
for (int i = 0; i < csize; i++) {
- CharType chr = ' ';
+ char32_t chr = ' ';
if (i < split) {
if (i < sa.length()) {
diff --git a/core/variant_parser.cpp b/core/variant_parser.cpp
index 74f4f32c0e..3c4fed68fb 100644
--- a/core/variant_parser.cpp
+++ b/core/variant_parser.cpp
@@ -35,7 +35,7 @@
#include "core/os/keyboard.h"
#include "core/string_buffer.h"
-CharType VariantParser::StreamFile::get_char() {
+char32_t VariantParser::StreamFile::get_char() {
return f->get_8();
}
@@ -47,7 +47,7 @@ bool VariantParser::StreamFile::is_eof() const {
return f->eof_reached();
}
-CharType VariantParser::StreamString::get_char() {
+char32_t VariantParser::StreamString::get_char() {
if (pos > s.length()) {
return 0;
} else if (pos == s.length()) {
@@ -94,7 +94,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
bool string_name = false;
while (true) {
- CharType cchar;
+ char32_t cchar;
if (p_stream->saved) {
cchar = p_stream->saved;
p_stream->saved = 0;
@@ -145,7 +145,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
}
case ';': {
while (true) {
- CharType ch = p_stream->get_char();
+ char32_t ch = p_stream->get_char();
if (p_stream->is_eof()) {
r_token.type = TK_EOF;
return OK;
@@ -173,7 +173,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
StringBuffer<> color_str;
color_str += '#';
while (true) {
- CharType ch = p_stream->get_char();
+ char32_t ch = p_stream->get_char();
if (p_stream->is_eof()) {
r_token.type = TK_EOF;
return OK;
@@ -204,7 +204,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
case '"': {
String str;
while (true) {
- CharType ch = p_stream->get_char();
+ char32_t ch = p_stream->get_char();
if (ch == 0) {
r_err_str = "Unterminated String";
@@ -214,13 +214,13 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
break;
} else if (ch == '\\') {
//escaped characters...
- CharType next = p_stream->get_char();
+ char32_t next = p_stream->get_char();
if (next == 0) {
r_err_str = "Unterminated String";
r_token.type = TK_ERROR;
return ERR_PARSE_ERROR;
}
- CharType res = 0;
+ char32_t res = 0;
switch (next) {
case 'b':
@@ -241,7 +241,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
case 'u': {
//hex number
for (int j = 0; j < 4; j++) {
- CharType c = p_stream->get_char();
+ char32_t c = p_stream->get_char();
if (c == 0) {
r_err_str = "Unterminated String";
r_token.type = TK_ERROR;
@@ -252,7 +252,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
r_token.type = TK_ERROR;
return ERR_PARSE_ERROR;
}
- CharType v;
+ char32_t v;
if (c >= '0' && c <= '9') {
v = c - '0';
} else if (c >= 'a' && c <= 'f') {
@@ -321,7 +321,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
cchar = p_stream->get_char();
}
- CharType c = cchar;
+ char32_t c = cchar;
bool exp_sign = false;
bool exp_beg = false;
bool is_float = false;
@@ -421,7 +421,7 @@ Error VariantParser::_parse_enginecfg(Stream *p_stream, Vector<String> &strings,
String accum;
while (true) {
- CharType c = p_stream->get_char();
+ char32_t c = p_stream->get_char();
if (p_stream->is_eof()) {
r_err_str = "Unexpected EOF while parsing old-style project.godot construct";
@@ -1206,7 +1206,7 @@ Error VariantParser::_parse_tag(Token &token, Stream *p_stream, int &line, Strin
r_tag.fields.clear();
while (true) {
- CharType c = p_stream->get_char();
+ char32_t c = p_stream->get_char();
if (p_stream->is_eof()) {
r_err_str = "Unexpected EOF while parsing simple tag";
return ERR_PARSE_ERROR;
@@ -1305,7 +1305,7 @@ Error VariantParser::parse_tag_assign_eof(Stream *p_stream, int &line, String &r
String what;
while (true) {
- CharType c;
+ char32_t c;
if (p_stream->saved) {
c = p_stream->saved;
p_stream->saved = 0;
@@ -1320,7 +1320,7 @@ Error VariantParser::parse_tag_assign_eof(Stream *p_stream, int &line, String &r
if (c == ';') { //comment
while (true) {
- CharType ch = p_stream->get_char();
+ char32_t ch = p_stream->get_char();
if (p_stream->is_eof()) {
return ERR_FILE_EOF;
}
diff --git a/core/variant_parser.h b/core/variant_parser.h
index b55d7b2df0..12329e2db6 100644
--- a/core/variant_parser.h
+++ b/core/variant_parser.h
@@ -38,11 +38,11 @@
class VariantParser {
public:
struct Stream {
- virtual CharType get_char() = 0;
+ virtual char32_t get_char() = 0;
virtual bool is_utf8() const = 0;
virtual bool is_eof() const = 0;
- CharType saved = 0;
+ char32_t saved = 0;
Stream() {}
virtual ~Stream() {}
@@ -51,7 +51,7 @@ public:
struct StreamFile : public Stream {
FileAccess *f = nullptr;
- virtual CharType get_char();
+ virtual char32_t get_char();
virtual bool is_utf8() const;
virtual bool is_eof() const;
@@ -62,7 +62,7 @@ public:
String s;
int pos = 0;
- virtual CharType get_char();
+ virtual char32_t get_char();
virtual bool is_utf8() const;
virtual bool is_eof() const;