diff options
Diffstat (limited to 'core')
-rw-r--r-- | core/math/expression.cpp | 43 | ||||
-rw-r--r-- | core/variant/variant_parser.cpp | 44 |
2 files changed, 81 insertions, 6 deletions
diff --git a/core/math/expression.cpp b/core/math/expression.cpp index 4f8e79038f..203566579d 100644 --- a/core/math/expression.cpp +++ b/core/math/expression.cpp @@ -197,6 +197,7 @@ Error Expression::_get_token(Token &r_token) { case '\'': case '"': { String str; + char32_t prev = 0; while (true) { char32_t ch = GET_CHAR(); @@ -234,9 +235,11 @@ Error Expression::_get_token(Token &r_token) { case 'r': res = 13; break; + case 'U': case 'u': { - // hex number - for (int j = 0; j < 4; j++) { + // Hexadecimal sequence. + int hex_len = (next == 'U') ? 6 : 4; + for (int j = 0; j < hex_len; j++) { char32_t c = GET_CHAR(); if (c == 0) { @@ -273,12 +276,46 @@ Error Expression::_get_token(Token &r_token) { } break; } + // Parse UTF-16 pair. + if ((res & 0xfffffc00) == 0xd800) { + if (prev == 0) { + prev = res; + continue; + } else { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } + } else if ((res & 0xfffffc00) == 0xdc00) { + if (prev == 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired trail surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } else { + res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + prev = 0; + } + } + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += res; - } else { + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += ch; } } + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } r_token.type = TK_CONSTANT; r_token.value = str; diff --git a/core/variant/variant_parser.cpp b/core/variant/variant_parser.cpp index 57875bf50f..96cdc0678e 100644 --- a/core/variant/variant_parser.cpp +++ b/core/variant/variant_parser.cpp @@ -217,6 +217,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri } case '"': { String str; + char32_t prev = 0; while (true) { char32_t ch = p_stream->get_char(); @@ -252,10 +253,13 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri case 'r': res = 13; break; + case 'U': case 'u': { - //hex number - for (int j = 0; j < 4; j++) { + // Hexadecimal sequence. + int hex_len = (next == 'U') ? 6 : 4; + for (int j = 0; j < hex_len; j++) { char32_t c = p_stream->get_char(); + if (c == 0) { r_err_str = "Unterminated String"; r_token.type = TK_ERROR; @@ -290,15 +294,49 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri } break; } + // Parse UTF-16 pair. + if ((res & 0xfffffc00) == 0xd800) { + if (prev == 0) { + prev = res; + continue; + } else { + r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate"; + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } + } else if ((res & 0xfffffc00) == 0xdc00) { + if (prev == 0) { + r_err_str = "Invalid UTF-16 sequence in string, unpaired trail surrogate"; + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } else { + res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + prev = 0; + } + } + if (prev != 0) { + r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate"; + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += res; - } else { + if (prev != 0) { + r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate"; + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } if (ch == '\n') { line++; } str += ch; } } + if (prev != 0) { + r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate"; + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } if (p_stream->is_utf8()) { str.parse_utf8(str.ascii(true).get_data()); |