diff options
author | RĂ©mi Verschelde <remi@verschelde.fr> | 2022-01-31 18:11:55 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-31 18:11:55 +0100 |
commit | 243fbebb895713cb846e19538969112d3695eded (patch) | |
tree | 0e62dd2d667b30d1adb482b0668c1604e01e2fe4 /modules | |
parent | f14b1441e79de31564f6b7926755682947348e7b (diff) | |
parent | 8e79c5fb8dd986aa6903a6419ac8c45444fff6f0 (diff) |
Merge pull request #57447 from bruvzg/unicode_escape
Diffstat (limited to 'modules')
-rw-r--r-- | modules/gdscript/gdscript_tokenizer.cpp | 66 | ||||
-rw-r--r-- | modules/visual_script/visual_script_expression.cpp | 43 |
2 files changed, 103 insertions, 6 deletions
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index 05ea061798..9977b88aa1 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -786,6 +786,8 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { } String result; + char32_t prev = 0; + int prev_pos = 0; for (;;) { // Consume actual string. @@ -852,9 +854,11 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { case '\\': escaped = '\\'; break; - case 'u': + case 'U': + case 'u': { // Hexadecimal sequence. - for (int i = 0; i < 4; i++) { + int hex_len = (code == 'U') ? 6 : 4; + for (int j = 0; j < hex_len; j++) { if (_is_at_end()) { return make_error("Unterminated string."); } @@ -886,7 +890,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { _advance(); } - break; + } break; case '\r': if (_peek() != '\n') { // Carriage return without newline in string. (???) @@ -909,11 +913,53 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { valid_escape = false; break; } + // Parse UTF-16 pair. + if (valid_escape) { + if ((escaped & 0xfffffc00) == 0xd800) { + if (prev == 0) { + prev = escaped; + prev_pos = column - 2; + continue; + } else { + Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + error.start_column = column - 2; + error.leftmost_column = error.start_column; + push_error(error); + valid_escape = false; + prev = 0; + } + } else if ((escaped & 0xfffffc00) == 0xdc00) { + if (prev == 0) { + Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate"); + error.start_column = column - 2; + error.leftmost_column = error.start_column; + push_error(error); + valid_escape = false; + } else { + escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + prev = 0; + } + } + if (prev != 0) { + Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + error.start_column = prev_pos; + error.leftmost_column = error.start_column; + push_error(error); + prev = 0; + } + } if (valid_escape) { result += escaped; } } else if (ch == quote_char) { + if (prev != 0) { + Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + error.start_column = prev_pos; + error.leftmost_column = error.start_column; + push_error(error); + prev = 0; + } _advance(); if (is_multiline) { if (_peek() == quote_char && _peek(1) == quote_char) { @@ -930,6 +976,13 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { break; } } else { + if (prev != 0) { + Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + error.start_column = prev_pos; + error.leftmost_column = error.start_column; + push_error(error); + prev = 0; + } result += ch; _advance(); if (ch == '\n') { @@ -937,6 +990,13 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { } } } + if (prev != 0) { + Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + error.start_column = prev_pos; + error.leftmost_column = error.start_column; + push_error(error); + prev = 0; + } // Make the literal. Variant string; diff --git a/modules/visual_script/visual_script_expression.cpp b/modules/visual_script/visual_script_expression.cpp index 2abbd19e12..17a3566ed2 100644 --- a/modules/visual_script/visual_script_expression.cpp +++ b/modules/visual_script/visual_script_expression.cpp @@ -328,6 +328,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) { }; case '"': { String str; + char32_t prev = 0; while (true) { char32_t ch = GET_CHAR(); @@ -364,9 +365,11 @@ Error VisualScriptExpression::_get_token(Token &r_token) { case 'r': res = 13; break; + case 'U': case 'u': { - // hex number - for (int j = 0; j < 4; j++) { + // Hexadecimal sequence. + int hex_len = (next == 'U') ? 6 : 4; + for (int j = 0; j < hex_len; j++) { char32_t c = GET_CHAR(); if (c == 0) { @@ -403,12 +406,46 @@ Error VisualScriptExpression::_get_token(Token &r_token) { } break; } + // Parse UTF-16 pair. + if ((res & 0xfffffc00) == 0xd800) { + if (prev == 0) { + prev = res; + continue; + } else { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } + } else if ((res & 0xfffffc00) == 0xdc00) { + if (prev == 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired trail surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } else { + res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + prev = 0; + } + } + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += res; - } else { + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += ch; } } + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } r_token.type = TK_CONSTANT; r_token.value = str; |