summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/math/expression.cpp43
-rw-r--r--core/variant/variant_parser.cpp44
-rw-r--r--modules/gdscript/gdscript_tokenizer.cpp66
-rw-r--r--modules/visual_script/visual_script_expression.cpp43
4 files changed, 184 insertions, 12 deletions
diff --git a/core/math/expression.cpp b/core/math/expression.cpp
index 4f8e79038f..203566579d 100644
--- a/core/math/expression.cpp
+++ b/core/math/expression.cpp
@@ -197,6 +197,7 @@ Error Expression::_get_token(Token &r_token) {
case '\'':
case '"': {
String str;
+ char32_t prev = 0;
while (true) {
char32_t ch = GET_CHAR();
@@ -234,9 +235,11 @@ Error Expression::_get_token(Token &r_token) {
case 'r':
res = 13;
break;
+ case 'U':
case 'u': {
- // hex number
- for (int j = 0; j < 4; j++) {
+ // Hexadecimal sequence.
+ int hex_len = (next == 'U') ? 6 : 4;
+ for (int j = 0; j < hex_len; j++) {
char32_t c = GET_CHAR();
if (c == 0) {
@@ -273,12 +276,46 @@ Error Expression::_get_token(Token &r_token) {
} break;
}
+ // Parse UTF-16 pair.
+ if ((res & 0xfffffc00) == 0xd800) {
+ if (prev == 0) {
+ prev = res;
+ continue;
+ } else {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
+ } else if ((res & 0xfffffc00) == 0xdc00) {
+ if (prev == 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ } else {
+ res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+ prev = 0;
+ }
+ }
+ if (prev != 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
str += res;
-
} else {
+ if (prev != 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
str += ch;
}
}
+ if (prev != 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
r_token.type = TK_CONSTANT;
r_token.value = str;
diff --git a/core/variant/variant_parser.cpp b/core/variant/variant_parser.cpp
index 57875bf50f..96cdc0678e 100644
--- a/core/variant/variant_parser.cpp
+++ b/core/variant/variant_parser.cpp
@@ -217,6 +217,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
}
case '"': {
String str;
+ char32_t prev = 0;
while (true) {
char32_t ch = p_stream->get_char();
@@ -252,10 +253,13 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
case 'r':
res = 13;
break;
+ case 'U':
case 'u': {
- //hex number
- for (int j = 0; j < 4; j++) {
+ // Hexadecimal sequence.
+ int hex_len = (next == 'U') ? 6 : 4;
+ for (int j = 0; j < hex_len; j++) {
char32_t c = p_stream->get_char();
+
if (c == 0) {
r_err_str = "Unterminated String";
r_token.type = TK_ERROR;
@@ -290,15 +294,49 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
} break;
}
+ // Parse UTF-16 pair.
+ if ((res & 0xfffffc00) == 0xd800) {
+ if (prev == 0) {
+ prev = res;
+ continue;
+ } else {
+ r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
+ } else if ((res & 0xfffffc00) == 0xdc00) {
+ if (prev == 0) {
+ r_err_str = "Invalid UTF-16 sequence in string, unpaired trail surrogate";
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ } else {
+ res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+ prev = 0;
+ }
+ }
+ if (prev != 0) {
+ r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
str += res;
-
} else {
+ if (prev != 0) {
+ r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
if (ch == '\n') {
line++;
}
str += ch;
}
}
+ if (prev != 0) {
+ r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
if (p_stream->is_utf8()) {
str.parse_utf8(str.ascii(true).get_data());
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp
index 05ea061798..9977b88aa1 100644
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -786,6 +786,8 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
}
String result;
+ char32_t prev = 0;
+ int prev_pos = 0;
for (;;) {
// Consume actual string.
@@ -852,9 +854,11 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
case '\\':
escaped = '\\';
break;
- case 'u':
+ case 'U':
+ case 'u': {
// Hexadecimal sequence.
- for (int i = 0; i < 4; i++) {
+ int hex_len = (code == 'U') ? 6 : 4;
+ for (int j = 0; j < hex_len; j++) {
if (_is_at_end()) {
return make_error("Unterminated string.");
}
@@ -886,7 +890,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
_advance();
}
- break;
+ } break;
case '\r':
if (_peek() != '\n') {
// Carriage return without newline in string. (???)
@@ -909,11 +913,53 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
valid_escape = false;
break;
}
+ // Parse UTF-16 pair.
+ if (valid_escape) {
+ if ((escaped & 0xfffffc00) == 0xd800) {
+ if (prev == 0) {
+ prev = escaped;
+ prev_pos = column - 2;
+ continue;
+ } else {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ error.start_column = column - 2;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ valid_escape = false;
+ prev = 0;
+ }
+ } else if ((escaped & 0xfffffc00) == 0xdc00) {
+ if (prev == 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
+ error.start_column = column - 2;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ valid_escape = false;
+ } else {
+ escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+ prev = 0;
+ }
+ }
+ if (prev != 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ error.start_column = prev_pos;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ prev = 0;
+ }
+ }
if (valid_escape) {
result += escaped;
}
} else if (ch == quote_char) {
+ if (prev != 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ error.start_column = prev_pos;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ prev = 0;
+ }
_advance();
if (is_multiline) {
if (_peek() == quote_char && _peek(1) == quote_char) {
@@ -930,6 +976,13 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
break;
}
} else {
+ if (prev != 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ error.start_column = prev_pos;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ prev = 0;
+ }
result += ch;
_advance();
if (ch == '\n') {
@@ -937,6 +990,13 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
}
}
}
+ if (prev != 0) {
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ error.start_column = prev_pos;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ prev = 0;
+ }
// Make the literal.
Variant string;
diff --git a/modules/visual_script/visual_script_expression.cpp b/modules/visual_script/visual_script_expression.cpp
index 2abbd19e12..17a3566ed2 100644
--- a/modules/visual_script/visual_script_expression.cpp
+++ b/modules/visual_script/visual_script_expression.cpp
@@ -328,6 +328,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
};
case '"': {
String str;
+ char32_t prev = 0;
while (true) {
char32_t ch = GET_CHAR();
@@ -364,9 +365,11 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
case 'r':
res = 13;
break;
+ case 'U':
case 'u': {
- // hex number
- for (int j = 0; j < 4; j++) {
+ // Hexadecimal sequence.
+ int hex_len = (next == 'U') ? 6 : 4;
+ for (int j = 0; j < hex_len; j++) {
char32_t c = GET_CHAR();
if (c == 0) {
@@ -403,12 +406,46 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
} break;
}
+ // Parse UTF-16 pair.
+ if ((res & 0xfffffc00) == 0xd800) {
+ if (prev == 0) {
+ prev = res;
+ continue;
+ } else {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
+ } else if ((res & 0xfffffc00) == 0xdc00) {
+ if (prev == 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ } else {
+ res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+ prev = 0;
+ }
+ }
+ if (prev != 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
str += res;
-
} else {
+ if (prev != 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
str += ch;
}
}
+ if (prev != 0) {
+ _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
+ r_token.type = TK_ERROR;
+ return ERR_PARSE_ERROR;
+ }
r_token.type = TK_CONSTANT;
r_token.value = str;