From 8e79c5fb8dd986aa6903a6419ac8c45444fff6f0 Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Sun, 30 Jan 2022 15:44:07 +0200 Subject: Add support for the escaped UTF-16 and UTF-32 Unicode characters in the scripts and expressions. --- modules/visual_script/visual_script_expression.cpp | 43 ++++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'modules/visual_script') diff --git a/modules/visual_script/visual_script_expression.cpp b/modules/visual_script/visual_script_expression.cpp index 2abbd19e12..17a3566ed2 100644 --- a/modules/visual_script/visual_script_expression.cpp +++ b/modules/visual_script/visual_script_expression.cpp @@ -328,6 +328,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) { }; case '"': { String str; + char32_t prev = 0; while (true) { char32_t ch = GET_CHAR(); @@ -364,9 +365,11 @@ Error VisualScriptExpression::_get_token(Token &r_token) { case 'r': res = 13; break; + case 'U': case 'u': { - // hex number - for (int j = 0; j < 4; j++) { + // Hexadecimal sequence. + int hex_len = (next == 'U') ? 6 : 4; + for (int j = 0; j < hex_len; j++) { char32_t c = GET_CHAR(); if (c == 0) { @@ -403,12 +406,46 @@ Error VisualScriptExpression::_get_token(Token &r_token) { } break; } + // Parse UTF-16 pair. + if ((res & 0xfffffc00) == 0xd800) { + if (prev == 0) { + prev = res; + continue; + } else { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } + } else if ((res & 0xfffffc00) == 0xdc00) { + if (prev == 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired trail surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } else { + res = (prev << 10UL) + res - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + prev = 0; + } + } + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += res; - } else { + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } str += ch; } } + if (prev != 0) { + _set_error("Invalid UTF-16 sequence in string, unpaired lead surrogate"); + r_token.type = TK_ERROR; + return ERR_PARSE_ERROR; + } r_token.type = TK_CONSTANT; r_token.value = str; -- cgit v1.2.3