From 76b653528dc393f7523187d5afe9e84867b7c4c7 Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Mon, 1 Mar 2021 13:23:12 +0200 Subject: JSON parser: add UTF-16 surrogate pairs support. --- core/io/json.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/core/io/json.cpp b/core/io/json.cpp index bc4527869b..0d9117fdda 100644 --- a/core/io/json.cpp +++ b/core/io/json.cpp @@ -234,6 +234,52 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to } index += 4; //will add at the end anyway + if ((res & 0xfffffc00) == 0xd800) { + if (p_str[index + 1] != '\\' || p_str[index + 2] != 'u') { + r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate"; + return ERR_PARSE_ERROR; + } + index += 2; + char32_t trail = 0; + for (int j = 0; j < 4; j++) { + char32_t c = p_str[index + j + 1]; + if (c == 0) { + r_err_str = "Unterminated String"; + return ERR_PARSE_ERROR; + } + if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) { + r_err_str = "Malformed hex constant in string"; + return ERR_PARSE_ERROR; + } + char32_t v; + if (c >= '0' && c <= '9') { + v = c - '0'; + } else if (c >= 'a' && c <= 'f') { + v = c - 'a'; + v += 10; + } else if (c >= 'A' && c <= 'F') { + v = c - 'A'; + v += 10; + } else { + ERR_PRINT("Bug parsing hex constant."); + v = 0; + } + + trail <<= 4; + trail |= v; + } + if ((trail & 0xfffffc00) == 0xdc00) { + res = (res << 10UL) + trail - ((0xd800 << 10UL) + 0xdc00 - 0x10000); + index += 4; //will add at the end anyway + } else { + r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate"; + return ERR_PARSE_ERROR; + } + } else if ((res & 0xfffffc00) == 0xdc00) { + r_err_str = "Invalid UTF-16 sequence in string, unpaired trail surrogate"; + return ERR_PARSE_ERROR; + } + } break; default: { res = next; -- cgit v1.2.3