diff options
author | Henry Conklin <henrywconklin@gmail.com> | 2021-02-11 09:33:55 -0600 |
---|---|---|
committer | Henry Conklin <henrywconklin@gmail.com> | 2021-02-14 11:00:25 -0600 |
commit | c6a911f0379b429691aa0dca6615523892bbfb06 (patch) | |
tree | 40b85fbb5f85cca9d99bfc91123a600a6aa62a4a /core | |
parent | e5bb89cdd5e92fa6fdeff78aad08bf0cbfbcc692 (diff) |
Add support for numeric XML entities to XMLParser
* Add support for decimal numeric entities to String::xml_unescape
* Add more error checks to String::xml_unescape
* Refactor XMLParser to use String::xml_unescape instead of an internal
implementation
Diffstat (limited to 'core')
-rw-r--r-- | core/io/xml_parser.cpp | 66 | ||||
-rw-r--r-- | core/io/xml_parser.h | 2 | ||||
-rw-r--r-- | core/string/ustring.cpp | 62 |
3 files changed, 48 insertions, 82 deletions
diff --git a/core/io/xml_parser.cpp b/core/io/xml_parser.cpp index 905be6089d..1574634aad 100644 --- a/core/io/xml_parser.cpp +++ b/core/io/xml_parser.cpp @@ -36,63 +36,6 @@ VARIANT_ENUM_CAST(XMLParser::NodeType); -static bool _equalsn(const char32_t *str1, const char32_t *str2, int len) { - int i; - for (i = 0; i < len && str1[i] && str2[i]; ++i) { - if (str1[i] != str2[i]) { - return false; - } - } - - // if one (or both) of the strings was smaller then they - // are only equal if they have the same length - return (i == len) || (str1[i] == 0 && str2[i] == 0); -} - -String XMLParser::_replace_special_characters(const String &origstr) { - int pos = origstr.find("&"); - int oldPos = 0; - - if (pos == -1) { - return origstr; - } - - String newstr; - - while (pos != -1 && pos < origstr.length() - 2) { - // check if it is one of the special characters - - int specialChar = -1; - for (int i = 0; i < (int)special_characters.size(); ++i) { - const char32_t *p = &origstr[pos] + 1; - - if (_equalsn(&special_characters[i][1], p, special_characters[i].length() - 1)) { - specialChar = i; - break; - } - } - - if (specialChar != -1) { - newstr += (origstr.substr(oldPos, pos - oldPos)); - newstr += (special_characters[specialChar][0]); - pos += special_characters[specialChar].length(); - } else { - newstr += (origstr.substr(oldPos, pos - oldPos + 1)); - pos += 1; - } - - // find next & - oldPos = pos; - pos = origstr.find("&", pos); - } - - if (oldPos < origstr.length() - 1) { - newstr += (origstr.substr(oldPos, origstr.length() - oldPos)); - } - - return newstr; -} - static inline bool _is_white_space(char c) { return (c == ' ' || c == '\t' || c == '\n' || c == '\r'); } @@ -116,7 +59,7 @@ bool XMLParser::_set_text(char *start, char *end) { // set current text to the parsed text, and replace xml special characters String s = String::utf8(start, (int)(end - start)); - node_name = _replace_special_characters(s); + node_name = s.xml_unescape(); // current XML node type is text node_type = NODE_TEXT; @@ -292,7 +235,7 @@ void XMLParser::_parse_opening_xml_element() { String s = String::utf8(attributeValueBegin, (int)(attributeValueEnd - attributeValueBegin)); - attr.value = _replace_special_characters(s); + attr.value = s.xml_unescape(); attributes.push_back(attr); } else { // tag is closed directly @@ -555,11 +498,6 @@ int XMLParser::get_current_line() const { } XMLParser::XMLParser() { - special_characters.push_back("&"); - special_characters.push_back("<lt;"); - special_characters.push_back(">gt;"); - special_characters.push_back("\"quot;"); - special_characters.push_back("'apos;"); } XMLParser::~XMLParser() { diff --git a/core/io/xml_parser.h b/core/io/xml_parser.h index 01af6a90ad..847edf958d 100644 --- a/core/io/xml_parser.h +++ b/core/io/xml_parser.h @@ -68,8 +68,6 @@ private: char *data = nullptr; char *P = nullptr; uint64_t length = 0; - void unescape(String &p_str); - Vector<String> special_characters; String node_name; bool node_empty = false; NodeType node_type = NODE_NONE; diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 59fda65d43..a57c7b2504 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -3888,25 +3888,55 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch if (p_src_len >= 4 && p_src[1] == '#') { char32_t c = 0; - - for (int i = 2; i < p_src_len; i++) { - eat = i + 1; - char32_t ct = p_src[i]; - if (ct == ';') { - break; - } else if (ct >= '0' && ct <= '9') { - ct = ct - '0'; - } else if (ct >= 'a' && ct <= 'f') { - ct = (ct - 'a') + 10; - } else if (ct >= 'A' && ct <= 'F') { - ct = (ct - 'A') + 10; - } else { - continue; + bool overflow = false; + if (p_src[2] == 'x') { + // Hex entity &#x<num>; + for (int i = 3; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';') { + break; + } else if (ct >= '0' && ct <= '9') { + ct = ct - '0'; + } else if (ct >= 'a' && ct <= 'f') { + ct = (ct - 'a') + 10; + } else if (ct >= 'A' && ct <= 'F') { + ct = (ct - 'A') + 10; + } else { + break; + } + if (c > (UINT32_MAX >> 4)) { + overflow = true; + break; + } + c <<= 4; + c |= ct; + } + } else { + // Decimal entity &#<num>; + for (int i = 2; i < p_src_len; i++) { + eat = i + 1; + char32_t ct = p_src[i]; + if (ct == ';' || ct < '0' || ct > '9') { + break; + } + } + if (p_src[eat - 1] == ';') { + int64_t val = String::to_int(p_src + 2, eat - 3); + if (val > 0 && val <= UINT32_MAX) { + c = (char32_t)val; + } else { + overflow = true; + } } - c <<= 4; - c |= ct; } + // Value must be non-zero, in the range of char32_t, + // actually end with ';'. If invalid, leave the entity as-is + if (c == '\0' || overflow || p_src[eat - 1] != ';') { + eat = 1; + c = *p_src; + } if (p_dst) { *p_dst = c; } |