summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaoyu Qiu <timothyqiu32@gmail.com>2021-05-18 15:01:21 +0800
committerHaoyu Qiu <timothyqiu32@gmail.com>2021-05-18 15:01:21 +0800
commit549ad70760e96828dadc98ad47bd4788e14947f2 (patch)
treee45ae5a4aa710cc51dd996f58b9833c22fda4049
parentcdb3726fe6c9d41701dc89e572c6b2631d14cf6f (diff)
Fix XMLParser behavior for comments and premature endings
-rw-r--r--core/io/xml_parser.cpp99
1 files changed, 65 insertions, 34 deletions
diff --git a/core/io/xml_parser.cpp b/core/io/xml_parser.cpp
index a1f8e79adc..f3ce2319e3 100644
--- a/core/io/xml_parser.cpp
+++ b/core/io/xml_parser.cpp
@@ -75,7 +75,7 @@ void XMLParser::_parse_closing_xml_element() {
++P;
const char *pBeginClose = P;
- while (*P != '>') {
+ while (*P && *P != '>') {
++P;
}
@@ -83,7 +83,10 @@ void XMLParser::_parse_closing_xml_element() {
#ifdef DEBUG_XML
print_line("XML CLOSE: " + node_name);
#endif
- ++P;
+
+ if (*P) {
+ ++P;
+ }
}
void XMLParser::_ignore_definition() {
@@ -91,11 +94,14 @@ void XMLParser::_ignore_definition() {
char *F = P;
// move until end marked with '>' reached
- while (*P != '>') {
+ while (*P && *P != '>') {
++P;
}
node_name.parse_utf8(F, P - F);
- ++P;
+
+ if (*P) {
+ ++P;
+ }
}
bool XMLParser::_parse_cdata() {
@@ -113,6 +119,7 @@ bool XMLParser::_parse_cdata() {
}
if (!*P) {
+ node_name = "";
return true;
}
@@ -131,10 +138,9 @@ bool XMLParser::_parse_cdata() {
}
if (cDataEnd) {
- node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin));
- } else {
- node_name = "";
+ cDataEnd = P;
}
+ node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin));
#ifdef DEBUG_XML
print_line("XML CDATA: " + node_name);
#endif
@@ -146,24 +152,45 @@ void XMLParser::_parse_comment() {
node_type = NODE_COMMENT;
P += 1;
- char *pCommentBegin = P;
+ char *pEndOfInput = data + length;
+ char *pCommentBegin;
+ char *pCommentEnd;
- int count = 1;
-
- // move until end of comment reached
- while (count) {
- if (*P == '>') {
- --count;
- } else if (*P == '<') {
- ++count;
+ if (P + 1 < pEndOfInput && P[0] == '-' && P[1] == '-') {
+ // Comment, use '-->' as end.
+ pCommentBegin = P + 2;
+ for (pCommentEnd = pCommentBegin; pCommentEnd + 2 < pEndOfInput; pCommentEnd++) {
+ if (pCommentEnd[0] == '-' && pCommentEnd[1] == '-' && pCommentEnd[2] == '>') {
+ break;
+ }
+ }
+ if (pCommentEnd + 2 < pEndOfInput) {
+ P = pCommentEnd + 3;
+ } else {
+ P = pCommentEnd = pEndOfInput;
+ }
+ } else {
+ // Like document type definition, match angle brackets.
+ pCommentBegin = P;
+
+ int count = 1;
+ while (*P && count) {
+ if (*P == '>') {
+ --count;
+ } else if (*P == '<') {
+ ++count;
+ }
+ ++P;
}
- ++P;
+ if (count) {
+ pCommentEnd = P;
+ } else {
+ pCommentEnd = P - 1;
+ }
}
- P -= 3;
- node_name = String::utf8(pCommentBegin + 2, (int)(P - pCommentBegin - 2));
- P += 3;
+ node_name = String::utf8(pCommentBegin, (int)(pCommentEnd - pCommentBegin));
#ifdef DEBUG_XML
print_line("XML COMMENT: " + node_name);
#endif
@@ -178,14 +205,14 @@ void XMLParser::_parse_opening_xml_element() {
const char *startName = P;
// find end of element
- while (*P != '>' && !_is_white_space(*P)) {
+ while (*P && *P != '>' && !_is_white_space(*P)) {
++P;
}
const char *endName = P;
// find attributes
- while (*P != '>') {
+ while (*P && *P != '>') {
if (_is_white_space(*P)) {
++P;
} else {
@@ -195,10 +222,14 @@ void XMLParser::_parse_opening_xml_element() {
// read the attribute names
const char *attributeNameBegin = P;
- while (!_is_white_space(*P) && *P != '=') {
+ while (*P && !_is_white_space(*P) && *P != '=') {
++P;
}
+ if (!*P) {
+ break;
+ }
+
const char *attributeNameEnd = P;
++P;
@@ -209,7 +240,7 @@ void XMLParser::_parse_opening_xml_element() {
}
if (!*P) { // malformatted xml file
- return;
+ break;
}
const char attributeQuoteChar = *P;
@@ -221,12 +252,10 @@ void XMLParser::_parse_opening_xml_element() {
++P;
}
- if (!*P) { // malformatted xml file
- return;
- }
-
const char *attributeValueEnd = P;
- ++P;
+ if (*P) {
+ ++P;
+ }
Attribute attr;
attr.name = String::utf8(attributeNameBegin,
@@ -258,7 +287,9 @@ void XMLParser::_parse_opening_xml_element() {
print_line("XML OPEN: " + node_name);
#endif
- ++P;
+ if (*P) {
+ ++P;
+ }
}
void XMLParser::_parse_current_node() {
@@ -270,10 +301,6 @@ void XMLParser::_parse_current_node() {
++P;
}
- if (!*P) {
- return;
- }
-
if (P - start > 0) {
// we found some text, store it
if (_set_text(start, P)) {
@@ -281,6 +308,10 @@ void XMLParser::_parse_current_node() {
}
}
+ if (!*P) {
+ return;
+ }
+
++P;
// based on current token, parse and report next element