diff options
Diffstat (limited to 'core/io/xml_parser.cpp')
-rw-r--r-- | core/io/xml_parser.cpp | 576 |
1 files changed, 576 insertions, 0 deletions
diff --git a/core/io/xml_parser.cpp b/core/io/xml_parser.cpp new file mode 100644 index 0000000000..150643b2e1 --- /dev/null +++ b/core/io/xml_parser.cpp @@ -0,0 +1,576 @@ +/*************************************************************************/ +/* xml_parser.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "xml_parser.h" +#include "print_string.h" +//#define DEBUG_XML + +static bool _equalsn(const CharType* str1, const CharType* str2, int len) { + int i; + for(i=0; str1[i] && str2[i] && i < len; ++i) + if (str1[i] != str2[i]) + return false; + + // if one (or both) of the strings was smaller then they + // are only equal if they have the same lenght + return (i == len) || (str1[i] == 0 && str2[i] == 0); +} + + +String XMLParser::_replace_special_characters(const String& origstr) { + + int pos = origstr.find("&"); + int oldPos = 0; + + if (pos == -1) + return origstr; + + String newstr; + + while(pos != -1 && pos < origstr.length()-2) { + // check if it is one of the special characters + + int specialChar = -1; + for (int i=0; i<(int)special_characters.size(); ++i) + { + const CharType* p = &origstr[pos]+1; + + if (_equalsn(&special_characters[i][1], p, special_characters[i].length()-1)) + { + specialChar = i; + break; + } + } + + if (specialChar != -1) + { + newstr+=(origstr.substr(oldPos, pos - oldPos)); + newstr+=(special_characters[specialChar][0]); + pos += special_characters[specialChar].length(); + } + else + { + newstr+=(origstr.substr(oldPos, pos - oldPos + 1)); + pos += 1; + } + + // find next & + oldPos = pos; + pos = origstr.find("&", pos); + } + + if (oldPos < origstr.length()-1) + newstr+=(origstr.substr(oldPos, origstr.length()-oldPos)); + + return newstr; +} + + +static inline bool _is_white_space(char c) +{ + return (c==' ' || c=='\t' || c=='\n' || c=='\r'); +} + + +//! sets the state that text was found. Returns true if set should be set +bool XMLParser::_set_text(char* start, char* end) { + // check if text is more than 2 characters, and if not, check if there is + // only white space, so that this text won't be reported + if (end - start < 3) + { + char* p = start; + for(; p != end; ++p) + if (!_is_white_space(*p)) + break; + + if (p == end) + return false; + } + + // set current text to the parsed text, and replace xml special characters + String s = String::utf8(start, (int)(end - start)); + node_name = _replace_special_characters(s); + + // current XML node type is text + node_type = NODE_TEXT; + + return true; +} + +void XMLParser::_parse_closing_xml_element() { + node_type = NODE_ELEMENT_END; + node_empty = false; + attributes.clear(); + + ++P; + const char* pBeginClose = P; + + while(*P != '>') + ++P; + + node_name = String::utf8(pBeginClose, (int)(P - pBeginClose)); +#ifdef DEBUG_XML + print_line("XML CLOSE: "+node_name); +#endif + ++P; +} + +void XMLParser::_ignore_definition() { + node_type = NODE_UNKNOWN; + + char *F=P; + // move until end marked with '>' reached + while(*P != '>') + ++P; + node_name.parse_utf8(F,P-F); + ++P; +} + +bool XMLParser::_parse_cdata() { + + if (*(P+1) != '[') + return false; + + node_type = NODE_CDATA; + + // skip '<![CDATA[' + int count=0; + while( *P && count<8 ) + { + ++P; + ++count; + } + + if (!*P) + return true; + + char *cDataBegin = P; + char *cDataEnd = 0; + + // find end of CDATA + while(*P && !cDataEnd) { + if (*P == '>' && + (*(P-1) == ']') && + (*(P-2) == ']')) + { + cDataEnd = P - 2; + } + + ++P; + } + + if ( cDataEnd ) + node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin)); + else + node_name = ""; +#ifdef DEBUG_XML + print_line("XML CDATA: "+node_name); +#endif + + return true; +} + +void XMLParser::_parse_comment() { + + node_type = NODE_COMMENT; + P += 1; + + char *pCommentBegin = P; + + int count = 1; + + // move until end of comment reached + while(count) + { + if (*P == '>') + --count; + else + if (*P == '<') + ++count; + + ++P; + } + + P -= 3; + node_name = String::utf8(pCommentBegin+2, (int)(P - pCommentBegin-2)); + P += 3; +#ifdef DEBUG_XML + print_line("XML COMMENT: "+node_name); +#endif + +} + +void XMLParser::_parse_opening_xml_element() { + + node_type = NODE_ELEMENT; + node_empty = false; + attributes.clear(); + + // find name + const char* startName = P; + + // find end of element + while(*P != '>' && !_is_white_space(*P)) + ++P; + + const char* endName = P; + + // find attributes + while(*P != '>') + { + if (_is_white_space(*P)) + ++P; + else + { + if (*P != '/') + { + // we've got an attribute + + // read the attribute names + const char* attributeNameBegin = P; + + while(!_is_white_space(*P) && *P != '=') + ++P; + + const char* attributeNameEnd = P; + ++P; + + // read the attribute value + // check for quotes and single quotes, thx to murphy + while( (*P != '\"') && (*P != '\'') && *P) + ++P; + + if (!*P) // malformatted xml file + return; + + const char attributeQuoteChar = *P; + + ++P; + const char* attributeValueBegin = P; + + while(*P != attributeQuoteChar && *P) + ++P; + + if (!*P) // malformatted xml file + return; + + const char* attributeValueEnd = P; + ++P; + + Attribute attr; + attr.name = String::utf8(attributeNameBegin, + (int)(attributeNameEnd - attributeNameBegin)); + + String s =String::utf8(attributeValueBegin, + (int)(attributeValueEnd - attributeValueBegin)); + + attr.value = _replace_special_characters(s); + attributes.push_back(attr); + } + else + { + // tag is closed directly + ++P; + node_empty = true; + break; + } + } + } + + // check if this tag is closing directly + if (endName > startName && *(endName-1) == '/') + { + // directly closing tag + node_empty = true; + endName--; + } + + node_name = String::utf8(startName, (int)(endName - startName)); +#ifdef DEBUG_XML + print_line("XML OPEN: "+node_name); +#endif + + ++P; +} + + +void XMLParser::_parse_current_node() { + + char* start = P; + node_offset = P - data; + + // more forward until '<' found + while(*P != '<' && *P) + ++P; + + if (!*P) + return; + + if (P - start > 0) + { + // we found some text, store it + if (_set_text(start, P)) + return; + } + + ++P; + + // based on current token, parse and report next element + switch(*P) + { + case '/': + _parse_closing_xml_element(); + break; + case '?': + _ignore_definition(); + break; + case '!': + if (!_parse_cdata()) + _parse_comment(); + break; + default: + _parse_opening_xml_element(); + break; + } +} + + +uint64_t XMLParser::get_node_offset() const { + + return node_offset; +}; + +Error XMLParser::seek(uint64_t p_pos) { + + ERR_FAIL_COND_V(!data, ERR_FILE_EOF) + ERR_FAIL_COND_V(p_pos >= length, ERR_FILE_EOF); + + P = data + p_pos; + + return read(); +}; + +void XMLParser::_bind_methods() { + + ObjectTypeDB::bind_method(_MD("read"),&XMLParser::read); + ObjectTypeDB::bind_method(_MD("get_node_type"),&XMLParser::get_node_type); + ObjectTypeDB::bind_method(_MD("get_node_name"),&XMLParser::get_node_name); + ObjectTypeDB::bind_method(_MD("get_node_data"),&XMLParser::get_node_data); + ObjectTypeDB::bind_method(_MD("get_node_offset"),&XMLParser::get_node_offset); + ObjectTypeDB::bind_method(_MD("get_attribute_count"),&XMLParser::get_attribute_count); + ObjectTypeDB::bind_method(_MD("get_attribute_name"),&XMLParser::get_attribute_name); + ObjectTypeDB::bind_method(_MD("get_attribute_value"),(String (XMLParser::*)(int) const) &XMLParser::get_attribute_value); + ObjectTypeDB::bind_method(_MD("has_attribute"),&XMLParser::has_attribute); + ObjectTypeDB::bind_method(_MD("get_named_attribute_value"), (String (XMLParser::*)(const String&) const) &XMLParser::get_attribute_value); + ObjectTypeDB::bind_method(_MD("get_named_attribute_value_safe"), &XMLParser::get_attribute_value_safe); + ObjectTypeDB::bind_method(_MD("is_empty"),&XMLParser::is_empty); + ObjectTypeDB::bind_method(_MD("get_current_line"),&XMLParser::get_current_line); + ObjectTypeDB::bind_method(_MD("skip_section"),&XMLParser::skip_section); + ObjectTypeDB::bind_method(_MD("seek"),&XMLParser::seek); + ObjectTypeDB::bind_method(_MD("open"),&XMLParser::open); + + BIND_CONSTANT( NODE_NONE ); + BIND_CONSTANT( NODE_ELEMENT ); + BIND_CONSTANT( NODE_ELEMENT_END ); + BIND_CONSTANT( NODE_TEXT ); + BIND_CONSTANT( NODE_COMMENT ); + BIND_CONSTANT( NODE_CDATA ); + BIND_CONSTANT( NODE_UNKNOWN ); + +}; + + + +Error XMLParser::read() { + + // if not end reached, parse the node + if (P && (P - data) < length - 1 && *P != 0) + { + _parse_current_node(); + return OK; + } + + return ERR_FILE_EOF; +} + +XMLParser::NodeType XMLParser::get_node_type() { + + return node_type; +} +String XMLParser::get_node_data() const { + + ERR_FAIL_COND_V( node_type != NODE_TEXT, ""); + return node_name; +} + +String XMLParser::get_node_name() const { + ERR_FAIL_COND_V( node_type == NODE_TEXT, ""); + return node_name; +} +int XMLParser::get_attribute_count() const { + + return attributes.size(); +} +String XMLParser::get_attribute_name(int p_idx) const { + + ERR_FAIL_INDEX_V(p_idx,attributes.size(),""); + return attributes[p_idx].name; +} +String XMLParser::get_attribute_value(int p_idx) const { + + ERR_FAIL_INDEX_V(p_idx,attributes.size(),""); + return attributes[p_idx].value; +} +bool XMLParser::has_attribute(const String& p_name) const { + + for(int i=0;i<attributes.size();i++) { + if (attributes[i].name==p_name) + return true; + } + + return false; +} +String XMLParser::get_attribute_value(const String& p_name) const { + + int idx=-1; + for(int i=0;i<attributes.size();i++) { + if (attributes[i].name==p_name) { + idx=i; + break; + } + } + + if (idx<0) { + ERR_EXPLAIN("Attribute not found: "+p_name); + } + ERR_FAIL_COND_V(idx<0,""); + return attributes[idx].value; + +} + +String XMLParser::get_attribute_value_safe(const String& p_name) const { + + int idx=-1; + for(int i=0;i<attributes.size();i++) { + if (attributes[i].name==p_name) { + idx=i; + break; + } + } + + if (idx<0) + return ""; + return attributes[idx].value; + +} +bool XMLParser::is_empty() const { + + return node_empty; +} + +Error XMLParser::open(const String& p_path) { + + Error err; + FileAccess * file = FileAccess::open(p_path,FileAccess::READ,&err); + + if (err) { + ERR_FAIL_COND_V(err!=OK,err); + } + + length = file->get_len(); + ERR_FAIL_COND_V(length<1, ERR_FILE_CORRUPT); + + data = memnew_arr( char, length+1); + file->get_buffer((uint8_t*)data,length); + data[length]=0; + P=data; + + memdelete(file); + + return OK; + +} + +void XMLParser::skip_section() { + + // skip if this element is empty anyway. + if (is_empty()) + return; + + // read until we've reached the last element in this section + int tagcount = 1; + + while(tagcount && read()==OK) + { + if (get_node_type() == XMLParser::NODE_ELEMENT && + !is_empty()) + { + ++tagcount; + } + else + if (get_node_type() == XMLParser::NODE_ELEMENT_END) + --tagcount; + } + +} + +void XMLParser::close() { + + if (data) + memdelete_arr(data); + data=NULL; + length=0; + P=NULL; + node_empty=false; + node_type=NODE_NONE; + node_offset = 0; +} + +int XMLParser::get_current_line() const { + + return 0; +} + +XMLParser::XMLParser() { + + data=NULL; + close(); + special_characters.push_back("&"); + special_characters.push_back("<lt;"); + special_characters.push_back(">gt;"); + special_characters.push_back("\"quot;"); + special_characters.push_back("'apos;"); + + +} +XMLParser::~XMLParser() { + + + if (data) + memdelete_arr(data); +} |