/*************************************************************************/
/*  script_class_parser.cpp                                              */
/*************************************************************************/
/*                       This file is part of:                           */
/*                           GODOT ENGINE                                */
/*                      https://godotengine.org                          */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
/*                                                                       */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the       */
/* "Software"), to deal in the Software without restriction, including   */
/* without limitation the rights to use, copy, modify, merge, publish,   */
/* distribute, sublicense, and/or sell copies of the Software, and to    */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions:                                             */
/*                                                                       */
/* The above copyright notice and this permission notice shall be        */
/* included in all copies or substantial portions of the Software.       */
/*                                                                       */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
/*************************************************************************/

#include "script_class_parser.h"

#include "core/os/os.h"
#include "core/templates/map.h"

#include "../utils/string_utils.h"

// Human readable names used in error messages; get_token_name() indexes this
// table directly with a Token value.
const char *ScriptClassParser::token_names[ScriptClassParser::TK_MAX] = {
	"[",
	"]",
	"{",
	"}",
	".",
	":",
	",",
	"Symbol",
	"Identifier",
	"String",
	"Number",
	"<",
	">",
	"EOF",
	"Error"
};

// Returns the display name of `p_token`, or an empty string (after reporting an
// error) when the value is outside [0, TK_MAX).
String ScriptClassParser::get_token_name(ScriptClassParser::Token p_token) {
	ERR_FAIL_INDEX_V(p_token, TK_MAX, "");
	return token_names[p_token];
}

// Scans `code` starting at `idx` and returns the next token, advancing `idx`
// past it. Whitespace, comments and '#' directive lines are skipped and `line`
// is incremented for every newline consumed.
//
// For TK_SYMBOL, TK_STRING, TK_NUMBER and TK_IDENTIFIER the payload is stored in
// `value`; the single-character punctuation tokens leave `value` untouched.
// On failure `error` is set, `error_str` describes the problem and TK_ERROR is
// returned.
ScriptClassParser::Token ScriptClassParser::get_token() {
	while (true) {
		switch (code[idx]) {
			case '\n': {
				line++;
				idx++;
				break;
			}
			case 0: {
				// The terminator is not consumed, so further calls keep returning TK_EOF.
				return TK_EOF;
			} break;
			case '{': {
				idx++;
				return TK_CURLY_BRACKET_OPEN;
			}
			case '}': {
				idx++;
				return TK_CURLY_BRACKET_CLOSE;
			}
			case '[': {
				idx++;
				return TK_BRACKET_OPEN;
			}
			case ']': {
				idx++;
				return TK_BRACKET_CLOSE;
			}
			case '<': {
				idx++;
				return TK_OP_LESS;
			}
			case '>': {
				idx++;
				return TK_OP_GREATER;
			}
			case ':': {
				idx++;
				return TK_COLON;
			}
			case ',': {
				idx++;
				return TK_COMMA;
			}
			case '.': {
				idx++;
				return TK_PERIOD;
			}
			case '#': {
				// Compiler directive: skip up to (not including) the end of the line.
				while (code[idx] != '\n' && code[idx] != 0) {
					idx++;
				}
				continue;
			} break;
			case '/': {
				switch (code[idx + 1]) {
					case '*': { // block comment
						idx += 2;
						while (true) {
							if (code[idx] == 0) {
								error_str = "Unterminated comment";
								error = true;
								return TK_ERROR;
							} else if (code[idx] == '*' && code[idx + 1] == '/') {
								idx += 2;
								break;
							} else if (code[idx] == '\n') {
								line++;
							}

							idx++;
						}

					} break;
					case '/': { // line comment skip
						while (code[idx] != '\n' && code[idx] != 0) {
							idx++;
						}

					} break;
					default: {
						// A lone slash is an ordinary symbol.
						value = "/";
						idx++;
						return TK_SYMBOL;
					}
				}

				continue; // a comment
			} break;
			case '\'':
			case '"': {
				// A quote directly preceded by '@' starts a verbatim string, in which
				// backslashes are not escape characters.
				bool verbatim = idx != 0 && code[idx - 1] == '@';

				char32_t begin_str = code[idx];
				idx++;
				String tk_string = String();
				while (true) {
					if (code[idx] == 0) {
						error_str = "Unterminated String";
						error = true;
						return TK_ERROR;
					} else if (code[idx] == begin_str) {
						if (verbatim && code[idx + 1] == '"') { // '""' is verbatim string's '\"'
							// Both quotes are skipped; nothing is appended to tk_string.
							idx += 2; // skip next '"' as well
							continue;
						}

						idx += 1;
						break;
					} else if (code[idx] == '\\' && !verbatim) {
						// Escaped characters...
						idx++;
						char32_t next = code[idx];
						if (next == 0) {
							error_str = "Unterminated String";
							error = true;
							return TK_ERROR;
						}
						char32_t res = 0;

						switch (next) {
							case 'b':
								res = 8;
								break;
							case 't':
								res = 9;
								break;
							case 'n':
								res = 10;
								break;
							case 'f':
								res = 12;
								break;
							case 'r':
								res = 13;
								break;
							case '\"':
								res = '\"';
								break;
							case '\\':
								res = '\\';
								break;
							default: {
								// Unknown escapes yield the escaped character itself.
								res = next;
							} break;
						}

						tk_string += res;

					} else {
						if (code[idx] == '\n') {
							line++;
						}
						tk_string += code[idx];
					}
					idx++;
				}

				value = tk_string;

				return TK_STRING;
			} break;
			default: {
				if (code[idx] <= 32) {
					// Whitespace and control characters are skipped.
					idx++;
					break;
				}

				// ASCII punctuation that has no dedicated case above.
				if ((code[idx] >= 33 && code[idx] <= 47) || (code[idx] >= 58 && code[idx] <= 63) || (code[idx] >= 91 && code[idx] <= 94) || code[idx] == 96 || (code[idx] >= 123 && code[idx] <= 127)) {
					value = String::chr(code[idx]);
					idx++;
					return TK_SYMBOL;
				}

				// NOTE: '-' (45) lies inside the 33..47 symbol range handled above, so
				// only the digit half of this condition can be true here.
				if (code[idx] == '-' || (code[idx] >= '0' && code[idx] <= '9')) {
					// A number.
					const char32_t *rptr;
					double number = String::to_float(&code[idx], &rptr);
					idx += (rptr - &code[idx]);
					value = number;
					return TK_NUMBER;

				} else if ((code[idx] == '@' && code[idx + 1] != '"') || code[idx] == '_' || (code[idx] >= 'A' && code[idx] <= 'Z') || (code[idx] >= 'a' && code[idx] <= 'z') || code[idx] > 127) {
					// An identifier; a leading '@' is kept as part of the name.
					String id;

					id += code[idx];
					idx++;

					while (code[idx] == '_' || (code[idx] >= 'A' && code[idx] <= 'Z') || (code[idx] >= 'a' && code[idx] <= 'z') || (code[idx] >= '0' && code[idx] <= '9') || code[idx] > 127) {
						id += code[idx];
						idx++;
					}

					value = id;
					return TK_IDENTIFIER;
				} else if (code[idx] == '@' && code[idx + 1] == '"') {
					// Begin of verbatim string: step over '@', the string case handles the rest.
					idx++;
				} else {
					error_str = "Unexpected character.";
					error = true;
					return TK_ERROR;
				}
			}
		}
	}
}

// Consumes a generic parameter list up to and including its closing '>'.
// The opening '<' must already have been consumed by the caller. Nested
// parameter lists are handled recursively.
// Returns OK on success; otherwise sets `error`/`error_str` and returns
// ERR_PARSE_ERROR.
Error ScriptClassParser::_skip_generic_type_params() {
	Token tk;

	while (true) {
		tk = get_token();

		if (tk == TK_IDENTIFIER) {
			tk = get_token();
			// Type specifications can end with "?" to denote nullable types, such as IList<int?>
			if (tk == TK_SYMBOL) {
				// The following token is fetched before `value` is inspected. This works
				// for the accepted followers ('>' and ',') because get_token() does not
				// overwrite `value` for those tokens, so it still holds the symbol.
				tk = get_token();
				if (value.operator String() != "?") {
					error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found unexpected symbol '" + value + "'";
					error = true;
					return ERR_PARSE_ERROR;
				}
				if (tk != TK_OP_GREATER && tk != TK_COMMA) {
					error_str = "Nullable type symbol '?' is only allowed after an identifier, but found " + get_token_name(tk) + " next.";
					error = true;
					return ERR_PARSE_ERROR;
				}
			}

			if (tk == TK_PERIOD) {
				// Dotted name: keep consuming "identifier ." pairs.
				while (true) {
					tk = get_token();

					if (tk != TK_IDENTIFIER) {
						error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found: " + get_token_name(tk);
						error = true;
						return ERR_PARSE_ERROR;
					}

					tk = get_token();

					if (tk != TK_PERIOD) {
						break;
					}
				}
			}

			if (tk == TK_OP_LESS) {
				Error err = _skip_generic_type_params();
				if (err) {
					return err;
				}
				tk = get_token();
			}

			if (tk == TK_OP_GREATER) {
				return OK;
			} else if (tk != TK_COMMA) {
				error_str = "Unexpected token: " + get_token_name(tk);
				error = true;
				return ERR_PARSE_ERROR;
			}
		} else if (tk == TK_OP_LESS) {
			error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found " + get_token_name(TK_OP_LESS);
			error = true;
			return ERR_PARSE_ERROR;
		} else if (tk == TK_OP_GREATER) {
			return OK;
		} else {
			error_str = "Unexpected token: " + get_token_name(tk);
			error = true;
			return ERR_PARSE_ERROR;
		}
	}
}

// Parses a possibly dotted type name and appends it to `r_full_name`. Generic
// arguments directly following a name segment are skipped and not recorded.
// Returns OK on success; otherwise sets `error`/`error_str` and returns
// ERR_PARSE_ERROR.
Error ScriptClassParser::_parse_type_full_name(String &r_full_name) {
	Token tk = get_token();

	if (tk != TK_IDENTIFIER) {
		error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found: " + get_token_name(tk);
		error = true;
		return ERR_PARSE_ERROR;
	}

	r_full_name += String(value);

	// The raw character is inspected (no whitespace is skipped), so only a '<'
	// immediately after the identifier is treated as a generic argument list.
	if (code[idx] == '<') {
		idx++;

		// We don't mind if the base is generic, but we skip it any ways since this information is not needed
		Error err = _skip_generic_type_params();
		if (err) {
			return err;
		}
	}

	if (code[idx] != '.') { // We only want to take the next token if it's a period
		return OK;
	}

	tk = get_token();

	CRASH_COND(tk != TK_PERIOD); // Assertion

	r_full_name += ".";

	return _parse_type_full_name(r_full_name);
}

// Parses the comma separated base list of a type declaration, through the
// opening '{' of the type body (which is consumed). Base names are appended to
// `r_base`; because the push happens after the recursive call, they end up in
// reverse declaration order.
// Returns OK on success; otherwise sets `error`/`error_str` and returns
// ERR_PARSE_ERROR.
Error ScriptClassParser::_parse_class_base(Vector<String> &r_base) {
	String name;

	Error err = _parse_type_full_name(name);
	if (err) {
		return err;
	}

	Token tk = get_token();

	if (tk == TK_COMMA) {
		err = _parse_class_base(r_base);
		if (err) {
			return err;
		}
	} else if (tk == TK_IDENTIFIER && String(value) == "where") {
		err = _parse_type_constraints();
		if (err) {
			return err;
		}

		// An open curly bracket was parsed by _parse_type_constraints, so we can exit
	} else if (tk == TK_CURLY_BRACKET_OPEN) {
		// we are finished when we hit the open curly bracket
	} else {
		error_str = "Unexpected token: " + get_token_name(tk);
		error = true;
		return ERR_PARSE_ERROR;
	}

	r_base.push_back(name);

	return OK;
}

// Skips one or more `where T : ...` constraint clauses. The leading `where`
// keyword must already have been consumed. On success the opening '{' of the
// type body has been consumed as well.
// Returns OK on success; otherwise sets `error`/`error_str` and returns
// ERR_PARSE_ERROR.
Error ScriptClassParser::_parse_type_constraints() {
	Token tk = get_token();
	if (tk != TK_IDENTIFIER) {
		error_str = "Unexpected token: " + get_token_name(tk);
		error = true;
		return ERR_PARSE_ERROR;
	}

	tk = get_token();
	if (tk != TK_COLON) {
		error_str = "Unexpected token: " + get_token_name(tk);
		error = true;
		return ERR_PARSE_ERROR;
	}

	while (true) {
		tk = get_token();
		if (tk == TK_IDENTIFIER) {
			if (String(value) == "where") {
				// Start of the next constraint clause.
				return _parse_type_constraints();
			}

			tk = get_token();
			if (tk == TK_PERIOD) {
				// Dotted name: keep consuming "identifier ." pairs.
				while (true) {
					tk = get_token();

					if (tk != TK_IDENTIFIER) {
						error_str = "Expected " + get_token_name(TK_IDENTIFIER) + ", found: " + get_token_name(tk);
						error = true;
						return ERR_PARSE_ERROR;
					}

					tk = get_token();

					if (tk != TK_PERIOD) {
						break;
					}
				}
			}
		}

		if (tk == TK_COMMA) {
			continue;
		} else if (tk == TK_IDENTIFIER && String(value) == "where") {
			return _parse_type_constraints();
		} else if (tk == TK_SYMBOL && String(value) == "(") {
			// Only an empty parenthesis pair is accepted here.
			tk = get_token();
			if (tk != TK_SYMBOL || String(value) != ")") {
				error_str = "Unexpected token: " + get_token_name(tk);
				error = true;
				return ERR_PARSE_ERROR;
			}
		} else if (tk == TK_OP_LESS) {
			Error err = _skip_generic_type_params();
			if (err) {
				return err;
			}
		} else if (tk == TK_CURLY_BRACKET_OPEN) {
			return OK;
		} else {
			error_str = "Unexpected token: " + get_token_name(tk);
			error = true;
			return ERR_PARSE_ERROR;
		}
	}
}

// Parses a possibly dotted namespace name followed by '{'. The name is appended
// to `r_name` and `r_curly_stack` is incremented for the consumed '{'.
// Returns OK on success; otherwise sets `error`/`error_str` and returns
// ERR_PARSE_ERROR.
Error ScriptClassParser::_parse_namespace_name(String &r_name, int &r_curly_stack) {
	Token tk = get_token();

	if (tk == TK_IDENTIFIER) {
		r_name += String(value);
	} else {
		error_str = "Unexpected token: " + get_token_name(tk);
		error = true;
		return ERR_PARSE_ERROR;
	}

	tk = get_token();

	if (tk == TK_PERIOD) {
		r_name += ".";
		return _parse_namespace_name(r_name, r_curly_stack);
	} else if (tk == TK_CURLY_BRACKET_OPEN) {
		r_curly_stack++;
		return OK;
	} else {
		error_str = "Unexpected token: " + get_token_name(tk);
		error = true;
		return ERR_PARSE_ERROR;
	}
}

// Parses `p_code` and collects every non-generic `class` declaration into
// `classes`. All parser state is reset first, so the parser can be reused.
// `struct` declarations are tracked for nesting purposes but never collected.
// Returns OK on success, or an error code after filling `error_str`.
Error ScriptClassParser::parse(const String &p_code) {
	code = p_code;
	idx = 0;
	line = 0;
	error_str = String();
	error = false;
	value = Variant();
	classes.clear();

	Token tk = get_token();

	// Maps the brace depth at which a namespace/type was declared to its
	// declaration; the entry is removed when the matching '}' is reached.
	Map<int, NameDecl> name_stack;
	int curly_stack = 0; // Current brace depth.
	int type_curly_stack = 0; // Number of enclosing class/struct bodies.

	while (!error && tk != TK_EOF) {
		String identifier = value;
		if (tk == TK_IDENTIFIER && (identifier == "class" || identifier == "struct")) {
			bool is_class = identifier == "class";

			tk = get_token();

			if (tk == TK_IDENTIFIER) {
				String name = value;
				int at_level = curly_stack;

				ClassDecl class_decl;

				// Enclosing namespaces build up the namespace; enclosing types become a
				// dotted prefix of the class name.
				for (Map<int, NameDecl>::Element *E = name_stack.front(); E; E = E->next()) {
					const NameDecl &name_decl = E->value();

					if (name_decl.type == NameDecl::NAMESPACE_DECL) {
						if (E != name_stack.front()) {
							class_decl.namespace_ += ".";
						}
						class_decl.namespace_ += name_decl.name;
					} else {
						class_decl.name += name_decl.name + ".";
					}
				}

				class_decl.name += name;
				class_decl.nested = type_curly_stack > 0;

				bool generic = false;

				// Consume the rest of the declaration header, through the opening '{'.
				while (true) {
					tk = get_token();

					if (tk == TK_COLON) {
						// _parse_class_base also consumes the opening '{'.
						Error err = _parse_class_base(class_decl.base);
						if (err) {
							return err;
						}

						curly_stack++;
						type_curly_stack++;

						break;
					} else if (tk == TK_CURLY_BRACKET_OPEN) {
						curly_stack++;
						type_curly_stack++;
						break;
					} else if (tk == TK_OP_LESS && !generic) {
						generic = true;

						Error err = _skip_generic_type_params();
						if (err) {
							return err;
						}
					} else if (tk == TK_IDENTIFIER && String(value) == "where") {
						Error err = _parse_type_constraints();
						if (err) {
							return err;
						}

						// An open curly bracket was parsed by _parse_type_constraints, so we can exit
						curly_stack++;
						type_curly_stack++;
						break;
					} else {
						error_str = "Unexpected token: " + get_token_name(tk);
						error = true;
						return ERR_PARSE_ERROR;
					}
				}

				NameDecl name_decl;
				name_decl.name = name;
				name_decl.type = is_class ? NameDecl::CLASS_DECL : NameDecl::STRUCT_DECL;
				name_stack[at_level] = name_decl;

				if (is_class) {
					if (!generic) { // no generics, thanks
						classes.push_back(class_decl);
					} else if (OS::get_singleton()->is_stdout_verbose()) {
						String full_name = class_decl.namespace_;
						if (full_name.length()) {
							full_name += ".";
						}
						full_name += class_decl.name;
						OS::get_singleton()->print("Ignoring generic class declaration: %s\n", full_name.utf8().get_data());
					}
				}
			}
		} else if (tk == TK_IDENTIFIER && identifier == "namespace") {
			if (type_curly_stack > 0) {
				error_str = "Found namespace nested inside type.";
				error = true;
				return ERR_PARSE_ERROR;
			}

			String name;
			// Captured before the call because _parse_namespace_name increments curly_stack.
			int at_level = curly_stack;

			Error err = _parse_namespace_name(name, curly_stack);
			if (err) {
				return err;
			}

			NameDecl name_decl;
			name_decl.name = name;
			name_decl.type = NameDecl::NAMESPACE_DECL;
			name_stack[at_level] = name_decl;
		} else if (tk == TK_CURLY_BRACKET_OPEN) {
			curly_stack++;
		} else if (tk == TK_CURLY_BRACKET_CLOSE) {
			curly_stack--;
			// A declaration registered at this depth ends with this bracket.
			if (name_stack.has(curly_stack)) {
				if (name_stack[curly_stack].type != NameDecl::NAMESPACE_DECL) {
					type_curly_stack--;
				}
				name_stack.erase(curly_stack);
			}
		}

		tk = get_token();
	}

	if (!error && tk == TK_EOF && curly_stack > 0) {
		error_str = "Reached EOF with missing close curly brackets.";
		error = true;
	}

	if (error) {
		return ERR_PARSE_ERROR;
	}

	return OK;
}

// Returns the directive name that follows the '#' located at `p_line[p_from]`:
// the run of letters, underscores and non-ASCII characters directly after it
// (possibly empty). Crashes if `p_line[p_from]` is not '#'.
static String get_preprocessor_directive(const String &p_line, int p_from) {
	CRASH_COND(p_line[p_from] != '#');
	p_from++;
	int i = p_from;
	while (i < p_line.length() && (p_line[i] == '_' || (p_line[i] >= 'A' && p_line[i] <= 'Z') || (p_line[i] >= 'a' && p_line[i] <= 'z') || p_line[i] > 127)) {
		i++;
	}
	return p_line.substr(p_from, i - p_from);
}

// Rewrites `r_source` in place, dropping lines excluded by a very simplified
// handling of conditional compilation: `#if` conditions are never evaluated.
// The first branch of an `#if` is kept whenever the enclosing branch is kept,
// and every `#elif`/`#else` branch is dropped. Lines whose first non-whitespace
// character is '#' are dropped as well. `p_filepath` is only used in error
// messages.
static void run_dummy_preprocessor(String &r_source, const String &p_filepath) {
	Vector<String> lines = r_source.split("\n", /* p_allow_empty: */ true);

	// One flag per line; released at the end of the function.
	bool *include_lines = memnew_arr(bool, lines.size());

	int if_level = -1; // -1 means "outside of any #if".
	Vector<bool> is_branch_being_compiled; // Indexed by if_level.

	for (int i = 0; i < lines.size(); i++) {
		const String &line = lines[i];

		const int line_len = line.length();

		int j;
		for (j = 0; j < line_len; j++) {
			if (line[j] != ' ' && line[j] != '\t') {
				if (line[j] == '#') {
					// First non-whitespace char of the line is '#'
					include_lines[i] = false;

					String directive = get_preprocessor_directive(line, j);

					if (directive == "if") {
						if_level++;
						is_branch_being_compiled.push_back(if_level == 0 || is_branch_being_compiled[if_level - 1]);
					} else if (directive == "elif") {
						ERR_CONTINUE_MSG(if_level == -1, "Found unexpected '#elif' directive. File: '" + p_filepath + "'.");
						is_branch_being_compiled.write[if_level] = false;
					} else if (directive == "else") {
						ERR_CONTINUE_MSG(if_level == -1, "Found unexpected '#else' directive. File: '" + p_filepath + "'.");
						is_branch_being_compiled.write[if_level] = false;
					} else if (directive == "endif") {
						ERR_CONTINUE_MSG(if_level == -1, "Found unexpected '#endif' directive. File: '" + p_filepath + "'.");
						is_branch_being_compiled.remove(if_level);
						if_level--;
					}

					break;
				} else {
					// First non-whitespace char of the line is not '#'
					include_lines[i] = if_level == -1 || is_branch_being_compiled[if_level];
					break;
				}
			}
		}

		if (j == line_len) {
			// Loop ended without finding a non-whitespace character.
			// Either the line was empty or it only contained whitespaces.
			include_lines[i] = if_level == -1 || is_branch_being_compiled[if_level];
		}
	}

	r_source.clear();

	// Custom join ignoring lines removed by the preprocessor
	for (int i = 0; i < lines.size(); i++) {
		if (i > 0 && include_lines[i - 1]) {
			r_source += '\n';
		}

		if (include_lines[i]) {
			r_source += lines[i];
		}
	}

	// The flag array is owned by this function; release it to avoid leaking it
	// every time a file is parsed.
	if (include_lines) {
		memdelete_arr(include_lines);
	}
}

// Reads the file at `p_filepath` as UTF-8, runs the dummy preprocessor over it
// and parses the result. Returns the read error (after reporting it) when the
// file cannot be loaded, otherwise the result of parse().
Error ScriptClassParser::parse_file(const String &p_filepath) {
	String source;

	Error ferr = read_all_file_utf8(p_filepath, source);

	ERR_FAIL_COND_V_MSG(ferr != OK, ferr,
			ferr == ERR_INVALID_DATA ?
					  "File '" + p_filepath + "' contains invalid unicode (UTF-8), so it was not loaded."
											  " Please ensure that scripts are saved in valid UTF-8 unicode." :
					  "Failed to read file: '" + p_filepath + "'.");

	run_dummy_preprocessor(source, p_filepath);

	return parse(source);
}

// Returns the message describing the last parse error (empty after parse()
// resets the state and no error has occurred since).
String ScriptClassParser::get_error() {
	return error_str;
}

// Returns a copy of the class declarations collected by the last parse.
Vector<ScriptClassParser::ClassDecl> ScriptClassParser::get_classes() {
	return classes;
}