diff options
Diffstat (limited to 'modules/gdscript/gdscript_tokenizer.cpp')
-rw-r--r-- | modules/gdscript/gdscript_tokenizer.cpp | 133 |
1 files changed, 99 insertions, 34 deletions
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index 737e920693..3f14156dfa 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -5,8 +5,8 @@ /* GODOT ENGINE */ /* https://godotengine.org */ /*************************************************************************/ -/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ /* */ /* Permission is hereby granted, free of charge, to any person obtaining */ /* a copy of this software and associated documentation files (the */ @@ -30,7 +30,7 @@ #include "gdscript_tokenizer.h" -#include "core/error_macros.h" +#include "core/error/error_macros.h" #ifdef TOOLS_ENABLED #include "editor/editor_settings.h" @@ -221,8 +221,8 @@ String GDScriptTokenizer::get_token_name(Token::Type p_token_type) { void GDScriptTokenizer::set_source_code(const String &p_source_code) { source = p_source_code; - if (source.empty()) { - _source = L""; + if (source.is_empty()) { + _source = U""; } else { _source = source.ptr(); } @@ -242,6 +242,16 @@ void GDScriptTokenizer::set_multiline_mode(bool p_state) { multiline_mode = p_state; } +void GDScriptTokenizer::push_expression_indented_block() { + indent_stack_stack.push_back(indent_stack); +} + +void GDScriptTokenizer::pop_expression_indented_block() { + ERR_FAIL_COND(indent_stack_stack.size() == 0); + indent_stack = indent_stack_stack.back()->get(); + indent_stack_stack.pop_back(); +} + int GDScriptTokenizer::get_cursor_line() const { return cursor_line; } @@ -263,7 +273,7 @@ bool GDScriptTokenizer::is_past_cursor() const { return true; } -CharType GDScriptTokenizer::_advance() { +char32_t GDScriptTokenizer::_advance() { if (unlikely(_is_at_end())) { return '\0'; } @@ -282,15 +292,15 @@ CharType GDScriptTokenizer::_advance() { return _peek(-1); } -void GDScriptTokenizer::push_paren(CharType p_char) { +void GDScriptTokenizer::push_paren(char32_t p_char) { paren_stack.push_back(p_char); } -bool GDScriptTokenizer::pop_paren(CharType p_expected) { - if (paren_stack.empty()) { +bool GDScriptTokenizer::pop_paren(char32_t p_expected) { + if (paren_stack.is_empty()) { return false; } - CharType actual = paren_stack.back()->get(); + char32_t actual = paren_stack.back()->get(); paren_stack.pop_back(); return actual == p_expected; @@ -302,19 +312,19 @@ GDScriptTokenizer::Token GDScriptTokenizer::pop_error() { return error; } -static bool _is_alphanumeric(CharType c) { +static bool _is_alphanumeric(char32_t c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; } -static bool _is_digit(CharType c) { +static bool _is_digit(char32_t c) { return (c >= '0' && c <= '9'); } -static bool _is_hex_digit(CharType c) { +static bool _is_hex_digit(char32_t c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } -static bool _is_binary_digit(CharType c) { +static bool _is_binary_digit(char32_t c) { return (c == '0' || c == '1'); } @@ -404,8 +414,8 @@ void GDScriptTokenizer::push_error(const Token &p_error) { error_stack.push_back(p_error); } -GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(CharType p_paren) { - if (paren_stack.empty()) { +GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(char32_t p_paren) { + if (paren_stack.is_empty()) { return make_error(vformat("Closing \"%c\" doesn't have an opening counterpart.", p_paren)); } Token error = make_error(vformat("Closing \"%c\" doesn't match the opening \"%c\".", p_paren, paren_stack.back()->get())); @@ -413,8 +423,8 @@ GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(CharType p_paren) { return error; } -GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(CharType p_test, Token::Type p_double_type) { - const CharType *next = _current + 1; +GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, Token::Type p_double_type) { + const char32_t *next = _current + 1; int chars = 2; // Two already matched. // Test before consuming characters, since we don't want to consume more than needed. @@ -578,7 +588,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { } void GDScriptTokenizer::newline(bool p_make_token) { - // Don't overwrite previous newline, nor create if we want a line contination. + // Don't overwrite previous newline, nor create if we want a line continuation. if (p_make_token && !pending_newline && !line_continuation) { Token newline(Token::NEWLINE); newline.start_line = line; @@ -602,7 +612,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() { bool has_decimal = false; bool has_exponent = false; bool has_error = false; - bool (*digit_check_func)(CharType) = _is_digit; + bool (*digit_check_func)(char32_t) = _is_digit; if (_peek(-1) == '.') { has_decimal = true; @@ -621,7 +631,21 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() { } // Allow '_' to be used in a number, for readability. + bool previous_was_underscore = false; while (digit_check_func(_peek()) || _peek() == '_') { + if (_peek() == '_') { + if (previous_was_underscore) { + Token error = make_error(R"(Only one underscore can be used as a numeric separator.)"); + error.start_column = column; + error.leftmost_column = column; + error.end_column = column + 1; + error.rightmost_column = column + 1; + push_error(error); + } + previous_was_underscore = true; + } else { + previous_was_underscore = false; + } _advance(); } @@ -672,7 +696,29 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() { _advance(); } // Consume exponent digits. + if (!_is_digit(_peek())) { + Token error = make_error(R"(Expected exponent value after "e".)"); + error.start_column = column; + error.leftmost_column = column; + error.end_column = column + 1; + error.rightmost_column = column + 1; + push_error(error); + } + previous_was_underscore = false; while (_is_digit(_peek()) || _peek() == '_') { + if (_peek() == '_') { + if (previous_was_underscore) { + Token error = make_error(R"(Only one underscore can be used as a numeric separator.)"); + error.start_column = column; + error.leftmost_column = column; + error.end_column = column + 1; + error.rightmost_column = column + 1; + push_error(error); + } + previous_was_underscore = true; + } else { + previous_was_underscore = false; + } _advance(); } } @@ -730,7 +776,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { _advance(); } - CharType quote_char = _peek(-1); + char32_t quote_char = _peek(-1); if (_peek() == quote_char && _peek(1) == quote_char) { is_multiline = true; @@ -747,7 +793,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { return make_error("Unterminated string."); } - CharType ch = _peek(); + char32_t ch = _peek(); if (ch == '\\') { // Escape pattern. @@ -757,13 +803,13 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { } // Grab escape character. - CharType code = _peek(); + char32_t code = _peek(); _advance(); if (_is_at_end()) { return make_error("Unterminated string."); } - CharType escaped = 0; + char32_t escaped = 0; bool valid_escape = true; switch (code) { @@ -804,8 +850,8 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { return make_error("Unterminated string."); } - CharType digit = _peek(); - CharType value = 0; + char32_t digit = _peek(); + char32_t value = 0; if (digit >= '0' && digit <= '9') { value = digit - '0'; } else if (digit >= 'a' && digit <= 'f') { @@ -866,6 +912,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() { _advance(); _advance(); break; + } else { + // Not a multiline string termination, add consumed quote. + result += quote_char; } } else { // Ended single-line string. @@ -908,7 +957,7 @@ void GDScriptTokenizer::check_indent() { } for (;;) { - CharType current_indent_char = _peek(); + char32_t current_indent_char = _peek(); int indent_count = 0; if (current_indent_char != ' ' && current_indent_char != '\t' && current_indent_char != '\r' && current_indent_char != '\n' && current_indent_char != '#') { @@ -938,7 +987,7 @@ void GDScriptTokenizer::check_indent() { // Check indent level. bool mixed = false; while (!_is_at_end()) { - CharType space = _peek(); + char32_t space = _peek(); if (space == '\t') { // Consider individual tab columns. column += tab_size - 1; @@ -982,9 +1031,17 @@ void GDScriptTokenizer::check_indent() { } if (_peek() == '#') { // Comment. Advance to the next line. +#ifdef TOOLS_ENABLED + String comment; + while (_peek() != '\n' && !_is_at_end()) { + comment += _advance(); + } + comments[line] = CommentData(comment, true); +#else while (_peek() != '\n' && !_is_at_end()) { _advance(); } +#endif // TOOLS_ENABLED if (_is_at_end()) { // Reached the end with an empty line, so just dedent as much as needed. pending_indents -= indent_level(); @@ -1007,7 +1064,7 @@ void GDScriptTokenizer::check_indent() { // First time indenting, choose character now. indent_char = current_indent_char; } else if (current_indent_char != indent_char) { - Token error = make_error(vformat("Used \"%c\" for indentation instead \"%c\" as used before in the file.", String(¤t_indent_char, 1).c_escape(), String(&indent_char, 1).c_escape())); + Token error = make_error(vformat("Used \"%s\" for indentation instead \"%s\" as used before in the file.", String(¤t_indent_char, 1).c_escape(), String(&indent_char, 1).c_escape())); error.start_line = line; error.start_column = 1; error.leftmost_column = 1; @@ -1071,7 +1128,7 @@ void GDScriptTokenizer::_skip_whitespace() { } for (;;) { - CharType c = _peek(); + char32_t c = _peek(); switch (c) { case ' ': _advance(); @@ -1093,18 +1150,26 @@ void GDScriptTokenizer::_skip_whitespace() { newline(!is_bol); // Don't create new line token if line is empty. check_indent(); break; - case '#': + case '#': { // Comment. +#ifdef TOOLS_ENABLED + String comment; + while (_peek() != '\n' && !_is_at_end()) { + comment += _advance(); + } + comments[line] = CommentData(comment, is_bol); +#else while (_peek() != '\n' && !_is_at_end()) { _advance(); } +#endif // TOOLS_ENABLED if (_is_at_end()) { return; } _advance(); // Consume '\n' newline(!is_bol); check_indent(); - break; + } break; default: return; } @@ -1121,7 +1186,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() { if (pending_newline) { pending_newline = false; if (!multiline_mode) { - // Don't return newline tokens on multine mode. + // Don't return newline tokens on multiline mode. return last_newline; } } @@ -1160,7 +1225,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() { return make_token(Token::TK_EOF); } - const CharType c = _advance(); + const char32_t c = _advance(); if (c == '\\') { // Line continuation with backslash. |