summaryrefslogtreecommitdiff
path: root/modules/gdscript/gdscript_tokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'modules/gdscript/gdscript_tokenizer.cpp')
-rw-r--r--modules/gdscript/gdscript_tokenizer.cpp2458
1 files changed, 1138 insertions, 1320 deletions
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp
index 82def3f877..7a4bdd88ba 100644
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -30,1476 +30,1294 @@
#include "gdscript_tokenizer.h"
-#include "core/io/marshalls.h"
-#include "core/map.h"
-#include "core/print_string.h"
-#include "gdscript_functions.h"
-
-const char *GDScriptTokenizer::token_names[TK_MAX] = {
- "Empty",
- "Identifier",
- "Constant",
- "Self",
- "Built-In Type",
- "Built-In Func",
- "In",
- "'=='",
- "'!='",
- "'<'",
- "'<='",
- "'>'",
- "'>='",
- "'and'",
- "'or'",
- "'not'",
- "'+'",
- "'-'",
- "'*'",
- "'/'",
- "'%'",
- "'<<'",
- "'>>'",
- "'='",
- "'+='",
- "'-='",
- "'*='",
- "'/='",
- "'%='",
- "'<<='",
- "'>>='",
- "'&='",
- "'|='",
- "'^='",
- "'&'",
- "'|'",
- "'^'",
- "'~'",
- //"Plus Plus",
- //"Minus Minus",
- "if",
- "elif",
- "else",
- "for",
- "while",
- "break",
- "continue",
- "pass",
- "return",
- "match",
- "func",
- "class",
- "class_name",
- "extends",
- "is",
- "onready",
- "tool",
- "static",
- "export",
- "setget",
- "const",
- "var",
- "as",
- "void",
- "enum",
- "preload",
- "assert",
- "yield",
- "signal",
- "breakpoint",
- "remote",
- "master",
- "puppet",
- "remotesync",
- "mastersync",
- "puppetsync",
- "'['",
- "']'",
- "'{'",
- "'}'",
- "'('",
- "')'",
- "','",
- "';'",
- "'.'",
- "'?'",
- "':'",
- "'$'",
- "'->'",
- "'\\n'",
- "PI",
- "TAU",
- "_",
- "INF",
- "NAN",
- "Error",
- "EOF",
- "Cursor"
+#include "core/error_macros.h"
+
+#ifdef TOOLS_ENABLED
+#include "editor/editor_settings.h"
+#endif
+
+static const char *token_names[] = {
+ "Empty", // EMPTY,
+ // Basic
+ "Annotation", // ANNOTATION
+ "Identifier", // IDENTIFIER,
+ "Literal", // LITERAL,
+ // Comparison
+ "<", // LESS,
+ "<=", // LESS_EQUAL,
+ ">", // GREATER,
+ ">=", // GREATER_EQUAL,
+ "==", // EQUAL_EQUAL,
+ "!=", // BANG_EQUAL,
+ // Logical
+ "and", // AND,
+ "or", // OR,
+ "not", // NOT,
+ "&&", // AMPERSAND_AMPERSAND,
+ "||", // PIPE_PIPE,
+ "!", // BANG,
+ // Bitwise
+ "&", // AMPERSAND,
+ "|", // PIPE,
+ "~", // TILDE,
+ "^", // CARET,
+ "<<", // LESS_LESS,
+ ">>", // GREATER_GREATER,
+ // Math
+ "+", // PLUS,
+ "-", // MINUS,
+ "*", // STAR,
+ "/", // SLASH,
+ "%", // PERCENT,
+ // Assignment
+ "=", // EQUAL,
+ "+=", // PLUS_EQUAL,
+ "-=", // MINUS_EQUAL,
+ "*=", // STAR_EQUAL,
+ "/=", // SLASH_EQUAL,
+ "%=", // PERCENT_EQUAL,
+ "<<=", // LESS_LESS_EQUAL,
+ ">>=", // GREATER_GREATER_EQUAL,
+ "&=", // AMPERSAND_EQUAL,
+ "|=", // PIPE_EQUAL,
+ "^=", // CARET_EQUAL,
+ // Control flow
+ "if", // IF,
+ "elif", // ELIF,
+ "else", // ELSE,
+ "for", // FOR,
+ "while", // WHILE,
+ "break", // BREAK,
+ "continue", // CONTINUE,
+ "pass", // PASS,
+ "return", // RETURN,
+ "match", // MATCH,
+ // Keywords
+ "as", // AS,
+ "assert", // ASSERT,
+ "await", // AWAIT,
+ "breakpoint", // BREAKPOINT,
+ "class", // CLASS,
+ "class_name", // CLASS_NAME,
+ "const", // CONST,
+ "enum", // ENUM,
+ "extends", // EXTENDS,
+ "func", // FUNC,
+ "in", // IN,
+ "is", // IS,
+ "namespace", // NAMESPACE
+ "preload", // PRELOAD,
+ "self", // SELF,
+ "signal", // SIGNAL,
+ "static", // STATIC,
+ "super", // SUPER,
+ "trait", // TRAIT,
+ "var", // VAR,
+ "void", // VOID,
+ "yield", // YIELD,
+ // Punctuation
+ "[", // BRACKET_OPEN,
+ "]", // BRACKET_CLOSE,
+ "{", // BRACE_OPEN,
+ "}", // BRACE_CLOSE,
+ "(", // PARENTHESIS_OPEN,
+ ")", // PARENTHESIS_CLOSE,
+ ",", // COMMA,
+ ";", // SEMICOLON,
+ ".", // PERIOD,
+ "..", // PERIOD_PERIOD,
+ ":", // COLON,
+ "$", // DOLLAR,
+ "->", // FORWARD_ARROW,
+ "_", // UNDERSCORE,
+ // Whitespace
+ "Newline", // NEWLINE,
+ "Indent", // INDENT,
+ "Dedent", // DEDENT,
+ // Constants
+ "PI", // CONST_PI,
+ "TAU", // CONST_TAU,
+ "INF", // CONST_INF,
+ "NaN", // CONST_NAN,
+ // Error message improvement
+ "VCS conflict marker", // VCS_CONFLICT_MARKER,
+ "`", // BACKTICK,
+ "?", // QUESTION_MARK,
+ // Special
+ "Error", // ERROR,
+ "End of file", // EOF,
};
-struct _bit {
- Variant::Type type;
- const char *text;
-};
-//built in types
-
-static const _bit _type_list[] = {
- //types
- { Variant::BOOL, "bool" },
- { Variant::INT, "int" },
- { Variant::FLOAT, "float" },
- { Variant::STRING, "String" },
- { Variant::VECTOR2, "Vector2" },
- { Variant::VECTOR2I, "Vector2i" },
- { Variant::RECT2, "Rect2" },
- { Variant::RECT2I, "Rect2i" },
- { Variant::TRANSFORM2D, "Transform2D" },
- { Variant::VECTOR3, "Vector3" },
- { Variant::VECTOR3I, "Vector3i" },
- { Variant::AABB, "AABB" },
- { Variant::PLANE, "Plane" },
- { Variant::QUAT, "Quat" },
- { Variant::BASIS, "Basis" },
- { Variant::TRANSFORM, "Transform" },
- { Variant::COLOR, "Color" },
- { Variant::_RID, "RID" },
- { Variant::OBJECT, "Object" },
- { Variant::STRING_NAME, "StringName" },
- { Variant::NODE_PATH, "NodePath" },
- { Variant::DICTIONARY, "Dictionary" },
- { Variant::CALLABLE, "Callable" },
- { Variant::SIGNAL, "Signal" },
- { Variant::ARRAY, "Array" },
- { Variant::PACKED_BYTE_ARRAY, "PackedByteArray" },
- { Variant::PACKED_INT32_ARRAY, "PackedInt32Array" },
- { Variant::PACKED_INT64_ARRAY, "PackedInt64Array" },
- { Variant::PACKED_FLOAT32_ARRAY, "PackedFloat32Array" },
- { Variant::PACKED_FLOAT64_ARRAY, "PackedFloat64Array" },
- { Variant::PACKED_STRING_ARRAY, "PackedStringArray" },
- { Variant::PACKED_VECTOR2_ARRAY, "PackedVector2Array" },
- { Variant::PACKED_VECTOR3_ARRAY, "PackedVector3Array" },
- { Variant::PACKED_COLOR_ARRAY, "PackedColorArray" },
- { Variant::VARIANT_MAX, nullptr },
-};
-
-struct _kws {
- GDScriptTokenizer::Token token;
- const char *text;
-};
-
-static const _kws _keyword_list[] = {
- //ops
- { GDScriptTokenizer::TK_OP_IN, "in" },
- { GDScriptTokenizer::TK_OP_NOT, "not" },
- { GDScriptTokenizer::TK_OP_OR, "or" },
- { GDScriptTokenizer::TK_OP_AND, "and" },
- //func
- { GDScriptTokenizer::TK_PR_FUNCTION, "func" },
- { GDScriptTokenizer::TK_PR_CLASS, "class" },
- { GDScriptTokenizer::TK_PR_CLASS_NAME, "class_name" },
- { GDScriptTokenizer::TK_PR_EXTENDS, "extends" },
- { GDScriptTokenizer::TK_PR_IS, "is" },
- { GDScriptTokenizer::TK_PR_ONREADY, "onready" },
- { GDScriptTokenizer::TK_PR_TOOL, "tool" },
- { GDScriptTokenizer::TK_PR_STATIC, "static" },
- { GDScriptTokenizer::TK_PR_EXPORT, "export" },
- { GDScriptTokenizer::TK_PR_SETGET, "setget" },
- { GDScriptTokenizer::TK_PR_VAR, "var" },
- { GDScriptTokenizer::TK_PR_AS, "as" },
- { GDScriptTokenizer::TK_PR_VOID, "void" },
- { GDScriptTokenizer::TK_PR_PRELOAD, "preload" },
- { GDScriptTokenizer::TK_PR_ASSERT, "assert" },
- { GDScriptTokenizer::TK_PR_YIELD, "yield" },
- { GDScriptTokenizer::TK_PR_SIGNAL, "signal" },
- { GDScriptTokenizer::TK_PR_BREAKPOINT, "breakpoint" },
- { GDScriptTokenizer::TK_PR_REMOTE, "remote" },
- { GDScriptTokenizer::TK_PR_MASTER, "master" },
- { GDScriptTokenizer::TK_PR_PUPPET, "puppet" },
- { GDScriptTokenizer::TK_PR_REMOTESYNC, "remotesync" },
- { GDScriptTokenizer::TK_PR_MASTERSYNC, "mastersync" },
- { GDScriptTokenizer::TK_PR_PUPPETSYNC, "puppetsync" },
- { GDScriptTokenizer::TK_PR_CONST, "const" },
- { GDScriptTokenizer::TK_PR_ENUM, "enum" },
- //controlflow
- { GDScriptTokenizer::TK_CF_IF, "if" },
- { GDScriptTokenizer::TK_CF_ELIF, "elif" },
- { GDScriptTokenizer::TK_CF_ELSE, "else" },
- { GDScriptTokenizer::TK_CF_FOR, "for" },
- { GDScriptTokenizer::TK_CF_WHILE, "while" },
- { GDScriptTokenizer::TK_CF_BREAK, "break" },
- { GDScriptTokenizer::TK_CF_CONTINUE, "continue" },
- { GDScriptTokenizer::TK_CF_RETURN, "return" },
- { GDScriptTokenizer::TK_CF_MATCH, "match" },
- { GDScriptTokenizer::TK_CF_PASS, "pass" },
- { GDScriptTokenizer::TK_SELF, "self" },
- { GDScriptTokenizer::TK_CONST_PI, "PI" },
- { GDScriptTokenizer::TK_CONST_TAU, "TAU" },
- { GDScriptTokenizer::TK_WILDCARD, "_" },
- { GDScriptTokenizer::TK_CONST_INF, "INF" },
- { GDScriptTokenizer::TK_CONST_NAN, "NAN" },
- { GDScriptTokenizer::TK_ERROR, nullptr }
-};
+// Avoid desync.
+static_assert(sizeof(token_names) / sizeof(token_names[0]) == GDScriptTokenizer::Token::TK_MAX, "Amount of token names don't match the amount of token types.");
-const char *GDScriptTokenizer::get_token_name(Token p_token) {
- ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
- return token_names[p_token];
+const char *GDScriptTokenizer::Token::get_name() const {
+ ERR_FAIL_INDEX_V_MSG(type, TK_MAX, "<error>", "Using token type out of the enum.");
+ return token_names[type];
}
-bool GDScriptTokenizer::is_token_literal(int p_offset, bool variable_safe) const {
- switch (get_token(p_offset)) {
- // Can always be literal:
- case TK_IDENTIFIER:
-
- case TK_PR_ONREADY:
- case TK_PR_TOOL:
- case TK_PR_STATIC:
- case TK_PR_EXPORT:
- case TK_PR_SETGET:
- case TK_PR_SIGNAL:
- case TK_PR_REMOTE:
- case TK_PR_MASTER:
- case TK_PR_PUPPET:
- case TK_PR_REMOTESYNC:
- case TK_PR_MASTERSYNC:
- case TK_PR_PUPPETSYNC:
- return true;
-
- // Literal for non-variables only:
- case TK_BUILT_IN_TYPE:
- case TK_BUILT_IN_FUNC:
-
- case TK_OP_IN:
- //case TK_OP_NOT:
- //case TK_OP_OR:
- //case TK_OP_AND:
-
- case TK_PR_CLASS:
- case TK_PR_CONST:
- case TK_PR_ENUM:
- case TK_PR_PRELOAD:
- case TK_PR_FUNCTION:
- case TK_PR_EXTENDS:
- case TK_PR_ASSERT:
- case TK_PR_YIELD:
- case TK_PR_VAR:
-
- case TK_CF_IF:
- case TK_CF_ELIF:
- case TK_CF_ELSE:
- case TK_CF_FOR:
- case TK_CF_WHILE:
- case TK_CF_BREAK:
- case TK_CF_CONTINUE:
- case TK_CF_RETURN:
- case TK_CF_MATCH:
- case TK_CF_PASS:
- case TK_SELF:
- case TK_CONST_PI:
- case TK_CONST_TAU:
- case TK_WILDCARD:
- case TK_CONST_INF:
- case TK_CONST_NAN:
- case TK_ERROR:
- return !variable_safe;
-
- case TK_CONSTANT: {
- switch (get_token_constant(p_offset).get_type()) {
- case Variant::NIL:
- case Variant::BOOL:
- return true;
- default:
- return false;
- }
- }
- default:
- return false;
- }
+String GDScriptTokenizer::get_token_name(Token::Type p_token_type) {
+ ERR_FAIL_INDEX_V_MSG(p_token_type, Token::TK_MAX, "<error>", "Using token type out of the enum.");
+ return token_names[p_token_type];
}
-StringName GDScriptTokenizer::get_token_literal(int p_offset) const {
- Token token = get_token(p_offset);
- switch (token) {
- case TK_IDENTIFIER:
- return get_token_identifier(p_offset);
- case TK_BUILT_IN_TYPE: {
- Variant::Type type = get_token_type(p_offset);
- int idx = 0;
-
- while (_type_list[idx].text) {
- if (type == _type_list[idx].type) {
- return _type_list[idx].text;
- }
- idx++;
- }
- } break; // Shouldn't get here, stuff happens
- case TK_BUILT_IN_FUNC:
- return GDScriptFunctions::get_func_name(get_token_built_in_func(p_offset));
- case TK_CONSTANT: {
- const Variant value = get_token_constant(p_offset);
-
- switch (value.get_type()) {
- case Variant::NIL:
- return "null";
- case Variant::BOOL:
- return value ? "true" : "false";
- default: {
- }
- }
- } break;
- case TK_OP_AND:
- case TK_OP_OR:
- break; // Don't get into default, since they can be non-literal
- default: {
- int idx = 0;
-
- while (_keyword_list[idx].text) {
- if (token == _keyword_list[idx].token) {
- return _keyword_list[idx].text;
- }
- idx++;
- }
- }
+void GDScriptTokenizer::set_source_code(const String &p_source_code) {
+ source = p_source_code;
+ if (source.empty()) {
+ _source = L"";
+ } else {
+ _source = source.ptr();
}
- ERR_FAIL_V_MSG("", "Failed to get token literal.");
+ _current = _source;
+ line = 1;
+ column = 1;
+ length = p_source_code.length();
+ position = 0;
}
-static bool _is_text_char(CharType c) {
- return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
+void GDScriptTokenizer::set_cursor_position(int p_line, int p_column) {
+ cursor_line = p_line;
+ cursor_column = p_column;
}
-static bool _is_number(CharType c) {
- return (c >= '0' && c <= '9');
+void GDScriptTokenizer::set_multiline_mode(bool p_state) {
+ multiline_mode = p_state;
}
-static bool _is_hex(CharType c) {
- return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+int GDScriptTokenizer::get_cursor_line() const {
+ return cursor_line;
}
-static bool _is_bin(CharType c) {
- return (c == '0' || c == '1');
+int GDScriptTokenizer::get_cursor_column() const {
+ return cursor_column;
}
-void GDScriptTokenizerText::_make_token(Token p_type) {
- TokenData &tk = tk_rb[tk_rb_pos];
-
- tk.type = p_type;
- tk.line = line;
- tk.col = column;
-
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+bool GDScriptTokenizer::is_past_cursor() const {
+ if (line < cursor_line) {
+ return false;
+ }
+ if (line > cursor_line) {
+ return true;
+ }
+ if (column < cursor_column) {
+ return false;
+ }
+ return true;
}
-void GDScriptTokenizerText::_make_identifier(const StringName &p_identifier) {
- TokenData &tk = tk_rb[tk_rb_pos];
-
- tk.type = TK_IDENTIFIER;
- tk.identifier = p_identifier;
- tk.line = line;
- tk.col = column;
-
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+CharType GDScriptTokenizer::_advance() {
+ if (unlikely(_is_at_end())) {
+ return '\0';
+ }
+ _current++;
+ column++;
+ position++;
+ if (column > rightmost_column) {
+ rightmost_column = column;
+ }
+ if (unlikely(_is_at_end())) {
+ // Add extra newline even if it's not there, to satisfy the parser.
+ newline(true);
+ // Also add needed unindent.
+ check_indent();
+ }
+ return _peek(-1);
}
-void GDScriptTokenizerText::_make_built_in_func(GDScriptFunctions::Function p_func) {
- TokenData &tk = tk_rb[tk_rb_pos];
-
- tk.type = TK_BUILT_IN_FUNC;
- tk.func = p_func;
- tk.line = line;
- tk.col = column;
-
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+void GDScriptTokenizer::push_paren(CharType p_char) {
+ paren_stack.push_back(p_char);
}
-void GDScriptTokenizerText::_make_constant(const Variant &p_constant) {
- TokenData &tk = tk_rb[tk_rb_pos];
-
- tk.type = TK_CONSTANT;
- tk.constant = p_constant;
- tk.line = line;
- tk.col = column;
+bool GDScriptTokenizer::pop_paren(CharType p_expected) {
+ if (paren_stack.empty()) {
+ return false;
+ }
+ CharType actual = paren_stack.back()->get();
+ paren_stack.pop_back();
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+ return actual == p_expected;
}
-void GDScriptTokenizerText::_make_type(const Variant::Type &p_type) {
- TokenData &tk = tk_rb[tk_rb_pos];
-
- tk.type = TK_BUILT_IN_TYPE;
- tk.vtype = p_type;
- tk.line = line;
- tk.col = column;
-
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+GDScriptTokenizer::Token GDScriptTokenizer::pop_error() {
+ Token error = error_stack.back()->get();
+ error_stack.pop_back();
+ return error;
}
-void GDScriptTokenizerText::_make_error(const String &p_error) {
- error_flag = true;
- last_error = p_error;
-
- TokenData &tk = tk_rb[tk_rb_pos];
- tk.type = TK_ERROR;
- tk.constant = p_error;
- tk.line = line;
- tk.col = column;
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+static bool _is_alphanumeric(CharType c) {
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
}
-void GDScriptTokenizerText::_make_newline(int p_indentation, int p_tabs) {
- TokenData &tk = tk_rb[tk_rb_pos];
- tk.type = TK_NEWLINE;
- tk.constant = Vector2(p_indentation, p_tabs);
- tk.line = line;
- tk.col = column;
- tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
+static bool _is_digit(CharType c) {
+ return (c >= '0' && c <= '9');
}
-void GDScriptTokenizerText::_advance() {
- if (error_flag) {
- //parser broke
- _make_error(last_error);
- return;
- }
-
- if (code_pos >= len) {
- _make_token(TK_EOF);
- return;
- }
-#define GETCHAR(m_ofs) ((m_ofs + code_pos) >= len ? 0 : _code[m_ofs + code_pos])
-#define INCPOS(m_amount) \
- { \
- code_pos += m_amount; \
- column += m_amount; \
- }
- while (true) {
- bool is_string_name = false;
- StringMode string_mode = STRING_DOUBLE_QUOTE;
-
- switch (GETCHAR(0)) {
- case 0:
- _make_token(TK_EOF);
- break;
- case '\\':
- INCPOS(1);
- if (GETCHAR(0) == '\r') {
- INCPOS(1);
- }
-
- if (GETCHAR(0) != '\n') {
- _make_error("Expected newline after '\\'.");
- return;
- }
-
- INCPOS(1);
- line++;
-
- while (GETCHAR(0) == ' ' || GETCHAR(0) == '\t') {
- INCPOS(1);
- }
-
- continue;
- case '\t':
- case '\r':
- case ' ':
- INCPOS(1);
- continue;
- case '#': { // line comment skip
-#ifdef DEBUG_ENABLED
- String comment;
-#endif // DEBUG_ENABLED
- while (GETCHAR(0) != '\n') {
-#ifdef DEBUG_ENABLED
- comment += GETCHAR(0);
-#endif // DEBUG_ENABLED
- code_pos++;
- if (GETCHAR(0) == 0) { //end of file
- //_make_error("Unterminated Comment");
- _make_token(TK_EOF);
- return;
- }
- }
-#ifdef DEBUG_ENABLED
- String comment_content = comment.trim_prefix("#").trim_prefix(" ");
- if (comment_content.begins_with("warning-ignore:")) {
- String code = comment_content.get_slice(":", 1);
- warning_skips.push_back(Pair<int, String>(line, code.strip_edges().to_lower()));
- } else if (comment_content.begins_with("warning-ignore-all:")) {
- String code = comment_content.get_slice(":", 1);
- warning_global_skips.insert(code.strip_edges().to_lower());
- } else if (comment_content.strip_edges() == "warnings-disable") {
- ignore_warnings = true;
- }
-#endif // DEBUG_ENABLED
- [[fallthrough]];
- }
- case '\n': {
- line++;
- INCPOS(1);
- bool used_spaces = false;
- int tabs = 0;
- column = 1;
- int i = 0;
- while (true) {
- if (GETCHAR(i) == ' ') {
- i++;
- used_spaces = true;
- } else if (GETCHAR(i) == '\t') {
- if (used_spaces) {
- _make_error("Spaces used before tabs on a line");
- return;
- }
- i++;
- tabs++;
- } else {
- break; // not indentation anymore
- }
- }
-
- _make_newline(i, tabs);
- return;
- }
- case '/': {
- switch (GETCHAR(1)) {
- case '=': { // diveq
-
- _make_token(TK_OP_ASSIGN_DIV);
- INCPOS(1);
-
- } break;
- default:
- _make_token(TK_OP_DIV);
- }
- } break;
- case '=': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_EQUAL);
- INCPOS(1);
-
- } else {
- _make_token(TK_OP_ASSIGN);
- }
-
- } break;
- case '<': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_LESS_EQUAL);
- INCPOS(1);
- } else if (GETCHAR(1) == '<') {
- if (GETCHAR(2) == '=') {
- _make_token(TK_OP_ASSIGN_SHIFT_LEFT);
- INCPOS(1);
- } else {
- _make_token(TK_OP_SHIFT_LEFT);
- }
- INCPOS(1);
- } else {
- _make_token(TK_OP_LESS);
- }
-
- } break;
- case '>': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_GREATER_EQUAL);
- INCPOS(1);
- } else if (GETCHAR(1) == '>') {
- if (GETCHAR(2) == '=') {
- _make_token(TK_OP_ASSIGN_SHIFT_RIGHT);
- INCPOS(1);
-
- } else {
- _make_token(TK_OP_SHIFT_RIGHT);
- }
- INCPOS(1);
- } else {
- _make_token(TK_OP_GREATER);
- }
-
- } break;
- case '!': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_NOT_EQUAL);
- INCPOS(1);
- } else {
- _make_token(TK_OP_NOT);
- }
-
- } break;
- //case '"' //string - no strings in shader
- //case '\'' //string - no strings in shader
- case '{':
- _make_token(TK_CURLY_BRACKET_OPEN);
- break;
- case '}':
- _make_token(TK_CURLY_BRACKET_CLOSE);
- break;
- case '[':
- _make_token(TK_BRACKET_OPEN);
- break;
- case ']':
- _make_token(TK_BRACKET_CLOSE);
- break;
- case '(':
- _make_token(TK_PARENTHESIS_OPEN);
- break;
- case ')':
- _make_token(TK_PARENTHESIS_CLOSE);
- break;
- case ',':
- _make_token(TK_COMMA);
- break;
- case ';':
- _make_token(TK_SEMICOLON);
- break;
- case '?':
- _make_token(TK_QUESTION_MARK);
- break;
- case ':':
- _make_token(TK_COLON); //for methods maybe but now useless.
- break;
- case '$':
- _make_token(TK_DOLLAR); //for the get_node() shortener
- break;
- case '^': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_BIT_XOR);
- INCPOS(1);
- } else {
- _make_token(TK_OP_BIT_XOR);
- }
+static bool _is_hex_digit(CharType c) {
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+}
- } break;
- case '~':
- _make_token(TK_OP_BIT_INVERT);
- break;
- case '&': {
- if (GETCHAR(1) == '&') {
- _make_token(TK_OP_AND);
- INCPOS(1);
- } else if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_BIT_AND);
- INCPOS(1);
- } else {
- _make_token(TK_OP_BIT_AND);
- }
- } break;
- case '|': {
- if (GETCHAR(1) == '|') {
- _make_token(TK_OP_OR);
- INCPOS(1);
- } else if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_BIT_OR);
- INCPOS(1);
- } else {
- _make_token(TK_OP_BIT_OR);
- }
- } break;
- case '*': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_MUL);
- INCPOS(1);
- } else {
- _make_token(TK_OP_MUL);
- }
- } break;
- case '+': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_ADD);
- INCPOS(1);
- /*
- } else if (GETCHAR(1)=='+') {
- _make_token(TK_OP_PLUS_PLUS);
- INCPOS(1);
- */
- } else {
- _make_token(TK_OP_ADD);
- }
+static bool _is_binary_digit(CharType c) {
+ return (c == '0' || c == '1');
+}
- } break;
- case '-': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_SUB);
- INCPOS(1);
- } else if (GETCHAR(1) == '>') {
- _make_token(TK_FORWARD_ARROW);
- INCPOS(1);
+GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) {
+ Token token(p_type);
+ token.start_line = start_line;
+ token.end_line = line;
+ token.start_column = start_column;
+ token.end_column = column;
+ token.leftmost_column = leftmost_column;
+ token.rightmost_column = rightmost_column;
+ token.source = String(_start, _current - _start);
+
+ if (p_type != Token::ERROR && cursor_line > -1) {
+ // Also count whitespace after token.
+ int offset = 0;
+ while (_peek(offset) == ' ' || _peek(offset) == '\t') {
+ offset++;
+ }
+ int last_column = column + offset;
+ // Check cursor position in token.
+ if (start_line == line) {
+ // Single line token.
+ if (cursor_line == start_line && cursor_column >= start_column && cursor_column <= last_column) {
+ token.cursor_position = cursor_column - start_column;
+ if (cursor_column == start_column) {
+ token.cursor_place = CURSOR_BEGINNING;
+ } else if (cursor_column < column) {
+ token.cursor_place = CURSOR_MIDDLE;
} else {
- _make_token(TK_OP_SUB);
+ token.cursor_place = CURSOR_END;
}
- } break;
- case '%': {
- if (GETCHAR(1) == '=') {
- _make_token(TK_OP_ASSIGN_MOD);
- INCPOS(1);
+ }
+ } else {
+ // Multi line token.
+ if (cursor_line == start_line && cursor_column >= start_column) {
+ // Is in first line.
+ token.cursor_position = cursor_column - start_column;
+ if (cursor_column == start_column) {
+ token.cursor_place = CURSOR_BEGINNING;
} else {
- _make_token(TK_OP_MOD);
- }
- } break;
- case '@':
- if (CharType(GETCHAR(1)) != '"' && CharType(GETCHAR(1)) != '\'') {
- _make_error("Unexpected '@'");
- return;
- }
- INCPOS(1);
- is_string_name = true;
- [[fallthrough]];
- case '\'':
- case '"': {
- if (GETCHAR(0) == '\'') {
- string_mode = STRING_SINGLE_QUOTE;
- }
-
- int i = 1;
- if (string_mode == STRING_DOUBLE_QUOTE && GETCHAR(i) == '"' && GETCHAR(i + 1) == '"') {
- i += 2;
- string_mode = STRING_MULTILINE;
+ token.cursor_place = CURSOR_MIDDLE;
}
-
- String str;
- while (true) {
- if (CharType(GETCHAR(i)) == 0) {
- _make_error("Unterminated String");
- return;
- } else if (string_mode == STRING_DOUBLE_QUOTE && CharType(GETCHAR(i)) == '"') {
- break;
- } else if (string_mode == STRING_SINGLE_QUOTE && CharType(GETCHAR(i)) == '\'') {
- break;
- } else if (string_mode == STRING_MULTILINE && CharType(GETCHAR(i)) == '\"' && CharType(GETCHAR(i + 1)) == '\"' && CharType(GETCHAR(i + 2)) == '\"') {
- i += 2;
- break;
- } else if (string_mode != STRING_MULTILINE && CharType(GETCHAR(i)) == '\n') {
- _make_error("Unexpected EOL at String.");
- return;
- } else if (CharType(GETCHAR(i)) == 0xFFFF) {
- //string ends here, next will be TK
- i--;
- break;
- } else if (CharType(GETCHAR(i)) == '\\') {
- //escaped characters...
- i++;
- CharType next = GETCHAR(i);
- if (next == 0) {
- _make_error("Unterminated String");
- return;
- }
- CharType res = 0;
-
- switch (next) {
- case 'a':
- res = '\a';
- break;
- case 'b':
- res = '\b';
- break;
- case 't':
- res = '\t';
- break;
- case 'n':
- res = '\n';
- break;
- case 'v':
- res = '\v';
- break;
- case 'f':
- res = '\f';
- break;
- case 'r':
- res = '\r';
- break;
- case '\'':
- res = '\'';
- break;
- case '\"':
- res = '\"';
- break;
- case '\\':
- res = '\\';
- break;
-
- case 'u': {
- // hex number
- i += 1;
- for (int j = 0; j < 4; j++) {
- CharType c = GETCHAR(i + j);
- if (c == 0) {
- _make_error("Unterminated String");
- return;
- }
-
- CharType v = 0;
- if (c >= '0' && c <= '9') {
- v = c - '0';
- } else if (c >= 'a' && c <= 'f') {
- v = c - 'a';
- v += 10;
- } else if (c >= 'A' && c <= 'F') {
- v = c - 'A';
- v += 10;
- } else {
- _make_error("Malformed hex constant in string");
- return;
- }
-
- res <<= 4;
- res |= v;
- }
- i += 3;
-
- } break;
- case '\n': {
- line++;
- column = 1;
- } break;
- default: {
- _make_error("Invalid escape sequence");
- return;
- } break;
- }
-
- if (next != '\n') {
- str += res;
- }
-
- } else {
- if (CharType(GETCHAR(i)) == '\n') {
- line++;
- column = 1;
- }
-
- str += CharType(GETCHAR(i));
- }
- i++;
- }
- INCPOS(i);
-
- if (is_string_name) {
- _make_constant(StringName(str));
+ } else if (cursor_line == line && cursor_column <= last_column) {
+ // Is in last line.
+ token.cursor_position = cursor_column - start_column;
+ if (cursor_column < column) {
+ token.cursor_place = CURSOR_MIDDLE;
} else {
- _make_constant(str);
- }
-
- } break;
- case 0xFFFF: {
- _make_token(TK_CURSOR);
- } break;
- default: {
- if (_is_number(GETCHAR(0)) || (GETCHAR(0) == '.' && _is_number(GETCHAR(1)))) {
- // parse number
- bool period_found = false;
- bool exponent_found = false;
- bool hexa_found = false;
- bool bin_found = false;
- bool sign_found = false;
-
- String str;
- int i = 0;
-
- while (true) {
- if (GETCHAR(i) == '.') {
- if (period_found || exponent_found) {
- _make_error("Invalid numeric constant at '.'");
- return;
- } else if (bin_found) {
- _make_error("Invalid binary constant at '.'");
- return;
- } else if (hexa_found) {
- _make_error("Invalid hexadecimal constant at '.'");
- return;
- }
- period_found = true;
- } else if (GETCHAR(i) == 'x') {
- if (hexa_found || bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
- _make_error("Invalid numeric constant at 'x'");
- return;
- }
- hexa_found = true;
- } else if (hexa_found && _is_hex(GETCHAR(i))) {
- } else if (!hexa_found && GETCHAR(i) == 'b') {
- if (bin_found || str.length() != 1 || !((i == 1 && str[0] == '0') || (i == 2 && str[1] == '0' && str[0] == '-'))) {
- _make_error("Invalid numeric constant at 'b'");
- return;
- }
- bin_found = true;
- } else if (!hexa_found && GETCHAR(i) == 'e') {
- if (exponent_found || bin_found) {
- _make_error("Invalid numeric constant at 'e'");
- return;
- }
- exponent_found = true;
- } else if (_is_number(GETCHAR(i))) {
- //all ok
-
- } else if (bin_found && _is_bin(GETCHAR(i))) {
- } else if ((GETCHAR(i) == '-' || GETCHAR(i) == '+') && exponent_found) {
- if (sign_found) {
- _make_error("Invalid numeric constant at '-'");
- return;
- }
- sign_found = true;
- } else if (GETCHAR(i) == '_') {
- i++;
- continue; // Included for readability, shouldn't be a part of the string
- } else {
- break;
- }
-
- str += CharType(GETCHAR(i));
- i++;
- }
-
- if (!(_is_number(str[str.length() - 1]) || (hexa_found && _is_hex(str[str.length() - 1])))) {
- _make_error("Invalid numeric constant: " + str);
- return;
- }
-
- INCPOS(i);
- if (hexa_found) {
- int64_t val = str.hex_to_int();
- _make_constant(val);
- } else if (bin_found) {
- int64_t val = str.bin_to_int();
- _make_constant(val);
- } else if (period_found || exponent_found) {
- double val = str.to_double();
- _make_constant(val);
- } else {
- int64_t val = str.to_int();
- _make_constant(val);
- }
-
- return;
+ token.cursor_place = CURSOR_END;
}
+ } else if (cursor_line > start_line && cursor_line < line) {
+ // Is in middle line.
+ token.cursor_position = CURSOR_MIDDLE;
+ }
+ }
+ }
- if (GETCHAR(0) == '.') {
- //parse period
- _make_token(TK_PERIOD);
- break;
- }
-
- if (_is_text_char(GETCHAR(0))) {
- // parse identifier
- String str;
- str += CharType(GETCHAR(0));
-
- int i = 1;
- while (_is_text_char(GETCHAR(i))) {
- str += CharType(GETCHAR(i));
- i++;
- }
-
- bool identifier = false;
-
- if (str == "null") {
- _make_constant(Variant());
-
- } else if (str == "true") {
- _make_constant(true);
-
- } else if (str == "false") {
- _make_constant(false);
- } else {
- bool found = false;
-
- {
- int idx = 0;
-
- while (_type_list[idx].text) {
- if (str == _type_list[idx].text) {
- _make_type(_type_list[idx].type);
- found = true;
- break;
- }
- idx++;
- }
- }
+ return token;
+}
- if (!found) {
- //built in func?
+GDScriptTokenizer::Token GDScriptTokenizer::make_literal(const Variant &p_literal) {
+ Token token = make_token(Token::LITERAL);
+ token.literal = p_literal;
+ return token;
+}
- for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
- if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) {
- _make_built_in_func(GDScriptFunctions::Function(j));
- found = true;
- break;
- }
- }
- }
+GDScriptTokenizer::Token GDScriptTokenizer::make_identifier(const StringName &p_identifier) {
+ Token identifier = make_token(Token::IDENTIFIER);
+ identifier.literal = p_identifier;
+ return identifier;
+}
- if (!found) {
- //keyword
+GDScriptTokenizer::Token GDScriptTokenizer::make_error(const String &p_message) {
+ Token error = make_token(Token::ERROR);
+ error.literal = p_message;
- int idx = 0;
- found = false;
+ return error;
+}
- while (_keyword_list[idx].text) {
- if (str == _keyword_list[idx].text) {
- _make_token(_keyword_list[idx].token);
- found = true;
- break;
- }
- idx++;
- }
- }
+void GDScriptTokenizer::push_error(const String &p_message) {
+ Token error = make_error(p_message);
+ error_stack.push_back(error);
+}
- if (!found) {
- identifier = true;
- }
- }
+void GDScriptTokenizer::push_error(const Token &p_error) {
+ error_stack.push_back(p_error);
+}
- if (identifier) {
- _make_identifier(str);
- }
- INCPOS(str.length());
- return;
- }
+GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(CharType p_paren) {
+ if (paren_stack.empty()) {
+ return make_error(vformat("Closing \"%c\" doesn't have an opening counterpart.", p_paren));
+ }
+ Token error = make_error(vformat("Closing \"%c\" doesn't match the opening \"%c\".", p_paren, paren_stack.back()->get()));
+ paren_stack.pop_back(); // Remove opening one anyway.
+ return error;
+}
- _make_error("Unknown character");
- return;
+GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(CharType p_test, Token::Type p_double_type) {
+ const CharType *next = _current + 1;
+ int chars = 2; // Two already matched.
- } break;
+ // Test before consuming characters, since we don't want to consume more than needed.
+ while (*next == p_test) {
+ chars++;
+ next++;
+ }
+ if (chars >= 7) {
+ // It is a VCS conflict marker.
+ while (chars > 1) {
+ // Consume all characters (first was already consumed by scan()).
+ _advance();
+ chars--;
}
-
- INCPOS(1);
- break;
+ return make_token(Token::VCS_CONFLICT_MARKER);
+ } else {
+ // It is only a regular double character token, so we consume the second character.
+ _advance();
+ return make_token(p_double_type);
}
}
-void GDScriptTokenizerText::set_code(const String &p_code) {
- code = p_code;
- len = p_code.length();
- if (len) {
- _code = &code[0];
- } else {
- _code = nullptr;
+GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
+ if (!_is_alphanumeric(_peek())) {
+ push_error("Expected annotation identifier after \"@\".");
}
- code_pos = 0;
- line = 1; //it is stand-ar-ized that lines begin in 1 in code..
- column = 1; //the same holds for columns
- tk_rb_pos = 0;
- error_flag = false;
-#ifdef DEBUG_ENABLED
- ignore_warnings = false;
-#endif // DEBUG_ENABLED
- last_error = "";
- for (int i = 0; i < MAX_LOOKAHEAD + 1; i++) {
+ while (_is_alphanumeric(_peek())) {
+ // Consume all identifier characters.
_advance();
}
+ Token annotation = make_token(Token::ANNOTATION);
+ annotation.literal = StringName(annotation.source);
+ return annotation;
}
-GDScriptTokenizerText::Token GDScriptTokenizerText::get_token(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, TK_ERROR);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, TK_ERROR);
-
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- return tk_rb[ofs].type;
-}
+GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
+#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
+ KEYWORD_GROUP('a') \
+ KEYWORD("as", Token::AS) \
+ KEYWORD("and", Token::AND) \
+ KEYWORD("assert", Token::ASSERT) \
+ KEYWORD("await", Token::AWAIT) \
+ KEYWORD_GROUP('b') \
+ KEYWORD("break", Token::BREAK) \
+ KEYWORD("breakpoint", Token::BREAKPOINT) \
+ KEYWORD_GROUP('c') \
+ KEYWORD("class", Token::CLASS) \
+ KEYWORD("class_name", Token::CLASS_NAME) \
+ KEYWORD("const", Token::CONST) \
+ KEYWORD("continue", Token::CONTINUE) \
+ KEYWORD_GROUP('e') \
+ KEYWORD("elif", Token::ELIF) \
+ KEYWORD("else", Token::ELSE) \
+ KEYWORD("enum", Token::ENUM) \
+ KEYWORD("extends", Token::EXTENDS) \
+ KEYWORD_GROUP('f') \
+ KEYWORD("for", Token::FOR) \
+ KEYWORD("func", Token::FUNC) \
+ KEYWORD_GROUP('i') \
+ KEYWORD("if", Token::IF) \
+ KEYWORD("in", Token::IN) \
+ KEYWORD("is", Token::IS) \
+ KEYWORD_GROUP('m') \
+ KEYWORD("match", Token::MATCH) \
+ KEYWORD_GROUP('n') \
+ KEYWORD("namespace", Token::NAMESPACE) \
+ KEYWORD("not", Token::NOT) \
+ KEYWORD_GROUP('o') \
+ KEYWORD("or", Token::OR) \
+ KEYWORD_GROUP('p') \
+ KEYWORD("pass", Token::PASS) \
+ KEYWORD("preload", Token::PRELOAD) \
+ KEYWORD_GROUP('r') \
+ KEYWORD("return", Token::RETURN) \
+ KEYWORD_GROUP('s') \
+ KEYWORD("self", Token::SELF) \
+ KEYWORD("signal", Token::SIGNAL) \
+ KEYWORD("static", Token::STATIC) \
+ KEYWORD("super", Token::SUPER) \
+ KEYWORD_GROUP('t') \
+ KEYWORD("trait", Token::TRAIT) \
+ KEYWORD_GROUP('v') \
+ KEYWORD("var", Token::VAR) \
+ KEYWORD("void", Token::VOID) \
+ KEYWORD_GROUP('w') \
+ KEYWORD("while", Token::WHILE) \
+ KEYWORD_GROUP('y') \
+ KEYWORD("yield", Token::YIELD) \
+ KEYWORD_GROUP('I') \
+ KEYWORD("INF", Token::CONST_INF) \
+ KEYWORD_GROUP('N') \
+ KEYWORD("NAN", Token::CONST_NAN) \
+ KEYWORD_GROUP('P') \
+ KEYWORD("PI", Token::CONST_PI) \
+ KEYWORD_GROUP('T') \
+ KEYWORD("TAU", Token::CONST_TAU)
+
+#define MIN_KEYWORD_LENGTH 2
+#define MAX_KEYWORD_LENGTH 10
+
+ // Consume all alphanumeric characters.
+ while (_is_alphanumeric(_peek())) {
+ _advance();
+ }
-int GDScriptTokenizerText::get_token_line(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
+ int length = _current - _start;
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- return tk_rb[ofs].line;
-}
+ if (length == 1 && _peek(-1) == '_') {
+ // Lone underscore.
+ return make_token(Token::UNDERSCORE);
+ }
-int GDScriptTokenizerText::get_token_column(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, -1);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, -1);
+ String name(_start, length);
+ if (length < MIN_KEYWORD_LENGTH || length > MAX_KEYWORD_LENGTH) {
+ // Cannot be a keyword, as the length doesn't match any.
+ return make_identifier(name);
+ }
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- return tk_rb[ofs].col;
-}
+ // Define some helper macros for the switch case.
+#define KEYWORD_GROUP_CASE(char) \
+ break; \
+ case char:
+#define KEYWORD(keyword, token_type) \
+ { \
+ const int keyword_length = sizeof(keyword) - 1; \
+ static_assert(keyword_length <= MAX_KEYWORD_LENGTH, "There's a keyword longer than the defined maximum length"); \
+ static_assert(keyword_length >= MIN_KEYWORD_LENGTH, "There's a keyword shorter than the defined minimum length"); \
+ if (keyword_length == length && name == keyword) { \
+ return make_token(token_type); \
+ } \
+ }
-const Variant &GDScriptTokenizerText::get_token_constant(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, tk_rb[0].constant);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, tk_rb[0].constant);
+ // Find if it's a keyword.
+ switch (_start[0]) {
+ default:
+ KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
+ break;
+ }
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_CONSTANT, tk_rb[0].constant);
- return tk_rb[ofs].constant;
-}
+ // Check if it's a special literal
+ if (length == 4) {
+ if (name == "true") {
+ return make_literal(true);
+ } else if (name == "null") {
+ return make_literal(Variant());
+ }
+ } else if (length == 5) {
+ if (name == "false") {
+ return make_literal(false);
+ }
+ }
-StringName GDScriptTokenizerText::get_token_identifier(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, StringName());
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, StringName());
+ // Not a keyword, so must be an identifier.
+ return make_identifier(name);
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_IDENTIFIER, StringName());
- return tk_rb[ofs].identifier;
+#undef KEYWORDS
+#undef MIN_KEYWORD_LENGTH
+#undef MAX_KEYWORD_LENGTH
+#undef KEYWORD_GROUP_CASE
+#undef KEYWORD
}
-GDScriptFunctions::Function GDScriptTokenizerText::get_token_built_in_func(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, GDScriptFunctions::FUNC_MAX);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, GDScriptFunctions::FUNC_MAX);
+void GDScriptTokenizer::newline(bool p_make_token) {
+ // Don't overwrite previous newline, nor create if we want a line contination.
+ if (p_make_token && !pending_newline && !line_continuation) {
+ Token newline(Token::NEWLINE);
+ newline.start_line = line;
+ newline.end_line = line;
+ newline.start_column = column - 1;
+ newline.end_column = column;
+ newline.leftmost_column = newline.start_column;
+ newline.rightmost_column = newline.end_column;
+ pending_newline = true;
+ last_newline = newline;
+ }
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_FUNC, GDScriptFunctions::FUNC_MAX);
- return tk_rb[ofs].func;
+ // Increment line/column counters.
+ line++;
+ column = 1;
+ leftmost_column = 1;
}
-Variant::Type GDScriptTokenizerText::get_token_type(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, Variant::NIL);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, Variant::NIL);
+GDScriptTokenizer::Token GDScriptTokenizer::number() {
+ int base = 10;
+ bool has_decimal = false;
+ bool has_exponent = false;
+ bool has_error = false;
+ bool (*digit_check_func)(CharType) = _is_digit;
+
+ if (_peek(-1) == '.') {
+ has_decimal = true;
+ } else if (_peek(-1) == '0') {
+ if (_peek() == 'x') {
+ // Hexadecimal.
+ base = 16;
+ digit_check_func = _is_hex_digit;
+ _advance();
+ } else if (_peek() == 'b') {
+ // Binary.
+ base = 2;
+ digit_check_func = _is_binary_digit;
+ _advance();
+ }
+ }
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_BUILT_IN_TYPE, Variant::NIL);
- return tk_rb[ofs].vtype;
-}
+ // Allow '_' to be used in a number, for readability.
+ while (digit_check_func(_peek()) || _peek() == '_') {
+ _advance();
+ }
-int GDScriptTokenizerText::get_token_line_indent(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
+ // It might be a ".." token (instead of decimal point) so we check if it's not.
+ if (_peek() == '.' && _peek(1) != '.') {
+ if (base == 10 && !has_decimal) {
+ has_decimal = true;
+ } else if (base == 10) {
+ Token error = make_error("Cannot use a decimal point twice in a number.");
+ error.start_column = column;
+ error.leftmost_column = column;
+ error.end_column = column + 1;
+ error.rightmost_column = column + 1;
+ push_error(error);
+ has_error = true;
+ } else if (base == 16) {
+ Token error = make_error("Cannot use a decimal point in a hexadecimal number.");
+ error.start_column = column;
+ error.leftmost_column = column;
+ error.end_column = column + 1;
+ error.rightmost_column = column + 1;
+ push_error(error);
+ has_error = true;
+ } else {
+ Token error = make_error("Cannot use a decimal point in a binary number.");
+ error.start_column = column;
+ error.leftmost_column = column;
+ error.end_column = column + 1;
+ error.rightmost_column = column + 1;
+ push_error(error);
+ has_error = true;
+ }
+ if (!has_error) {
+ _advance();
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
- return tk_rb[ofs].constant.operator Vector2().x;
-}
+ // Consume decimal digits.
+ while (_is_digit(_peek()) || _peek() == '_') {
+ _advance();
+ }
+ }
+ }
+ if (base == 10) {
+ if (_peek() == 'e' || _peek() == 'E') {
+ has_exponent = true;
+ _advance();
+ if (_peek() == '+' || _peek() == '-') {
+ // Exponent sign.
+ _advance();
+ }
+ // Consume exponent digits.
+ while (_is_digit(_peek()) || _peek() == '_') {
+ _advance();
+ }
+ }
+ }
-int GDScriptTokenizerText::get_token_line_tab_indent(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
+ // Detect extra decimal point.
+ if (!has_error && has_decimal && _peek() == '.' && _peek(1) != '.') {
+ Token error = make_error("Cannot use a decimal point twice in a number.");
+ error.start_column = column;
+ error.leftmost_column = column;
+ error.end_column = column + 1;
+ error.rightmost_column = column + 1;
+ push_error(error);
+ has_error = true;
+ } else if (_is_alphanumeric(_peek())) {
+ // Letter at the end of the number.
+ push_error("Invalid numeric notation.");
+ }
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
- return tk_rb[ofs].constant.operator Vector2().y;
+ // Create a string with the whole number.
+ int length = _current - _start;
+ String number = String(_start, length).replace("_", "");
+
+ // Convert to the appropriate literal type.
+ if (base == 16) {
+ int64_t value = number.hex_to_int();
+ return make_literal(value);
+ } else if (base == 2) {
+ int64_t value = number.bin_to_int();
+ return make_literal(value);
+ } else if (has_decimal || has_exponent) {
+ double value = number.to_float();
+ return make_literal(value);
+ } else {
+ int64_t value = number.to_int();
+ return make_literal(value);
+ }
}
-String GDScriptTokenizerText::get_token_error(int p_offset) const {
- ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, String());
- ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, String());
+GDScriptTokenizer::Token GDScriptTokenizer::string() {
+ enum StringType {
+ STRING_REGULAR,
+ STRING_NAME,
+ STRING_NODEPATH,
+ };
- int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
- ERR_FAIL_COND_V(tk_rb[ofs].type != TK_ERROR, String());
- return tk_rb[ofs].constant;
-}
+ bool is_multiline = false;
+ StringType type = STRING_REGULAR;
-void GDScriptTokenizerText::advance(int p_amount) {
- ERR_FAIL_COND(p_amount <= 0);
- for (int i = 0; i < p_amount; i++) {
+ if (_peek(-1) == '&') {
+ type = STRING_NAME;
+ _advance();
+ } else if (_peek(-1) == '^') {
+ type = STRING_NODEPATH;
_advance();
}
-}
-
-//////////////////////////////////////////////////////////////////////////////////////////////////////
-
-#define BYTECODE_VERSION 13
-Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
- const uint8_t *buf = p_buffer.ptr();
- int total_len = p_buffer.size();
- ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
+ CharType quote_char = _peek(-1);
- int version = decode_uint32(&buf[4]);
- ERR_FAIL_COND_V_MSG(version > BYTECODE_VERSION, ERR_INVALID_DATA, "Bytecode is too recent! Please use a newer engine version.");
-
- int identifier_count = decode_uint32(&buf[8]);
- int constant_count = decode_uint32(&buf[12]);
- int line_count = decode_uint32(&buf[16]);
- int token_count = decode_uint32(&buf[20]);
+ if (_peek() == quote_char && _peek(1) == quote_char) {
+ is_multiline = true;
+ // Consume all quotes.
+ _advance();
+ _advance();
+ }
- const uint8_t *b = &buf[24];
- total_len -= 24;
+ String result;
- identifiers.resize(identifier_count);
- for (int i = 0; i < identifier_count; i++) {
- int len = decode_uint32(b);
- ERR_FAIL_COND_V(len > total_len, ERR_INVALID_DATA);
- b += 4;
- Vector<uint8_t> cs;
- cs.resize(len);
- for (int j = 0; j < len; j++) {
- cs.write[j] = b[j] ^ 0xb6;
+ for (;;) {
+ // Consume actual string.
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
}
- cs.write[cs.size() - 1] = 0;
- String s;
- s.parse_utf8((const char *)cs.ptr());
- b += len;
- total_len -= len + 4;
- identifiers.write[i] = s;
- }
+ CharType ch = _peek();
- constants.resize(constant_count);
- for (int i = 0; i < constant_count; i++) {
- Variant v;
- int len;
- // An object cannot be constant, never decode objects
- Error err = decode_variant(v, b, total_len, &len, false);
- if (err) {
- return err;
- }
- b += len;
- total_len -= len;
- constants.write[i] = v;
- }
+ if (ch == '\\') {
+ // Escape pattern.
+ _advance();
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
- ERR_FAIL_COND_V(line_count * 8 > total_len, ERR_INVALID_DATA);
+ // Grab escape character.
+ CharType code = _peek();
+ _advance();
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
- for (int i = 0; i < line_count; i++) {
- uint32_t token = decode_uint32(b);
- b += 4;
- uint32_t linecol = decode_uint32(b);
- b += 4;
+ CharType escaped = 0;
+ bool valid_escape = true;
- lines.insert(token, linecol);
- total_len -= 8;
- }
+ switch (code) {
+ case 'a':
+ escaped = '\a';
+ break;
+ case 'b':
+ escaped = '\b';
+ break;
+ case 'f':
+ escaped = '\f';
+ break;
+ case 'n':
+ escaped = '\n';
+ break;
+ case 'r':
+ escaped = '\r';
+ break;
+ case 't':
+ escaped = '\t';
+ break;
+ case 'v':
+ escaped = '\v';
+ break;
+ case '\'':
+ escaped = '\'';
+ break;
+ case '\"':
+ escaped = '\"';
+ break;
+ case '\\':
+ escaped = '\\';
+ break;
+ case 'u':
+ // Hexadecimal sequence.
+ for (int i = 0; i < 4; i++) {
+ if (_is_at_end()) {
+ return make_error("Unterminated string.");
+ }
- tokens.resize(token_count);
+ CharType digit = _peek();
+ CharType value = 0;
+ if (digit >= '0' && digit <= '9') {
+ value = digit - '0';
+ } else if (digit >= 'a' && digit <= 'f') {
+ value = digit - 'a';
+ value += 10;
+ } else if (digit >= 'A' && digit <= 'F') {
+ value = digit - 'A';
+ value += 10;
+ } else {
+ // Make error, but keep parsing the string.
+ Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
+ error.start_column = column;
+ error.leftmost_column = error.start_column;
+ error.end_column = column + 1;
+ error.rightmost_column = error.end_column;
+ push_error(error);
+ valid_escape = false;
+ break;
+ }
- for (int i = 0; i < token_count; i++) {
- ERR_FAIL_COND_V(total_len < 1, ERR_INVALID_DATA);
+ escaped <<= 4;
+ escaped |= value;
- if ((*b) & TOKEN_BYTE_MASK) { //little endian always
- ERR_FAIL_COND_V(total_len < 4, ERR_INVALID_DATA);
+ _advance();
+ }
+ break;
+ case '\r':
+ if (_peek() != '\n') {
+ // Carriage return without newline in string. (???)
+ // Just add it to the string and keep going.
+ result += ch;
+ _advance();
+ break;
+ }
+ [[fallthrough]];
+ case '\n':
+ // Escaping newline.
+ newline(false);
+ valid_escape = false; // Don't add to the string.
+ break;
+ default:
+ Token error = make_error("Invalid escape in string.");
+ error.start_column = column - 2;
+ error.leftmost_column = error.start_column;
+ push_error(error);
+ valid_escape = false;
+ break;
+ }
- tokens.write[i] = decode_uint32(b) & ~TOKEN_BYTE_MASK;
- b += 4;
+ if (valid_escape) {
+ result += escaped;
+ }
+ } else if (ch == quote_char) {
+ _advance();
+ if (is_multiline) {
+ if (_peek() == quote_char && _peek(1) == quote_char) {
+ // Ended the multiline string. Consume all quotes.
+ _advance();
+ _advance();
+ break;
+ }
+ } else {
+ // Ended single-line string.
+ break;
+ }
} else {
- tokens.write[i] = *b;
- b += 1;
- total_len--;
+ result += ch;
+ _advance();
+ if (ch == '\n') {
+ newline(false);
+ }
}
}
- token = 0;
+ // Make the literal.
+ Variant string;
+ switch (type) {
+ case STRING_NAME:
+ string = StringName(result);
+ break;
+ case STRING_NODEPATH:
+ string = NodePath(result);
+ break;
+ case STRING_REGULAR:
+ string = result;
+ break;
+ }
- return OK;
+ return make_literal(string);
}
-Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
- Vector<uint8_t> buf;
+void GDScriptTokenizer::check_indent() {
+ ERR_FAIL_COND_MSG(column != 1, "Checking tokenizer indentation in the middle of a line.");
- Map<StringName, int> identifier_map;
- HashMap<Variant, int, VariantHasher, VariantComparator> constant_map;
- Map<uint32_t, int> line_map;
- Vector<uint32_t> token_array;
+ if (_is_at_end()) {
+ // Send dedents for every indent level.
+ pending_indents -= indent_level();
+ indent_stack.clear();
+ return;
+ }
- GDScriptTokenizerText tt;
- tt.set_code(p_code);
- int line = -1;
+ for (;;) {
+ CharType current_indent_char = _peek();
+ int indent_count = 0;
- while (true) {
- if (tt.get_token_line() != line) {
- line = tt.get_token_line();
- line_map[line] = token_array.size();
+ if (current_indent_char != ' ' && current_indent_char != '\t' && current_indent_char != '\r' && current_indent_char != '\n' && current_indent_char != '#') {
+ // First character of the line is not whitespace, so we clear all indentation levels.
+ // Unless we are in a continuation or in multiline mode (inside expression).
+ if (line_continuation || multiline_mode) {
+ return;
+ }
+ pending_indents -= indent_level();
+ indent_stack.clear();
+ return;
}
- uint32_t token = tt.get_token();
- switch (tt.get_token()) {
- case TK_IDENTIFIER: {
- StringName id = tt.get_token_identifier();
- if (!identifier_map.has(id)) {
- int idx = identifier_map.size();
- identifier_map[id] = idx;
- }
- token |= identifier_map[id] << TOKEN_BITS;
- } break;
- case TK_CONSTANT: {
- const Variant &c = tt.get_token_constant();
- if (!constant_map.has(c)) {
- int idx = constant_map.size();
- constant_map[c] = idx;
- }
- token |= constant_map[c] << TOKEN_BITS;
- } break;
- case TK_BUILT_IN_TYPE: {
- token |= tt.get_token_type() << TOKEN_BITS;
- } break;
- case TK_BUILT_IN_FUNC: {
- token |= tt.get_token_built_in_func() << TOKEN_BITS;
-
- } break;
- case TK_NEWLINE: {
- token |= tt.get_token_line_indent() << TOKEN_BITS;
- } break;
- case TK_ERROR: {
- ERR_FAIL_V(Vector<uint8_t>());
- } break;
- default: {
+ if (_peek() == '\r') {
+ _advance();
+ if (_peek() != '\n') {
+ push_error("Stray carriage return character in source code.");
}
- };
-
- token_array.push_back(token);
-
- if (tt.get_token() == TK_EOF) {
- break;
}
- tt.advance();
- }
-
- //reverse maps
-
- Map<int, StringName> rev_identifier_map;
- for (Map<StringName, int>::Element *E = identifier_map.front(); E; E = E->next()) {
- rev_identifier_map[E->get()] = E->key();
- }
+ if (_peek() == '\n') {
+ // Empty line, keep going.
+ _advance();
+ newline(false);
+ continue;
+ }
- Map<int, Variant> rev_constant_map;
- const Variant *K = nullptr;
- while ((K = constant_map.next(K))) {
- rev_constant_map[constant_map[*K]] = *K;
- }
+ // Check indent level.
+ bool mixed = false;
+ while (!_is_at_end()) {
+ CharType space = _peek();
+ if (space == '\t') {
+ // Consider individual tab columns.
+ column += tab_size - 1;
+ indent_count += tab_size;
+ } else if (space == ' ') {
+ indent_count += 1;
+ } else {
+ break;
+ }
+ mixed = mixed || space != current_indent_char;
+ _advance();
+ }
- Map<int, uint32_t> rev_line_map;
- for (Map<uint32_t, int>::Element *E = line_map.front(); E; E = E->next()) {
- rev_line_map[E->get()] = E->key();
- }
+ if (mixed) {
+ Token error = make_error("Mixed use of tabs and spaces for indentation.");
+ error.start_line = line;
+ error.start_column = 1;
+ error.leftmost_column = 1;
+ error.rightmost_column = column;
+ push_error(error);
+ }
- //save header
- buf.resize(24);
- buf.write[0] = 'G';
- buf.write[1] = 'D';
- buf.write[2] = 'S';
- buf.write[3] = 'C';
- encode_uint32(BYTECODE_VERSION, &buf.write[4]);
- encode_uint32(identifier_map.size(), &buf.write[8]);
- encode_uint32(constant_map.size(), &buf.write[12]);
- encode_uint32(line_map.size(), &buf.write[16]);
- encode_uint32(token_array.size(), &buf.write[20]);
-
- //save identifiers
-
- for (Map<int, StringName>::Element *E = rev_identifier_map.front(); E; E = E->next()) {
- CharString cs = String(E->get()).utf8();
- int len = cs.length() + 1;
- int extra = 4 - (len % 4);
- if (extra == 4) {
- extra = 0;
+ if (_is_at_end()) {
+ // Reached the end with an empty line, so just dedent as much as needed.
+ pending_indents -= indent_level();
+ indent_stack.clear();
+ return;
}
- uint8_t ibuf[4];
- encode_uint32(len + extra, ibuf);
- for (int i = 0; i < 4; i++) {
- buf.push_back(ibuf[i]);
+ if (_peek() == '\r') {
+ _advance();
+ if (_peek() != '\n') {
+ push_error("Stray carriage return character in source code.");
+ }
}
- for (int i = 0; i < len; i++) {
- buf.push_back(cs[i] ^ 0xb6);
+ if (_peek() == '\n') {
+ // Empty line, keep going.
+ _advance();
+ newline(false);
+ continue;
}
- for (int i = 0; i < extra; i++) {
- buf.push_back(0 ^ 0xb6);
+ if (_peek() == '#') {
+ // Comment. Advance to the next line.
+ while (_peek() != '\n' && !_is_at_end()) {
+ _advance();
+ }
+ if (_is_at_end()) {
+ // Reached the end with an empty line, so just dedent as much as needed.
+ pending_indents -= indent_level();
+ indent_stack.clear();
+ return;
+ }
+ _advance(); // Consume '\n'.
+ newline(false);
+ continue;
}
- }
- for (Map<int, Variant>::Element *E = rev_constant_map.front(); E; E = E->next()) {
- int len;
- // Objects cannot be constant, never encode objects
- Error err = encode_variant(E->get(), nullptr, len, false);
- ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
- int pos = buf.size();
- buf.resize(pos + len);
- encode_variant(E->get(), &buf.write[pos], len, false);
- }
+ if (line_continuation || multiline_mode) {
+ // We cleared up all the whitespace at the beginning of the line.
+ // But if this is a continuation or multiline mode and we don't want any indentation change.
+ return;
+ }
- for (Map<int, uint32_t>::Element *E = rev_line_map.front(); E; E = E->next()) {
- uint8_t ibuf[8];
- encode_uint32(E->key(), &ibuf[0]);
- encode_uint32(E->get(), &ibuf[4]);
- for (int i = 0; i < 8; i++) {
- buf.push_back(ibuf[i]);
+ // Check if indentation character is consistent.
+ if (indent_char == '\0') {
+ // First time indenting, choose character now.
+ indent_char = current_indent_char;
+ } else if (current_indent_char != indent_char) {
+ Token error = make_error(vformat("Used \"%c\" for indentation instead \"%c\" as used before in the file.", String(&current_indent_char, 1).c_escape(), String(&indent_char, 1).c_escape()));
+ error.start_line = line;
+ error.start_column = 1;
+ error.leftmost_column = 1;
+ error.rightmost_column = column;
+ push_error(error);
}
- }
- for (int i = 0; i < token_array.size(); i++) {
- uint32_t token = token_array[i];
+ // Now we can do actual indentation changes.
- if (token & ~TOKEN_MASK) {
- uint8_t buf4[4];
- encode_uint32(token_array[i] | TOKEN_BYTE_MASK, &buf4[0]);
- for (int j = 0; j < 4; j++) {
- buf.push_back(buf4[j]);
- }
+ // Check if indent or dedent.
+ int previous_indent = 0;
+ if (indent_level() > 0) {
+ previous_indent = indent_stack.back()->get();
+ }
+ if (indent_count == previous_indent) {
+ // No change in indentation.
+ return;
+ }
+ if (indent_count > previous_indent) {
+ // Indentation increased.
+ indent_stack.push_back(indent_count);
+ pending_indents++;
} else {
- buf.push_back(token);
+ // Indentation decreased (dedent).
+ if (indent_level() == 0) {
+ push_error("Tokenizer bug: trying to dedent without previous indent.");
+ return;
+ }
+ while (indent_level() > 0 && indent_stack.back()->get() > indent_count) {
+ indent_stack.pop_back();
+ pending_indents--;
+ }
+ if ((indent_level() > 0 && indent_stack.back()->get() != indent_count) || (indent_level() == 0 && indent_count != 0)) {
+ // Mismatched indentation alignment.
+ Token error = make_error("Unindent doesn't match the previous indentation level.");
+ error.start_line = line;
+ error.start_column = 1;
+ error.leftmost_column = 1;
+ error.end_column = column + 1;
+ error.rightmost_column = column + 1;
+ push_error(error);
+ // Still, we'll be lenient and keep going, so keep this level in the stack.
+ indent_stack.push_back(indent_count);
+ }
}
+ break; // Get out of the loop in any case.
}
-
- return buf;
}
-GDScriptTokenizerBuffer::Token GDScriptTokenizerBuffer::get_token(int p_offset) const {
- int offset = token + p_offset;
-
- if (offset < 0 || offset >= tokens.size()) {
- return TK_EOF;
+void GDScriptTokenizer::_skip_whitespace() {
+ if (pending_indents != 0) {
+ // Still have some indent/dedent tokens to give.
+ return;
}
- return GDScriptTokenizerBuffer::Token(tokens[offset] & TOKEN_MASK);
-}
-
-StringName GDScriptTokenizerBuffer::get_token_identifier(int p_offset) const {
- int offset = token + p_offset;
+ bool is_bol = column == 1; // Beginning of line.
- ERR_FAIL_INDEX_V(offset, tokens.size(), StringName());
- uint32_t identifier = tokens[offset] >> TOKEN_BITS;
- ERR_FAIL_UNSIGNED_INDEX_V(identifier, (uint32_t)identifiers.size(), StringName());
+ if (is_bol) {
+ check_indent();
+ return;
+ }
- return identifiers[identifier];
+ for (;;) {
+ CharType c = _peek();
+ switch (c) {
+ case ' ':
+ _advance();
+ break;
+ case '\t':
+ _advance();
+ // Consider individual tab columns.
+ column += tab_size - 1;
+ break;
+ case '\r':
+ _advance(); // Consume either way.
+ if (_peek() != '\n') {
+ push_error("Stray carriage return character in source code.");
+ return;
+ }
+ break;
+ case '\n':
+ _advance();
+ newline(!is_bol); // Don't create new line token if line is empty.
+ check_indent();
+ break;
+ case '#':
+ // Comment.
+ while (_peek() != '\n' && !_is_at_end()) {
+ _advance();
+ }
+ if (_is_at_end()) {
+ return;
+ }
+ _advance(); // Consume '\n'
+ newline(!is_bol);
+ check_indent();
+ break;
+ default:
+ return;
+ }
+ }
}
-GDScriptFunctions::Function GDScriptTokenizerBuffer::get_token_built_in_func(int p_offset) const {
- int offset = token + p_offset;
- ERR_FAIL_INDEX_V(offset, tokens.size(), GDScriptFunctions::FUNC_MAX);
- return GDScriptFunctions::Function(tokens[offset] >> TOKEN_BITS);
-}
+GDScriptTokenizer::Token GDScriptTokenizer::scan() {
+ if (has_error()) {
+ return pop_error();
+ }
-Variant::Type GDScriptTokenizerBuffer::get_token_type(int p_offset) const {
- int offset = token + p_offset;
- ERR_FAIL_INDEX_V(offset, tokens.size(), Variant::NIL);
+ _skip_whitespace();
- return Variant::Type(tokens[offset] >> TOKEN_BITS);
-}
+ if (pending_newline) {
+ pending_newline = false;
+ if (!multiline_mode) {
+ // Don't return newline tokens on multine mode.
+ return last_newline;
+ }
+ }
-int GDScriptTokenizerBuffer::get_token_line(int p_offset) const {
- int offset = token + p_offset;
- int pos = lines.find_nearest(offset);
+ // Check for potential errors after skipping whitespace().
+ if (has_error()) {
+ return pop_error();
+ }
- if (pos < 0) {
- return -1;
+ _start = _current;
+ start_line = line;
+ start_column = column;
+ leftmost_column = column;
+ rightmost_column = column;
+
+ if (pending_indents != 0) {
+ // Adjust position for indent.
+ _start -= start_column - 1;
+ start_column = 1;
+ leftmost_column = 1;
+ if (pending_indents > 0) {
+ // Indents.
+ pending_indents--;
+ return make_token(Token::INDENT);
+ } else {
+ // Dedents.
+ pending_indents++;
+ Token dedent = make_token(Token::DEDENT);
+ dedent.end_column += 1;
+ dedent.rightmost_column += 1;
+ return dedent;
+ }
}
- if (pos >= lines.size()) {
- pos = lines.size() - 1;
+
+ if (_is_at_end()) {
+ return make_token(Token::TK_EOF);
}
- uint32_t l = lines.getv(pos);
- return l & TOKEN_LINE_MASK;
-}
+ const CharType c = _advance();
-int GDScriptTokenizerBuffer::get_token_column(int p_offset) const {
- int offset = token + p_offset;
- int pos = lines.find_nearest(offset);
- if (pos < 0) {
- return -1;
- }
- if (pos >= lines.size()) {
- pos = lines.size() - 1;
+ if (c == '\\') {
+ // Line continuation with backslash.
+ if (_peek() == '\r') {
+ if (_peek(1) != '\n') {
+ return make_error("Unexpected carriage return character.");
+ }
+ _advance();
+ }
+ if (_peek() != '\n') {
+ return make_error("Expected new line after \"\\\".");
+ }
+ _advance();
+ newline(false);
+ line_continuation = true;
+ return scan(); // Recurse to get next token.
}
- uint32_t l = lines.getv(pos);
- return l >> TOKEN_LINE_BITS;
-}
+ line_continuation = false;
-int GDScriptTokenizerBuffer::get_token_line_indent(int p_offset) const {
- int offset = token + p_offset;
- ERR_FAIL_INDEX_V(offset, tokens.size(), 0);
- return tokens[offset] >> TOKEN_BITS;
-}
+ if (_is_digit(c)) {
+ return number();
+ } else if (_is_alphanumeric(c)) {
+ return potential_identifier();
+ }
-const Variant &GDScriptTokenizerBuffer::get_token_constant(int p_offset) const {
- int offset = token + p_offset;
- ERR_FAIL_INDEX_V(offset, tokens.size(), nil);
- uint32_t constant = tokens[offset] >> TOKEN_BITS;
- ERR_FAIL_UNSIGNED_INDEX_V(constant, (uint32_t)constants.size(), nil);
- return constants[constant];
-}
+ switch (c) {
+ // String literals.
+ case '"':
+ case '\'':
+ return string();
+
+ // Annotation.
+ case '@':
+ return annotation();
+
+ // Single characters.
+ case '~':
+ return make_token(Token::TILDE);
+ case ',':
+ return make_token(Token::COMMA);
+ case ':':
+ return make_token(Token::COLON);
+ case ';':
+ return make_token(Token::SEMICOLON);
+ case '$':
+ return make_token(Token::DOLLAR);
+ case '?':
+ return make_token(Token::QUESTION_MARK);
+ case '`':
+ return make_token(Token::BACKTICK);
+
+ // Parens.
+ case '(':
+ push_paren('(');
+ return make_token(Token::PARENTHESIS_OPEN);
+ case '[':
+ push_paren('[');
+ return make_token(Token::BRACKET_OPEN);
+ case '{':
+ push_paren('{');
+ return make_token(Token::BRACE_OPEN);
+ case ')':
+ if (!pop_paren('(')) {
+ return make_paren_error(c);
+ }
+ return make_token(Token::PARENTHESIS_CLOSE);
+ case ']':
+ if (!pop_paren('[')) {
+ return make_paren_error(c);
+ }
+ return make_token(Token::BRACKET_CLOSE);
+ case '}':
+ if (!pop_paren('{')) {
+ return make_paren_error(c);
+ }
+ return make_token(Token::BRACE_CLOSE);
+
+ // Double characters.
+ case '!':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::BANG_EQUAL);
+ } else {
+ return make_token(Token::BANG);
+ }
+ case '.':
+ if (_peek() == '.') {
+ _advance();
+ return make_token(Token::PERIOD_PERIOD);
+ } else if (_is_digit(_peek())) {
+ // Number starting with '.'.
+ return number();
+ } else {
+ return make_token(Token::PERIOD);
+ }
+ case '+':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::PLUS_EQUAL);
+ } else {
+ return make_token(Token::PLUS);
+ }
+ case '-':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::MINUS_EQUAL);
+ } else if (_peek() == '>') {
+ _advance();
+ return make_token(Token::FORWARD_ARROW);
+ } else {
+ return make_token(Token::MINUS);
+ }
+ case '*':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::STAR_EQUAL);
+ } else {
+ return make_token(Token::STAR);
+ }
+ case '/':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::SLASH_EQUAL);
+ } else {
+ return make_token(Token::SLASH);
+ }
+ case '%':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::PERCENT_EQUAL);
+ } else {
+ return make_token(Token::PERCENT);
+ }
+ case '^':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::CARET_EQUAL);
+ } else if (_peek() == '"' || _peek() == '\'') {
+ // Node path
+ return string();
+ } else {
+ return make_token(Token::CARET);
+ }
+ case '&':
+ if (_peek() == '&') {
+ _advance();
+ return make_token(Token::AMPERSAND_AMPERSAND);
+ } else if (_peek() == '=') {
+ _advance();
+ return make_token(Token::AMPERSAND_EQUAL);
+ } else if (_peek() == '"' || _peek() == '\'') {
+ // String Name
+ return string();
+ } else {
+ return make_token(Token::AMPERSAND);
+ }
+ case '|':
+ if (_peek() == '|') {
+ _advance();
+ return make_token(Token::PIPE_PIPE);
+ } else if (_peek() == '=') {
+ _advance();
+ return make_token(Token::PIPE_EQUAL);
+ } else {
+ return make_token(Token::PIPE);
+ }
-String GDScriptTokenizerBuffer::get_token_error(int p_offset) const {
- ERR_FAIL_V(String());
-}
+ // Potential VCS conflict markers.
+ case '=':
+ if (_peek() == '=') {
+ return check_vcs_marker('=', Token::EQUAL_EQUAL);
+ } else {
+ return make_token(Token::EQUAL);
+ }
+ case '<':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::LESS_EQUAL);
+ } else if (_peek() == '<') {
+ if (_peek(1) == '=') {
+ _advance();
+ _advance(); // Advance both '<' and '='
+ return make_token(Token::LESS_LESS_EQUAL);
+ } else {
+ return check_vcs_marker('<', Token::LESS_LESS);
+ }
+ } else {
+ return make_token(Token::LESS);
+ }
+ case '>':
+ if (_peek() == '=') {
+ _advance();
+ return make_token(Token::GREATER_EQUAL);
+ } else if (_peek() == '>') {
+ if (_peek(1) == '=') {
+ _advance();
+ _advance(); // Advance both '>' and '='
+ return make_token(Token::GREATER_GREATER_EQUAL);
+ } else {
+ return check_vcs_marker('>', Token::GREATER_GREATER);
+ }
+ } else {
+ return make_token(Token::GREATER);
+ }
-void GDScriptTokenizerBuffer::advance(int p_amount) {
- ERR_FAIL_INDEX(p_amount + token, tokens.size());
- token += p_amount;
+ default:
+ return make_error(vformat(R"(Unknown character "%s".")", String(&c, 1)));
+ }
}
-GDScriptTokenizerBuffer::GDScriptTokenizerBuffer() {
- token = 0;
+GDScriptTokenizer::GDScriptTokenizer() {
+#ifdef TOOLS_ENABLED
+ if (EditorSettings::get_singleton()) {
+ tab_size = EditorSettings::get_singleton()->get_setting("text_editor/indent/size");
+ }
+#endif // TOOLS_ENABLED
}