diff options
Diffstat (limited to 'modules/gdscript/gdscript_tokenizer.cpp')
-rw-r--r-- | modules/gdscript/gdscript_tokenizer.cpp | 77 |
1 files changed, 63 insertions, 14 deletions
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index e17a804003..d586380c41 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -31,10 +31,14 @@ #include "gdscript_tokenizer.h" #include "core/error/error_macros.h" +#include "core/string/char_utils.h" #ifdef TOOLS_ENABLED #include "editor/editor_settings.h" #endif +#ifdef DEBUG_ENABLED +#include "servers/text_server.h" +#endif static const char *token_names[] = { "Empty", // EMPTY, @@ -164,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const { switch (type) { case IDENTIFIER: case MATCH: // Used in String.match(). - case CONST_INF: // Used in Vector{2,3,4}.INF + // Allow constants to be treated as regular identifiers. + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: return true; default: return false; @@ -184,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const { case CLASS_NAME: case CLASS: case CONST: + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: case CONTINUE: case ELIF: case ELSE: @@ -435,10 +447,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, To } GDScriptTokenizer::Token GDScriptTokenizer::annotation() { - if (!is_ascii_identifier_char(_peek())) { + if (is_unicode_identifier_start(_peek())) { + _advance(); // Consume start character. + } else { push_error("Expected annotation identifier after \"@\"."); } - while (is_ascii_identifier_char(_peek())) { + while (is_unicode_identifier_continue(_peek())) { // Consume all identifier characters. _advance(); } @@ -447,7 +461,6 @@ GDScriptTokenizer::Token GDScriptTokenizer::annotation() { return annotation; } -GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ KEYWORD_GROUP('a') \ KEYWORD("as", Token::AS) \ @@ -512,9 +525,25 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { #define MIN_KEYWORD_LENGTH 2 #define MAX_KEYWORD_LENGTH 10 - // Consume all alphanumeric characters. - while (is_ascii_identifier_char(_peek())) { - _advance(); +#ifdef DEBUG_ENABLED +void GDScriptTokenizer::make_keyword_list() { +#define KEYWORD_LINE(keyword, token_type) keyword, +#define KEYWORD_GROUP_IGNORE(group) + keyword_list = { + KEYWORDS(KEYWORD_GROUP_IGNORE, KEYWORD_LINE) + }; +#undef KEYWORD_LINE +#undef KEYWORD_GROUP_IGNORE +} +#endif // DEBUG_ENABLED + +GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { + bool only_ascii = _peek(-1) < 128; + + // Consume all identifier characters. + while (is_unicode_identifier_continue(_peek())) { + char32_t c = _advance(); + only_ascii = only_ascii && c < 128; } int len = _current - _start; @@ -565,15 +594,28 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { } // Not a keyword, so must be an identifier. - return make_identifier(name); + Token id = make_identifier(name); + +#ifdef DEBUG_ENABLED + // Additional checks for identifiers but only in debug and if it's available in TextServer. + if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { + int64_t confusable = TS->is_confusable(name, keyword_list); + if (confusable >= 0) { + push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable])); + } + } +#endif // DEBUG_ENABLED + + return id; -#undef KEYWORDS -#undef MIN_KEYWORD_LENGTH -#undef MAX_KEYWORD_LENGTH #undef KEYWORD_GROUP_CASE #undef KEYWORD } +#undef MAX_KEYWORD_LENGTH +#undef MIN_KEYWORD_LENGTH +#undef KEYWORDS + void GDScriptTokenizer::newline(bool p_make_token) { // Don't overwrite previous newline, nor create if we want a line continuation. if (p_make_token && !pending_newline && !line_continuation) { @@ -720,7 +762,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() { error.rightmost_column = column + 1; push_error(error); has_error = true; - } else if (is_ascii_identifier_char(_peek())) { + } else if (is_unicode_identifier_start(_peek()) || is_unicode_identifier_continue(_peek())) { // Letter at the end of the number. push_error("Invalid numeric notation."); } @@ -1311,7 +1353,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() { if (is_digit(c)) { return number(); - } else if (is_ascii_identifier_char(c)) { + } else if (is_unicode_identifier_start(c)) { return potential_identifier(); } @@ -1504,7 +1546,11 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() { } default: - return make_error(vformat(R"(Unknown character "%s".)", String(&c, 1))); + if (is_whitespace(c)) { + return make_error(vformat(R"(Invalid white space character "\\u%X".)", static_cast<int32_t>(c))); + } else { + return make_error(vformat(R"(Unknown character "%s".)", String(&c, 1))); + } } } @@ -1514,4 +1560,7 @@ GDScriptTokenizer::GDScriptTokenizer() { tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size"); } #endif // TOOLS_ENABLED +#ifdef DEBUG_ENABLED + make_keyword_list(); +#endif // DEBUG_ENABLED } |