diff options
author | George Marques <george@gmarqu.es> | 2023-02-09 11:17:37 -0300 |
---|---|---|
committer | George Marques <george@gmarqu.es> | 2023-02-09 11:19:05 -0300 |
commit | 03ea77407c8a1e3d525e367e72d67c5152937747 (patch) | |
tree | 64deba9a55d05c27641f7cf6e48ae286b33f9b39 | |
parent | d02a7bc00d76d50843f1b938914340135060a119 (diff) |
GDScript: Be more lenient with identifiers
- Allow identifiers similar to keywords if they are in ASCII range.
- Allow constants to be treated as regular identifiers.
- Allow keywords that can be used as identifiers in expressions.
8 files changed, 67 insertions, 6 deletions
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index c6e4222213..0393a8c241 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -2145,7 +2145,12 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr make_completion_context(COMPLETION_IDENTIFIER, nullptr); GDScriptTokenizer::Token token = current; - ParseFunction prefix_rule = get_rule(token.type)->prefix; + GDScriptTokenizer::Token::Type token_type = token.type; + if (token.is_identifier()) { + // Allow keywords that can be treated as identifiers. + token_type = GDScriptTokenizer::Token::IDENTIFIER; + } + ParseFunction prefix_rule = get_rule(token_type)->prefix; if (prefix_rule == nullptr) { // Expected expression. Let the caller give the proper error message. @@ -3010,7 +3015,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p path_state = PATH_STATE_NODE_NAME; } else if (current.is_node_name()) { advance(); - get_node->full_path += previous.get_identifier(); + String identifier = previous.get_identifier(); +#ifdef DEBUG_ENABLED + // Check spoofing. + if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier)) { + push_warning(get_node, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier); + } +#endif + get_node->full_path += identifier; path_state = PATH_STATE_NODE_NAME; } else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) { diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index d7f1114fd3..d586380c41 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -168,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const { switch (type) { case IDENTIFIER: case MATCH: // Used in String.match(). - case CONST_INF: // Used in Vector{2,3,4}.INF + // Allow constants to be treated as regular identifiers. + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: return true; default: return false; @@ -188,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const { case CLASS_NAME: case CLASS: case CONST: + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: case CONTINUE: case ELIF: case ELSE: @@ -530,9 +538,12 @@ void GDScriptTokenizer::make_keyword_list() { #endif // DEBUG_ENABLED GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { + bool only_ascii = _peek(-1) < 128; + // Consume all identifier characters. while (is_unicode_identifier_continue(_peek())) { - _advance(); + char32_t c = _advance(); + only_ascii = only_ascii && c < 128; } int len = _current - _start; @@ -587,7 +598,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { #ifdef DEBUG_ENABLED // Additional checks for identifiers but only in debug and if it's available in TextServer. - if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { + if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { int64_t confusable = TS->is_confusable(name, keyword_list); if (confusable >= 0) { push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable])); diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd new file mode 100644 index 0000000000..390d314b94 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd @@ -0,0 +1,3 @@ +func test(): + var P1 = "ok" # Technically it is visually similar to keyword "PI" but allowed since it's in ASCII range. + print(P1) diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out new file mode 100644 index 0000000000..1b47ed10dc --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out @@ -0,0 +1,2 @@ +GDTEST_OK +ok diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd new file mode 100644 index 0000000000..7e1982597c --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd @@ -0,0 +1,16 @@ +func test(): + # The following keywords are allowed as identifiers: + var match = "match" + print(match) + + var PI = "PI" + print(PI) + + var INF = "INF" + print(INF) + + var NAN = "NAN" + print(NAN) + + var TAU = "TAU" + print(TAU) diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out new file mode 100644 index 0000000000..aae2ae13d5 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out @@ -0,0 +1,6 @@ +GDTEST_OK +match +PI +INF +NAN +TAU diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd index e2caac8ffd..41b38c4bba 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd +++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd @@ -1,5 +1,12 @@ +extends Node + func test(): var port = 0 # Only latin characters. var pοrt = 1 # The "ο" is Greek omicron. prints(port, pοrt) + +# Do not call this since nodes aren't in the tree. It is just a parser check. +func nodes(): + var _node1 = $port # Only latin characters. + var _node2 = $pοrt # The "ο" is Greek omicron. diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out index c483396443..c189204285 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out +++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out @@ -1,6 +1,10 @@ GDTEST_OK >> WARNING ->> Line: 3 +>> Line: 5 +>> CONFUSABLE_IDENTIFIER +>> The identifier "pοrt" has misleading characters and might be confused with something else. +>> WARNING +>> Line: 12 >> CONFUSABLE_IDENTIFIER >> The identifier "pοrt" has misleading characters and might be confused with something else. 0 1 |