diff options
14 files changed, 158 insertions, 27 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index b34d9f3271..1b3b070592 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -1157,6 +1157,14 @@ Vector<String> String::split_spaces() const { Vector<String> String::split(const String &p_splitter, bool p_allow_empty, int p_maxsplit) const { Vector<String> ret; + + if (is_empty()) { + if (p_allow_empty) { + ret.push_back(""); + } + return ret; + } + int from = 0; int len = length(); diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index c6e4222213..42e1f27603 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -483,24 +483,34 @@ void GDScriptParser::parse_program() { current_class = head; bool can_have_class_or_extends = true; - while (match(GDScriptTokenizer::Token::ANNOTATION)) { - AnnotationNode *annotation = parse_annotation(AnnotationInfo::SCRIPT | AnnotationInfo::STANDALONE | AnnotationInfo::CLASS_LEVEL); - if (annotation != nullptr) { - if (annotation->applies_to(AnnotationInfo::SCRIPT)) { - // `@icon` needs to be applied in the parser. See GH-72444. - if (annotation->name == SNAME("@icon")) { - annotation->apply(this, head); + while (!check(GDScriptTokenizer::Token::TK_EOF)) { + if (match(GDScriptTokenizer::Token::ANNOTATION)) { + AnnotationNode *annotation = parse_annotation(AnnotationInfo::SCRIPT | AnnotationInfo::STANDALONE | AnnotationInfo::CLASS_LEVEL); + if (annotation != nullptr) { + if (annotation->applies_to(AnnotationInfo::SCRIPT)) { + // `@icon` needs to be applied in the parser. See GH-72444. + if (annotation->name == SNAME("@icon")) { + annotation->apply(this, head); + } else { + head->annotations.push_back(annotation); + } } else { - head->annotations.push_back(annotation); + annotation_stack.push_back(annotation); + // This annotation must appear after script-level annotations + // and class_name/extends (ex: could be @onready or @export), + // so we stop looking for script-level stuff. + can_have_class_or_extends = false; + break; } - } else { - annotation_stack.push_back(annotation); - // This annotation must appear after script-level annotations - // and class_name/extends (ex: could be @onready or @export), - // so we stop looking for script-level stuff. - can_have_class_or_extends = false; - break; } + } else if (check(GDScriptTokenizer::Token::LITERAL) && current.literal.get_type() == Variant::STRING) { + // Allow strings in class body as multiline comments. + advance(); + if (!match(GDScriptTokenizer::Token::NEWLINE)) { + push_error("Expected newline after comment string."); + } + } else { + break; } } @@ -524,6 +534,16 @@ void GDScriptParser::parse_program() { end_statement("superclass"); } break; + case GDScriptTokenizer::Token::LITERAL: + if (current.literal.get_type() == Variant::STRING) { + // Allow strings in class body as multiline comments. + advance(); + if (!match(GDScriptTokenizer::Token::NEWLINE)) { + push_error("Expected newline after comment string."); + } + break; + } + [[fallthrough]]; default: // No tokens are allowed between script annotations and class/extends. can_have_class_or_extends = false; @@ -829,6 +849,16 @@ void GDScriptParser::parse_class_body(bool p_is_multiline) { case GDScriptTokenizer::Token::DEDENT: class_end = true; break; + case GDScriptTokenizer::Token::LITERAL: + if (current.literal.get_type() == Variant::STRING) { + // Allow strings in class body as multiline comments. + advance(); + if (!match(GDScriptTokenizer::Token::NEWLINE)) { + push_error("Expected newline after comment string."); + } + break; + } + [[fallthrough]]; default: // Display a completion with identifiers. make_completion_context(COMPLETION_IDENTIFIER, nullptr); @@ -1675,6 +1705,12 @@ GDScriptParser::Node *GDScriptParser::parse_statement() { // Standalone lambdas can't be used, so make this an error. push_error("Standalone lambdas cannot be accessed. Consider assigning it to a variable.", expression); break; + case Node::LITERAL: + if (static_cast<GDScriptParser::LiteralNode *>(expression)->value.get_type() == Variant::STRING) { + // Allow strings as multiline comments. + break; + } + [[fallthrough]]; default: push_warning(expression, GDScriptWarning::STANDALONE_EXPRESSION); } @@ -2145,7 +2181,12 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr make_completion_context(COMPLETION_IDENTIFIER, nullptr); GDScriptTokenizer::Token token = current; - ParseFunction prefix_rule = get_rule(token.type)->prefix; + GDScriptTokenizer::Token::Type token_type = token.type; + if (token.is_identifier()) { + // Allow keywords that can be treated as identifiers. + token_type = GDScriptTokenizer::Token::IDENTIFIER; + } + ParseFunction prefix_rule = get_rule(token_type)->prefix; if (prefix_rule == nullptr) { // Expected expression. Let the caller give the proper error message. @@ -3010,7 +3051,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p path_state = PATH_STATE_NODE_NAME; } else if (current.is_node_name()) { advance(); - get_node->full_path += previous.get_identifier(); + String identifier = previous.get_identifier(); +#ifdef DEBUG_ENABLED + // Check spoofing. + if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier)) { + push_warning(get_node, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier); + } +#endif + get_node->full_path += identifier; path_state = PATH_STATE_NODE_NAME; } else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) { diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index d7f1114fd3..d586380c41 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -168,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const { switch (type) { case IDENTIFIER: case MATCH: // Used in String.match(). - case CONST_INF: // Used in Vector{2,3,4}.INF + // Allow constants to be treated as regular identifiers. + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: return true; default: return false; @@ -188,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const { case CLASS_NAME: case CLASS: case CONST: + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: case CONTINUE: case ELIF: case ELSE: @@ -530,9 +538,12 @@ void GDScriptTokenizer::make_keyword_list() { #endif // DEBUG_ENABLED GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { + bool only_ascii = _peek(-1) < 128; + // Consume all identifier characters. while (is_unicode_identifier_continue(_peek())) { - _advance(); + char32_t c = _advance(); + only_ascii = only_ascii && c < 128; } int len = _current - _start; @@ -587,7 +598,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { #ifdef DEBUG_ENABLED // Additional checks for identifiers but only in debug and if it's available in TextServer. - if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { + if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { int64_t confusable = TS->is_confusable(name, keyword_list); if (confusable >= 0) { push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable])); diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd new file mode 100644 index 0000000000..390d314b94 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd @@ -0,0 +1,3 @@ +func test(): + var P1 = "ok" # Technically it is visually similar to keyword "PI" but allowed since it's in ASCII range. + print(P1) diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out new file mode 100644 index 0000000000..1b47ed10dc --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out @@ -0,0 +1,2 @@ +GDTEST_OK +ok diff --git a/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.gd b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.gd new file mode 100644 index 0000000000..3ecd65ad9c --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.gd @@ -0,0 +1,21 @@ +""" +This is a comment. +""" + +@tool + +""" +This is also a comment. +""" + +extends RefCounted + +''' +This is a comment too. +''' + +func test(): + """ + This too is a comment. + """ + print("ok") diff --git a/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.out b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.out new file mode 100644 index 0000000000..1b47ed10dc --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.out @@ -0,0 +1,2 @@ +GDTEST_OK +ok diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd new file mode 100644 index 0000000000..7e1982597c --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd @@ -0,0 +1,16 @@ +func test(): + # The following keywords are allowed as identifiers: + var match = "match" + print(match) + + var PI = "PI" + print(PI) + + var INF = "INF" + print(INF) + + var NAN = "NAN" + print(NAN) + + var TAU = "TAU" + print(TAU) diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out new file mode 100644 index 0000000000..aae2ae13d5 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out @@ -0,0 +1,6 @@ +GDTEST_OK +match +PI +INF +NAN +TAU diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd index e2caac8ffd..41b38c4bba 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd +++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd @@ -1,5 +1,12 @@ +extends Node + func test(): var port = 0 # Only latin characters. var pοrt = 1 # The "ο" is Greek omicron. prints(port, pοrt) + +# Do not call this since nodes aren't in the tree. It is just a parser check. +func nodes(): + var _node1 = $port # Only latin characters. + var _node2 = $pοrt # The "ο" is Greek omicron. diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out index c483396443..c189204285 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out +++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out @@ -1,6 +1,10 @@ GDTEST_OK >> WARNING ->> Line: 3 +>> Line: 5 +>> CONFUSABLE_IDENTIFIER +>> The identifier "pοrt" has misleading characters and might be confused with something else. +>> WARNING +>> Line: 12 >> CONFUSABLE_IDENTIFIER >> The identifier "pοrt" has misleading characters and might be confused with something else. 0 1 diff --git a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd index 18ea260fa2..dc4223ec2d 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd +++ b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd @@ -1,6 +1,5 @@ func test(): # The following statements should all be reported as standalone expressions: - "This is a standalone expression" 1234 0.0 + 0.0 Color(1, 1, 1) diff --git a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out index 99ec87438e..a2c67a6e51 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out +++ b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out @@ -8,14 +8,10 @@ GDTEST_OK >> STANDALONE_EXPRESSION >> Standalone expression (the line has no effect). >> WARNING ->> Line: 5 +>> Line: 6 >> STANDALONE_EXPRESSION >> Standalone expression (the line has no effect). >> WARNING >> Line: 7 >> STANDALONE_EXPRESSION >> Standalone expression (the line has no effect). ->> WARNING ->> Line: 8 ->> STANDALONE_EXPRESSION ->> Standalone expression (the line has no effect). diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h index 0cf3448a48..5d19b5a164 100644 --- a/tests/core/string/test_string.h +++ b/tests/core/string/test_string.h @@ -512,6 +512,14 @@ TEST_CASE("[String] Splitting") { CHECK(l[i] == slices_3[i]); } + s = ""; + l = s.split(); + CHECK(l.size() == 1); + CHECK(l[0] == ""); + + l = s.split("", false); + CHECK(l.size() == 0); + s = "Mars Jupiter Saturn Uranus"; const char *slices_s[4] = { "Mars", "Jupiter", "Saturn", "Uranus" }; l = s.split_spaces(); |