14 files changed, 158 insertions, 27 deletions
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index b34d9f3271..1b3b070592 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -1157,6 +1157,14 @@ Vector<String> String::split_spaces() const {
 
 Vector<String> String::split(const String &p_splitter, bool p_allow_empty, int p_maxsplit) const {
 	Vector<String> ret;
+
+	if (is_empty()) {
+		if (p_allow_empty) {
+			ret.push_back("");
+		}
+		return ret;
+	}
+
 	int from = 0;
 	int len = length();
 
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp
index c6e4222213..42e1f27603 100644
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -483,24 +483,34 @@ void GDScriptParser::parse_program() {
 	current_class = head;
 	bool can_have_class_or_extends = true;
 
-	while (match(GDScriptTokenizer::Token::ANNOTATION)) {
-		AnnotationNode *annotation = parse_annotation(AnnotationInfo::SCRIPT | AnnotationInfo::STANDALONE | AnnotationInfo::CLASS_LEVEL);
-		if (annotation != nullptr) {
-			if (annotation->applies_to(AnnotationInfo::SCRIPT)) {
-				// `@icon` needs to be applied in the parser. See GH-72444.
-				if (annotation->name == SNAME("@icon")) {
-					annotation->apply(this, head);
+	while (!check(GDScriptTokenizer::Token::TK_EOF)) {
+		if (match(GDScriptTokenizer::Token::ANNOTATION)) {
+			AnnotationNode *annotation = parse_annotation(AnnotationInfo::SCRIPT | AnnotationInfo::STANDALONE | AnnotationInfo::CLASS_LEVEL);
+			if (annotation != nullptr) {
+				if (annotation->applies_to(AnnotationInfo::SCRIPT)) {
+					// `@icon` needs to be applied in the parser. See GH-72444.
+					if (annotation->name == SNAME("@icon")) {
+						annotation->apply(this, head);
+					} else {
+						head->annotations.push_back(annotation);
+					}
 				} else {
-					head->annotations.push_back(annotation);
+					annotation_stack.push_back(annotation);
+					// This annotation must appear after script-level annotations
+					// and class_name/extends (ex: could be @onready or @export),
+					// so we stop looking for script-level stuff.
+					can_have_class_or_extends = false;
+					break;
 				}
-			} else {
-				annotation_stack.push_back(annotation);
-				// This annotation must appear after script-level annotations
-				// and class_name/extends (ex: could be @onready or @export),
-				// so we stop looking for script-level stuff.
-				can_have_class_or_extends = false;
-				break;
 			}
+		} else if (check(GDScriptTokenizer::Token::LITERAL) && current.literal.get_type() == Variant::STRING) {
+			// Allow strings in class body as multiline comments.
+			advance();
+			if (!match(GDScriptTokenizer::Token::NEWLINE)) {
+				push_error("Expected newline after comment string.");
+			}
+		} else {
+			break;
 		}
 	}
 
@@ -524,6 +534,16 @@ void GDScriptParser::parse_program() {
 					end_statement("superclass");
 				}
 				break;
+			case GDScriptTokenizer::Token::LITERAL:
+				if (current.literal.get_type() == Variant::STRING) {
+					// Allow strings in class body as multiline comments.
+					advance();
+					if (!match(GDScriptTokenizer::Token::NEWLINE)) {
+						push_error("Expected newline after comment string.");
+					}
+					break;
+				}
+				[[fallthrough]];
 			default:
 				// No tokens are allowed between script annotations and class/extends.
 				can_have_class_or_extends = false;
@@ -829,6 +849,16 @@ void GDScriptParser::parse_class_body(bool p_is_multiline) {
 			case GDScriptTokenizer::Token::DEDENT:
 				class_end = true;
 				break;
+			case GDScriptTokenizer::Token::LITERAL:
+				if (current.literal.get_type() == Variant::STRING) {
+					// Allow strings in class body as multiline comments.
+					advance();
+					if (!match(GDScriptTokenizer::Token::NEWLINE)) {
+						push_error("Expected newline after comment string.");
+					}
+					break;
+				}
+				[[fallthrough]];
 			default:
 				// Display a completion with identifiers.
 				make_completion_context(COMPLETION_IDENTIFIER, nullptr);
@@ -1675,6 +1705,12 @@ GDScriptParser::Node *GDScriptParser::parse_statement() {
 						// Standalone lambdas can't be used, so make this an error.
 						push_error("Standalone lambdas cannot be accessed. Consider assigning it to a variable.", expression);
 						break;
+					case Node::LITERAL:
+						if (static_cast<GDScriptParser::LiteralNode *>(expression)->value.get_type() == Variant::STRING) {
+							// Allow strings as multiline comments.
+							break;
+						}
+						[[fallthrough]];
 					default:
 						push_warning(expression, GDScriptWarning::STANDALONE_EXPRESSION);
 				}
@@ -2145,7 +2181,12 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr
 	make_completion_context(COMPLETION_IDENTIFIER, nullptr);
 
 	GDScriptTokenizer::Token token = current;
-	ParseFunction prefix_rule = get_rule(token.type)->prefix;
+	GDScriptTokenizer::Token::Type token_type = token.type;
+	if (token.is_identifier()) {
+		// Allow keywords that can be treated as identifiers.
+		token_type = GDScriptTokenizer::Token::IDENTIFIER;
+	}
+	ParseFunction prefix_rule = get_rule(token_type)->prefix;
 
 	if (prefix_rule == nullptr) {
 		// Expected expression. Let the caller give the proper error message.
@@ -3010,7 +3051,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p
 			path_state = PATH_STATE_NODE_NAME;
 		} else if (current.is_node_name()) {
 			advance();
-			get_node->full_path += previous.get_identifier();
+			String identifier = previous.get_identifier();
+#ifdef DEBUG_ENABLED
+			// Check spoofing.
+			if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier)) {
+				push_warning(get_node, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier);
+			}
+#endif
+			get_node->full_path += identifier;
 
 			path_state = PATH_STATE_NODE_NAME;
 		} else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) {
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp
index d7f1114fd3..d586380c41 100644
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -168,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const {
 	switch (type) {
 		case IDENTIFIER:
 		case MATCH: // Used in String.match().
-		case CONST_INF: // Used in Vector{2,3,4}.INF
+		// Allow constants to be treated as regular identifiers.
+		case CONST_PI:
+		case CONST_INF:
+		case CONST_NAN:
+		case CONST_TAU:
 			return true;
 		default:
 			return false;
@@ -188,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const {
 		case CLASS_NAME:
 		case CLASS:
 		case CONST:
+		case CONST_PI:
+		case CONST_INF:
+		case CONST_NAN:
+		case CONST_TAU:
 		case CONTINUE:
 		case ELIF:
 		case ELSE:
@@ -530,9 +538,12 @@ void GDScriptTokenizer::make_keyword_list() {
 #endif // DEBUG_ENABLED
 
 GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
+	bool only_ascii = _peek(-1) < 128;
+
 	// Consume all identifier characters.
 	while (is_unicode_identifier_continue(_peek())) {
-		_advance();
+		char32_t c = _advance();
+		only_ascii = only_ascii && c < 128;
 	}
 
 	int len = _current - _start;
@@ -587,7 +598,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
 
 #ifdef DEBUG_ENABLED
 	// Additional checks for identifiers but only in debug and if it's available in TextServer.
-	if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
+	if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
 		int64_t confusable = TS->is_confusable(name, keyword_list);
 		if (confusable >= 0) {
 			push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable]));
diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd
new file mode 100644
index 0000000000..390d314b94
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd
@@ -0,0 +1,3 @@
+func test():
+	var P1 = "ok" # Technically it is visually similar to keyword "PI" but allowed since it's in ASCII range.
+	print(P1)
diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out
new file mode 100644
index 0000000000..1b47ed10dc
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out
@@ -0,0 +1,2 @@
+GDTEST_OK
+ok
diff --git a/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.gd b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.gd
new file mode 100644
index 0000000000..3ecd65ad9c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.gd
@@ -0,0 +1,21 @@
+"""
+This is a comment.
+"""
+
+@tool
+
+"""
+This is also a comment.
+"""
+
+extends RefCounted
+
+'''
+This is a comment too.
+'''
+
+func test():
+	"""
+	This too is a comment.
+	"""
+	print("ok")
diff --git a/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.out b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.out
new file mode 100644
index 0000000000..1b47ed10dc
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/allow_strings_as_comments.out
@@ -0,0 +1,2 @@
+GDTEST_OK
+ok
diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd
new file mode 100644
index 0000000000..7e1982597c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd
@@ -0,0 +1,16 @@
+func test():
+	# The following keywords are allowed as identifiers:
+	var match = "match"
+	print(match)
+
+	var PI = "PI"
+	print(PI)
+
+	var INF = "INF"
+	print(INF)
+
+	var NAN = "NAN"
+	print(NAN)
+
+	var TAU = "TAU"
+	print(TAU)
diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out
new file mode 100644
index 0000000000..aae2ae13d5
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out
@@ -0,0 +1,6 @@
+GDTEST_OK
+match
+PI
+INF
+NAN
+TAU
diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd
index e2caac8ffd..41b38c4bba 100644
--- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd
+++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd
@@ -1,5 +1,12 @@
+extends Node
+
 func test():
 	var port = 0 # Only latin characters.
 	var pοrt = 1 # The "ο" is Greek omicron.
 
 	prints(port, pοrt)
+
+# Do not call this since nodes aren't in the tree. It is just a parser check.
+func nodes():
+	var _node1 = $port # Only latin characters.
+	var _node2 = $pοrt # The "ο" is Greek omicron.
diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out
index c483396443..c189204285 100644
--- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out
+++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out
@@ -1,6 +1,10 @@
 GDTEST_OK
 >> WARNING
->> Line: 3
+>> Line: 5
+>> CONFUSABLE_IDENTIFIER
+>> The identifier "pοrt" has misleading characters and might be confused with something else.
+>> WARNING
+>> Line: 12
 >> CONFUSABLE_IDENTIFIER
 >> The identifier "pοrt" has misleading characters and might be confused with something else.
 0 1
diff --git a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd
index 18ea260fa2..dc4223ec2d 100644
--- a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd
+++ b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.gd
@@ -1,6 +1,5 @@
 func test():
 	# The following statements should all be reported as standalone expressions:
-	"This is a standalone expression"
 	1234
 	0.0 + 0.0
 	Color(1, 1, 1)
diff --git a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out
index 99ec87438e..a2c67a6e51 100644
--- a/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out
+++ b/modules/gdscript/tests/scripts/parser/warnings/standalone_expression.out
@@ -8,14 +8,10 @@ GDTEST_OK
 >> STANDALONE_EXPRESSION
 >> Standalone expression (the line has no effect).
 >> WARNING
->> Line: 5
+>> Line: 6
 >> STANDALONE_EXPRESSION
 >> Standalone expression (the line has no effect).
 >> WARNING
 >> Line: 7
 >> STANDALONE_EXPRESSION
 >> Standalone expression (the line has no effect).
->> WARNING
->> Line: 8
->> STANDALONE_EXPRESSION
->> Standalone expression (the line has no effect).
diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h
index 0cf3448a48..5d19b5a164 100644
--- a/tests/core/string/test_string.h
+++ b/tests/core/string/test_string.h
@@ -512,6 +512,14 @@ TEST_CASE("[String] Splitting") {
 		CHECK(l[i] == slices_3[i]);
 	}
 
+	s = "";
+	l = s.split();
+	CHECK(l.size() == 1);
+	CHECK(l[0] == "");
+
+	l = s.split("", false);
+	CHECK(l.size() == 0);
+
 	s = "Mars Jupiter Saturn Uranus";
 	const char *slices_s[4] = { "Mars", "Jupiter", "Saturn", "Uranus" };
 	l = s.split_spaces();