summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitattributes2
-rw-r--r--core/core_bind.cpp6
-rw-r--r--core/core_bind.h2
-rw-r--r--core/io/file_access.cpp4
-rw-r--r--core/io/file_access.h2
-rw-r--r--core/string/ustring.cpp10
-rw-r--r--core/string/ustring.h2
-rw-r--r--doc/classes/File.xml5
-rwxr-xr-xmisc/scripts/file_format.sh2
-rw-r--r--platform/android/file_access_filesystem_jandroid.cpp6
-rw-r--r--tests/core/io/test_file_access.h23
-rw-r--r--tests/core/string/test_string.h14
-rw-r--r--tests/data/line_endings_cr.test.txt1
-rw-r--r--tests/data/line_endings_crlf.test.txt4
-rw-r--r--tests/data/line_endings_lf.test.txt4
15 files changed, 74 insertions, 13 deletions
diff --git a/.gitattributes b/.gitattributes
index 45ea6c25e8..8d2fb108ba 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -7,6 +7,8 @@ thirdparty/* linguist-vendored
* text=auto eol=lf
# Except for bat files, which are Windows only files
*.bat eol=crlf
+# And some test files where the EOL matters
+*.test.txt -text
# The above only works properly for Git 2.10+, so for older versions
# we need to manually list the binary files we don't want modified.
diff --git a/core/core_bind.cpp b/core/core_bind.cpp
index 3f94ff8329..9382e174f1 100644
--- a/core/core_bind.cpp
+++ b/core/core_bind.cpp
@@ -1227,13 +1227,13 @@ Vector<uint8_t> File::get_buffer(int64_t p_length) const {
return data;
}
-String File::get_as_text() const {
+String File::get_as_text(bool p_skip_cr) const {
ERR_FAIL_COND_V_MSG(f.is_null(), String(), "File must be opened before use, or is lacking read-write permission.");
uint64_t original_pos = f->get_position();
const_cast<FileAccess *>(*f)->seek(0);
- String text = f->get_as_utf8_string();
+ String text = f->get_as_utf8_string(p_skip_cr);
const_cast<FileAccess *>(*f)->seek(original_pos);
@@ -1430,7 +1430,7 @@ void File::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_buffer", "length"), &File::get_buffer);
ClassDB::bind_method(D_METHOD("get_line"), &File::get_line);
ClassDB::bind_method(D_METHOD("get_csv_line", "delim"), &File::get_csv_line, DEFVAL(","));
- ClassDB::bind_method(D_METHOD("get_as_text"), &File::get_as_text);
+ ClassDB::bind_method(D_METHOD("get_as_text", "skip_cr"), &File::get_as_text, DEFVAL(false));
ClassDB::bind_method(D_METHOD("get_md5", "path"), &File::get_md5);
ClassDB::bind_method(D_METHOD("get_sha256", "path"), &File::get_sha256);
ClassDB::bind_method(D_METHOD("is_big_endian"), &File::is_big_endian);
diff --git a/core/core_bind.h b/core/core_bind.h
index 3a4faa3422..7564bdc7e3 100644
--- a/core/core_bind.h
+++ b/core/core_bind.h
@@ -411,7 +411,7 @@ public:
Vector<uint8_t> get_buffer(int64_t p_length) const; // Get an array of bytes.
String get_line() const;
Vector<String> get_csv_line(const String &p_delim = ",") const;
- String get_as_text() const;
+ String get_as_text(bool p_skip_cr = false) const;
String get_md5(const String &p_path) const;
String get_sha256(const String &p_path) const;
diff --git a/core/io/file_access.cpp b/core/io/file_access.cpp
index da25f23917..8ed3d40c22 100644
--- a/core/io/file_access.cpp
+++ b/core/io/file_access.cpp
@@ -377,7 +377,7 @@ uint64_t FileAccess::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
return i;
}
-String FileAccess::get_as_utf8_string() const {
+String FileAccess::get_as_utf8_string(bool p_skip_cr) const {
Vector<uint8_t> sourcef;
uint64_t len = get_length();
sourcef.resize(len + 1);
@@ -388,7 +388,7 @@ String FileAccess::get_as_utf8_string() const {
w[len] = 0;
String s;
- s.parse_utf8((const char *)w);
+ s.parse_utf8((const char *)w, -1, p_skip_cr);
return s;
}
diff --git a/core/io/file_access.h b/core/io/file_access.h
index e2c11142d7..3386800686 100644
--- a/core/io/file_access.h
+++ b/core/io/file_access.h
@@ -113,7 +113,7 @@ public:
virtual String get_line() const;
virtual String get_token() const;
virtual Vector<String> get_csv_line(const String &p_delim = ",") const;
- virtual String get_as_utf8_string() const;
+ virtual String get_as_utf8_string(bool p_skip_cr = false) const;
/**
* Use this for files WRITTEN in _big_ endian machines (ie, amiga/mac)
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index beefe54faf..e93375bff7 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -1656,7 +1656,7 @@ String String::utf8(const char *p_utf8, int p_len) {
return ret;
}
-Error String::parse_utf8(const char *p_utf8, int p_len) {
+Error String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
if (!p_utf8) {
return ERR_INVALID_DATA;
}
@@ -1689,6 +1689,10 @@ Error String::parse_utf8(const char *p_utf8, int p_len) {
uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ ptrtmp++;
+ continue;
+ }
/* Determine the number of characters in sequence */
if ((c & 0x80) == 0) {
skip = 0;
@@ -1753,6 +1757,10 @@ Error String::parse_utf8(const char *p_utf8, int p_len) {
uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
if (skip == 0) {
+ if (p_skip_cr && c == '\r') {
+ p_utf8++;
+ continue;
+ }
/* Determine the number of characters in sequence */
if ((c & 0x80) == 0) {
*(dst++) = c;
diff --git a/core/string/ustring.h b/core/string/ustring.h
index 7672663964..6c3169f136 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -377,7 +377,7 @@ public:
CharString ascii(bool p_allow_extended = false) const;
CharString utf8() const;
- Error parse_utf8(const char *p_utf8, int p_len = -1);
+ Error parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false);
static String utf8(const char *p_utf8, int p_len = -1);
Char16String utf16() const;
diff --git a/doc/classes/File.xml b/doc/classes/File.xml
index 0b4a8fa46e..3a2776ff21 100644
--- a/doc/classes/File.xml
+++ b/doc/classes/File.xml
@@ -115,9 +115,10 @@
</method>
<method name="get_as_text" qualifiers="const">
<return type="String" />
+ <argument index="0" name="skip_cr" type="bool" default="false" />
<description>
- Returns the whole file as a [String].
- Text is interpreted as being UTF-8 encoded.
+ Returns the whole file as a [String]. Text is interpreted as being UTF-8 encoded.
+ If [code]skip_cr[/code] is [code]true[/code], carriage return characters ([code]\r[/code], CR) will be ignored when parsing the UTF-8, so that only line feed characters ([code]\n[/code], LF) represent a new line (Unix convention).
</description>
</method>
<method name="get_buffer" qualifiers="const">
diff --git a/misc/scripts/file_format.sh b/misc/scripts/file_format.sh
index c767d3f8a0..731b3ee005 100755
--- a/misc/scripts/file_format.sh
+++ b/misc/scripts/file_format.sh
@@ -37,6 +37,8 @@ while IFS= read -rd '' f; do
continue
elif [[ "$f" == *"-so_wrap."* ]]; then
continue
+ elif [[ "$f" == *".test.txt" ]]; then
+ continue
fi
# Ensure that files are UTF-8 formatted.
recode UTF-8 "$f" 2> /dev/null
diff --git a/platform/android/file_access_filesystem_jandroid.cpp b/platform/android/file_access_filesystem_jandroid.cpp
index 733d92f741..6b21c18d59 100644
--- a/platform/android/file_access_filesystem_jandroid.cpp
+++ b/platform/android/file_access_filesystem_jandroid.cpp
@@ -29,9 +29,11 @@
/*************************************************************************/
#include "file_access_filesystem_jandroid.h"
+
#include "core/os/os.h"
#include "core/templates/local_vector.h"
#include "thread_jandroid.h"
+
#include <unistd.h>
jobject FileAccessFilesystemJAndroid::file_access_handler = nullptr;
@@ -198,7 +200,7 @@ String FileAccessFilesystemJAndroid::get_line() const {
if (elem == '\n' || elem == '\0') {
// Found the end of the line
const_cast<FileAccessFilesystemJAndroid *>(this)->seek(start_position + line_buffer_position + 1);
- if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position)) {
+ if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position, true)) {
return String();
}
return result;
@@ -206,7 +208,7 @@ String FileAccessFilesystemJAndroid::get_line() const {
}
}
- if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position)) {
+ if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position, true)) {
return String();
}
return result;
diff --git a/tests/core/io/test_file_access.h b/tests/core/io/test_file_access.h
index f0e1cceacf..aab62955cb 100644
--- a/tests/core/io/test_file_access.h
+++ b/tests/core/io/test_file_access.h
@@ -78,6 +78,29 @@ TEST_CASE("[FileAccess] CSV read") {
CHECK(row5[1] == "tab separated");
CHECK(row5[2] == "lines, good?");
}
+
+TEST_CASE("[FileAccess] Get as UTF-8 String") {
+ Ref<FileAccess> f_lf = FileAccess::open(TestUtils::get_data_path("line_endings_lf.test.txt"), FileAccess::READ);
+ String s_lf = f_lf->get_as_utf8_string();
+ f_lf->seek(0);
+ String s_lf_nocr = f_lf->get_as_utf8_string(true);
+ CHECK(s_lf == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n");
+ CHECK(s_lf_nocr == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n");
+
+ Ref<FileAccess> f_crlf = FileAccess::open(TestUtils::get_data_path("line_endings_crlf.test.txt"), FileAccess::READ);
+ String s_crlf = f_crlf->get_as_utf8_string();
+ f_crlf->seek(0);
+ String s_crlf_nocr = f_crlf->get_as_utf8_string(true);
+ CHECK(s_crlf == "Hello darkness\r\nMy old friend\r\nI've come to talk\r\nWith you again\r\n");
+ CHECK(s_crlf_nocr == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n");
+
+ Ref<FileAccess> f_cr = FileAccess::open(TestUtils::get_data_path("line_endings_cr.test.txt"), FileAccess::READ);
+ String s_cr = f_cr->get_as_utf8_string();
+ f_cr->seek(0);
+ String s_cr_nocr = f_cr->get_as_utf8_string(true);
+ CHECK(s_cr == "Hello darkness\rMy old friend\rI've come to talk\rWith you again\r");
+ CHECK(s_cr_nocr == "Hello darknessMy old friendI've come to talkWith you again");
+}
} // namespace TestFileAccess
#endif // TEST_FILE_ACCESS_H
diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h
index 0c5704d6c9..b8b766023a 100644
--- a/tests/core/string/test_string.h
+++ b/tests/core/string/test_string.h
@@ -152,6 +152,20 @@ TEST_CASE("[String] UTF16 with BOM") {
CHECK(String::utf16(cs) == s);
}
+TEST_CASE("[String] UTF8 with CR") {
+ const String base = U"Hello darkness\r\nMy old friend\nI've come to talk\rWith you again";
+
+ String keep_cr;
+ Error err = keep_cr.parse_utf8(base.utf8().get_data());
+ CHECK(err == OK);
+ CHECK(keep_cr == base);
+
+ String no_cr;
+ err = no_cr.parse_utf8(base.utf8().get_data(), -1, true); // Skip CR.
+ CHECK(err == OK);
+ CHECK(no_cr == base.replace("\r", ""));
+}
+
TEST_CASE("[String] Invalid UTF8 (non-standard)") {
ERR_PRINT_OFF
static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0xF0, 0x82, 0x82, 0xAC, 0xED, 0xA0, 0x81, 0 };
diff --git a/tests/data/line_endings_cr.test.txt b/tests/data/line_endings_cr.test.txt
new file mode 100644
index 0000000000..556154bb25
--- /dev/null
+++ b/tests/data/line_endings_cr.test.txt
@@ -0,0 +1 @@
+Hello darkness My old friend I've come to talk With you again \ No newline at end of file
diff --git a/tests/data/line_endings_crlf.test.txt b/tests/data/line_endings_crlf.test.txt
new file mode 100644
index 0000000000..a3cbe55b7f
--- /dev/null
+++ b/tests/data/line_endings_crlf.test.txt
@@ -0,0 +1,4 @@
+Hello darkness
+My old friend
+I've come to talk
+With you again
diff --git a/tests/data/line_endings_lf.test.txt b/tests/data/line_endings_lf.test.txt
new file mode 100644
index 0000000000..0aabcd911e
--- /dev/null
+++ b/tests/data/line_endings_lf.test.txt
@@ -0,0 +1,4 @@
+Hello darkness
+My old friend
+I've come to talk
+With you again