From 1418f97c70a5551bdbfeea853cbc479b32ea9e08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Verschelde?=
Date: Sun, 31 Jul 2022 18:46:53 +0200
Subject: File: Re-add support to skip CR (`\r`) in `File::get_as_text`

This was removed in #63481, and we confirmed that it's better like this,
but we add back the possibility to strip CR as an option, to optionally
restore the previous behavior.

For performance this is done directly in `String::parse_utf8`.

Also fixes Android `FileAccess::get_line()` as this one _should_ strip CR.

Supersedes #63717.
---
Notes (not part of the commit message):
- This copy of the patch was recovered from a whitespace-collapsed,
  tag-stripped export. Line structure was rebuilt from the hunk headers
  and the `Ref<FileAccess>` template arguments were restored.
- The author address on the From: line is missing from this copy.
- The first new test reads three fixture files through
  TestUtils::get_data_path(); those fixtures are not added by this
  (path-limited) view of the commit.

 tests/core/io/test_file_access.h | 23 +++++++++++++++++++++++
 tests/core/string/test_string.h  | 14 ++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'tests/core')

diff --git a/tests/core/io/test_file_access.h b/tests/core/io/test_file_access.h
index f0e1cceacf..aab62955cb 100644
--- a/tests/core/io/test_file_access.h
+++ b/tests/core/io/test_file_access.h
@@ -78,6 +78,29 @@ TEST_CASE("[FileAccess] CSV read") {
 	CHECK(row5[1] == "tab separated");
 	CHECK(row5[2] == "lines, good?");
 }
+
+TEST_CASE("[FileAccess] Get as UTF-8 String") {
+	Ref<FileAccess> f_lf = FileAccess::open(TestUtils::get_data_path("line_endings_lf.test.txt"), FileAccess::READ);
+	String s_lf = f_lf->get_as_utf8_string();
+	f_lf->seek(0);
+	String s_lf_nocr = f_lf->get_as_utf8_string(true);
+	CHECK(s_lf == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n");
+	CHECK(s_lf_nocr == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n");
+
+	Ref<FileAccess> f_crlf = FileAccess::open(TestUtils::get_data_path("line_endings_crlf.test.txt"), FileAccess::READ);
+	String s_crlf = f_crlf->get_as_utf8_string();
+	f_crlf->seek(0);
+	String s_crlf_nocr = f_crlf->get_as_utf8_string(true);
+	CHECK(s_crlf == "Hello darkness\r\nMy old friend\r\nI've come to talk\r\nWith you again\r\n");
+	CHECK(s_crlf_nocr == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n");
+
+	Ref<FileAccess> f_cr = FileAccess::open(TestUtils::get_data_path("line_endings_cr.test.txt"), FileAccess::READ);
+	String s_cr = f_cr->get_as_utf8_string();
+	f_cr->seek(0);
+	String s_cr_nocr = f_cr->get_as_utf8_string(true);
+	CHECK(s_cr == "Hello darkness\rMy old friend\rI've come to talk\rWith you again\r");
+	CHECK(s_cr_nocr == "Hello darknessMy old friendI've come to talkWith you again");
+}
 } // namespace TestFileAccess
 
 #endif // TEST_FILE_ACCESS_H
diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h
index 0c5704d6c9..b8b766023a 100644
--- a/tests/core/string/test_string.h
+++ b/tests/core/string/test_string.h
@@ -152,6 +152,20 @@ TEST_CASE("[String] UTF16 with BOM") {
 	CHECK(String::utf16(cs) == s);
 }
 
+TEST_CASE("[String] UTF8 with CR") {
+	const String base = U"Hello darkness\r\nMy old friend\nI've come to talk\rWith you again";
+
+	String keep_cr;
+	Error err = keep_cr.parse_utf8(base.utf8().get_data());
+	CHECK(err == OK);
+	CHECK(keep_cr == base);
+
+	String no_cr;
+	err = no_cr.parse_utf8(base.utf8().get_data(), -1, true); // Skip CR.
+	CHECK(err == OK);
+	CHECK(no_cr == base.replace("\r", ""));
+}
+
 TEST_CASE("[String] Invalid UTF8 (non-standard)") {
 	ERR_PRINT_OFF
 	static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0xF0, 0x82, 0x82, 0xAC, 0xED, 0xA0, 0x81, 0 };
-- 
cgit v1.2.3