From 207e52c161a44869f1af022030c3129b8c38a5f7 Mon Sep 17 00:00:00 2001 From: Haoyu Qiu Date: Thu, 18 Aug 2022 16:20:20 +0800 Subject: Fix String::word_wrap() for long words - Changes `TextServer.string_get_word_breaks()` - Returns pairs of boundary start and end offsets - Accepts `chars_per_line` to return line breaks - Removes `String::word_wrap()` Co-authored-by: bruvzg <7645683+bruvzg@users.noreply.github.com> --- core/string/ustring.cpp | 31 ---------- core/string/ustring.h | 1 - doc/classes/TextServer.xml | 11 +++- doc/classes/TextServerExtension.xml | 1 + editor/debugger/script_editor_debugger.cpp | 11 +++- editor/scene_tree_editor.cpp | 12 +++- modules/text_server_adv/text_server_adv.cpp | 94 ++++++++++++++++++++++------- modules/text_server_adv/text_server_adv.h | 2 +- modules/text_server_fb/text_server_fb.cpp | 71 +++++++++++++++++----- modules/text_server_fb/text_server_fb.h | 2 +- platform/linuxbsd/tts_linux.cpp | 11 ++-- servers/text/text_server_extension.cpp | 6 +- servers/text/text_server_extension.h | 4 +- servers/text_server.cpp | 2 +- servers/text_server.h | 2 +- tests/servers/test_text_server.h | 48 ++++++++++----- 16 files changed, 204 insertions(+), 105 deletions(-) diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 4e26b61334..adab6d07c7 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -220,37 +220,6 @@ void CharString::copy_from(const char *p_cstr) { /* String */ /*************************************************************************/ -//kind of poor should be rewritten properly -String String::word_wrap(int p_chars_per_line) const { - int from = 0; - int last_space = 0; - String ret; - for (int i = 0; i < length(); i++) { - if (i - from >= p_chars_per_line) { - if (last_space == -1) { - ret += substr(from, i - from + 1) + "\n"; - } else { - ret += substr(from, last_space - from) + "\n"; - i = last_space; //rewind - } - from = i + 1; - last_space = -1; - } else if (operator[](i) == ' ' || operator[](i) == '\t') { - last_space = i; - } else if (operator[](i) == '\n') { - ret += substr(from, i - from) + "\n"; - from = i + 1; - last_space = -1; - } - } - - if (from < length()) { - ret += substr(from, length()); - } - - return ret; -} - Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const { // Splits the URL into scheme, host, port, path. Strip credentials when present. String base = *this; diff --git a/core/string/ustring.h b/core/string/ustring.h index ed3848fb8a..559f679f0f 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -425,7 +425,6 @@ public: String c_escape_multiline() const; String c_unescape() const; String json_escape() const; - String word_wrap(int p_chars_per_line) const; Error parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const; String property_name_encode() const; diff --git a/doc/classes/TextServer.xml b/doc/classes/TextServer.xml index a4bccfb545..637e502d49 100644 --- a/doc/classes/TextServer.xml +++ b/doc/classes/TextServer.xml @@ -1548,8 +1548,15 @@ - - Returns array of the word break character offsets. + + + Returns an array of the word break boundaries. Elements in the returned array are the offsets of the start and end of words. Therefore the length of the array is always even. + When [param chars_per_line] is greater than zero, line break boundaries are returned instead. + [codeblock] + var ts = TextServerManager.get_primary_interface() + print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12] + print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12] + [/codeblock] diff --git a/doc/classes/TextServerExtension.xml b/doc/classes/TextServerExtension.xml index 44e65efc8c..e144b09eb6 100644 --- a/doc/classes/TextServerExtension.xml +++ b/doc/classes/TextServerExtension.xml @@ -1346,6 +1346,7 @@ + diff --git a/editor/debugger/script_editor_debugger.cpp b/editor/debugger/script_editor_debugger.cpp index deca638f3b..5cb7016b35 100644 --- a/editor/debugger/script_editor_debugger.cpp +++ b/editor/debugger/script_editor_debugger.cpp @@ -751,7 +751,16 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType reason->add_theme_color_override("font_color", get_theme_color(SNAME("success_color"), SNAME("Editor"))); } reason->set_text(p_reason); - reason->set_tooltip_text(p_reason.word_wrap(80)); + + const PackedInt32Array boundaries = TS->string_get_word_breaks(p_reason, "", 80); + PackedStringArray lines; + for (int i = 0; i < boundaries.size(); i += 2) { + const int start = boundaries[i]; + const int end = boundaries[i + 1]; + lines.append(p_reason.substr(start, end - start + 1)); + } + + reason->set_tooltip_text(String("\n").join(lines)); } void ScriptEditorDebugger::_notification(int p_what) { diff --git a/editor/scene_tree_editor.cpp b/editor/scene_tree_editor.cpp index 092ef30678..30a9dc5bbf 100644 --- a/editor/scene_tree_editor.cpp +++ b/editor/scene_tree_editor.cpp @@ -132,8 +132,16 @@ void SceneTreeEditor::_cell_button_pressed(Object *p_item, int p_column, int p_i if (config_err.is_empty()) { return; } - config_err = config_err.word_wrap(80); - warning->set_text(config_err); + + const PackedInt32Array boundaries = TS->string_get_word_breaks(config_err, "", 80); + PackedStringArray lines; + for (int i = 0; i < boundaries.size(); i += 2) { + const int start = boundaries[i]; + const int end = boundaries[i + 1]; + lines.append(config_err.substr(start, end - start + 1)); + } + + warning->set_text(String("\n").join(lines)); warning->popup_centered(); } else if (p_id == BUTTON_SIGNALS) { diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index b4f3389c36..046973d193 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String return String::utf16(lower.ptr(), len); } -PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language; // Convert to UTF-16. Char16String utf16 = p_string.utf16(); @@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str HashSet breaks; UErrorCode err = U_ZERO_ERROR; UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err); - if (U_FAILURE(err)) { - // No data loaded - use fallback. - for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (is_whitespace(c) || is_linebreak(c)) { - breaks.insert(i); - } - } - } else { + if (U_SUCCESS(err)) { while (ubrk_next(bi) != UBRK_DONE) { int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1; if (pos != p_string.length() - 1) { @@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str ubrk_close(bi); PackedInt32Array ret; + + int line_start = 0; + int line_end = 0; // End of last word on current line. + int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word. + int word_length = 0; + for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (c == 0xfffc) { - continue; - } - if (u_ispunct(c) && c != 0x005F) { - ret.push_back(i); - continue; - } - if (is_underscore(c)) { - ret.push_back(i); - continue; - } - if (breaks.has(i)) { + const char32_t c = p_string[i]; + + if (is_linebreak(c)) { + // Force newline. + ret.push_back(line_start); ret.push_back(i); + line_start = i + 1; + line_end = line_start; + word_start = line_start; + word_length = 0; + } else if (c == 0xfffc) { continue; + } else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) { + // A whitespace ends current word. + if (word_length > 0) { + line_end = i - 1; + word_start = -1; + word_length = 0; + } + } else if (breaks.has(i)) { + // End current word, no space. + if (word_length > 0) { + line_end = i; + word_start = i + 1; + word_length = 0; + } + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } else { + if (word_start == -1) { + word_start = i; + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } + word_length += 1; + + if (p_chars_per_line > 0) { + if (word_length > p_chars_per_line) { + // Word too long: wrap before current character. + ret.push_back(line_start); + ret.push_back(i); + line_start = i; + line_end = i; + word_start = i; + word_length = 1; + } else if (i - line_start + 1 > p_chars_per_line) { + // Line too long: wrap after the last word. + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } } } + if (line_start < p_string.length()) { + ret.push_back(line_start); + ret.push_back(p_string.length()); + } return ret; } diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index 8a9aa4356b..59f44cf142 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -915,7 +915,7 @@ public: MODBIND2RC(String, parse_number, const String &, const String &); MODBIND1RC(String, percent_sign, const String &); - MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int); MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &); MODBIND1RC(bool, spoof_check, const String &); diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp index 19abcde1fd..2cee360f42 100644 --- a/modules/text_server_fb/text_server_fb.cpp +++ b/modules/text_server_fb/text_server_fb.cpp @@ -4099,26 +4099,69 @@ String TextServerFallback::_string_to_lower(const String &p_string, const String return lower; } -PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { PackedInt32Array ret; + + int line_start = 0; + int line_end = 0; // End of last word on current line. + int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word. + int word_length = 0; + for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (c == 0xfffc) { - continue; - } - if (is_punct(c) && c != 0x005F) { - ret.push_back(i); - continue; - } - if (is_underscore(c)) { - ret.push_back(i); - continue; - } - if (is_whitespace(c) || is_linebreak(c)) { + const char32_t c = p_string[i]; + + if (is_linebreak(c)) { + // Force newline. + ret.push_back(line_start); ret.push_back(i); + line_start = i + 1; + line_end = line_start; + word_start = line_start; + word_length = 0; + } else if (c == 0xfffc) { continue; + } else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) { + // A whitespace ends current word. + if (word_length > 0) { + line_end = i - 1; + word_start = -1; + word_length = 0; + } + } else { + if (word_start == -1) { + word_start = i; + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } + word_length += 1; + + if (p_chars_per_line > 0) { + if (word_length > p_chars_per_line) { + // Word too long: wrap before current character. + ret.push_back(line_start); + ret.push_back(i); + line_start = i; + line_end = i; + word_start = i; + word_length = 1; + } else if (i - line_start + 1 > p_chars_per_line) { + // Line too long: wrap after the last word. + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } } } + if (line_start < p_string.length()) { + ret.push_back(line_start); + ret.push_back(p_string.length()); + } return ret; } diff --git a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h index 11f37ab6d1..49e89214ec 100644 --- a/modules/text_server_fb/text_server_fb.h +++ b/modules/text_server_fb/text_server_fb.h @@ -786,7 +786,7 @@ public: MODBIND1RC(double, shaped_text_get_underline_position, const RID &); MODBIND1RC(double, shaped_text_get_underline_thickness, const RID &); - MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int); MODBIND2RC(String, string_to_upper, const String &, const String &); MODBIND2RC(String, string_to_lower, const String &, const String &); diff --git a/platform/linuxbsd/tts_linux.cpp b/platform/linuxbsd/tts_linux.cpp index aea1183d3d..8fa708aad6 100644 --- a/platform/linuxbsd/tts_linux.cpp +++ b/platform/linuxbsd/tts_linux.cpp @@ -117,13 +117,12 @@ void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNo free_spd_voices(voices); } PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language); - int prev = 0; - for (int i = 0; i < breaks.size(); i++) { - text += message.text.substr(prev, breaks[i] - prev); - text += ""; - prev = breaks[i]; + for (int i = 0; i < breaks.size(); i += 2) { + const int start = breaks[i]; + const int end = breaks[i + 1]; + text += message.text.substr(start, end - start + 1); + text += ""; } - text += message.text.substr(prev, -1); spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data()); spd_set_volume(tts->synth, message.volume * 2 - 100); diff --git a/servers/text/text_server_extension.cpp b/servers/text/text_server_extension.cpp index c219029650..4baa1db9bf 100644 --- a/servers/text/text_server_extension.cpp +++ b/servers/text/text_server_extension.cpp @@ -308,7 +308,7 @@ void TextServerExtension::_bind_methods() { GDVIRTUAL_BIND(_strip_diacritics, "string"); GDVIRTUAL_BIND(_is_valid_identifier, "string"); - GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language"); + GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line"); GDVIRTUAL_BIND(_is_confusable, "string", "dict"); GDVIRTUAL_BIND(_spoof_check, "string"); @@ -1379,9 +1379,9 @@ TypedArray TextServerExtension::parse_structured_text(StructuredTextPa return ret; } -PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { PackedInt32Array ret; - GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, ret); + GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, p_chars_per_line, ret); return ret; } diff --git a/servers/text/text_server_extension.h b/servers/text/text_server_extension.h index 56ed2e41ec..551e4e9087 100644 --- a/servers/text/text_server_extension.h +++ b/servers/text/text_server_extension.h @@ -510,8 +510,8 @@ public: virtual String strip_diacritics(const String &p_string) const override; GDVIRTUAL1RC(String, _strip_diacritics, const String &); - virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; - GDVIRTUAL2RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &); + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const override; + GDVIRTUAL3RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &, int); virtual bool is_valid_identifier(const String &p_string) const override; GDVIRTUAL1RC(bool, _is_valid_identifier, const String &); diff --git a/servers/text_server.cpp b/servers/text_server.cpp index e11da53852..c0f235fe50 100644 --- a/servers/text_server.cpp +++ b/servers/text_server.cpp @@ -454,7 +454,7 @@ void TextServer::_bind_methods() { ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL("")); ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL("")); - ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL("")); + ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language", "chars_per_line"), &TextServer::string_get_word_breaks, DEFVAL(""), DEFVAL(0)); ClassDB::bind_method(D_METHOD("is_confusable", "string", "dict"), &TextServer::is_confusable); ClassDB::bind_method(D_METHOD("spoof_check", "string"), &TextServer::spoof_check); diff --git a/servers/text_server.h b/servers/text_server.h index 9508187cbc..0d94f45b79 100644 --- a/servers/text_server.h +++ b/servers/text_server.h @@ -493,7 +493,7 @@ public: virtual String percent_sign(const String &p_language = "") const = 0; // String functions. - virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0; + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const = 0; virtual int64_t is_confusable(const String &p_string, const PackedStringArray &p_dict) const { return -1; }; virtual bool spoof_check(const String &p_string) const { return false; }; diff --git a/tests/servers/test_text_server.h b/tests/servers/test_text_server.h index 297f7d2068..b3c120e0ba 100644 --- a/tests/servers/test_text_server.h +++ b/tests/servers/test_text_server.h @@ -593,12 +593,18 @@ TEST_SUITE("[TextServer]") { String text1 = U"linguistically similar and effectively form"; // 14^ 22^ 26^ 38^ PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en"); - CHECK(breaks.size() == 4); - if (breaks.size() == 4) { - CHECK(breaks[0] == 14); - CHECK(breaks[1] == 22); - CHECK(breaks[2] == 26); - CHECK(breaks[3] == 38); + CHECK(breaks.size() == 10); + if (breaks.size() == 10) { + CHECK(breaks[0] == 0); + CHECK(breaks[1] == 14); + CHECK(breaks[2] == 15); + CHECK(breaks[3] == 22); + CHECK(breaks[4] == 23); + CHECK(breaks[5] == 26); + CHECK(breaks[6] == 27); + CHECK(breaks[7] == 38); + CHECK(breaks[8] == 39); + CHECK(breaks[9] == 43); } } @@ -608,16 +614,26 @@ TEST_SUITE("[TextServer]") { // 3^ 7^ 13^ 16^ 20^ 25^ 29^ 32^ PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th"); - CHECK(breaks.size() == 8); - if (breaks.size() == 8) { - CHECK(breaks[0] == 3); - CHECK(breaks[1] == 7); - CHECK(breaks[2] == 13); - CHECK(breaks[3] == 16); - CHECK(breaks[4] == 20); - CHECK(breaks[5] == 25); - CHECK(breaks[6] == 29); - CHECK(breaks[7] == 32); + CHECK(breaks.size() == 18); + if (breaks.size() == 18) { + CHECK(breaks[0] == 0); + CHECK(breaks[1] == 4); + CHECK(breaks[2] == 4); + CHECK(breaks[3] == 8); + CHECK(breaks[4] == 8); + CHECK(breaks[5] == 14); + CHECK(breaks[6] == 14); + CHECK(breaks[7] == 17); + CHECK(breaks[8] == 17); + CHECK(breaks[9] == 21); + CHECK(breaks[10] == 21); + CHECK(breaks[11] == 26); + CHECK(breaks[12] == 26); + CHECK(breaks[13] == 30); + CHECK(breaks[14] == 30); + CHECK(breaks[15] == 33); + CHECK(breaks[16] == 33); + CHECK(breaks[17] == 42); } } } -- cgit v1.2.3