diff options
author | Haoyu Qiu <timothyqiu32@gmail.com> | 2022-08-18 16:20:20 +0800 |
---|---|---|
committer | Haoyu Qiu <timothyqiu32@gmail.com> | 2022-12-16 13:18:57 +0800 |
commit | 207e52c161a44869f1af022030c3129b8c38a5f7 (patch) | |
tree | 3df831f12e9ded2b734b37d61837d2a35afbf685 /modules | |
parent | f18f2740da9cce7383c2aa41fe8d081d56c8b6cf (diff) |
Fix String::word_wrap() for long words
- Changes `TextServer.string_get_word_breaks()`
  - Returns pairs of boundary start and end offsets
  - Accepts `chars_per_line` to return line breaks
- Removes `String::word_wrap()`
Co-authored-by: bruvzg <7645683+bruvzg@users.noreply.github.com>
Diffstat (limited to 'modules')
-rw-r--r-- | modules/text_server_adv/text_server_adv.cpp | 94 | ||||
-rw-r--r-- | modules/text_server_adv/text_server_adv.h | 2 | ||||
-rw-r--r-- | modules/text_server_fb/text_server_fb.cpp | 71 | ||||
-rw-r--r-- | modules/text_server_fb/text_server_fb.h | 2 |
4 files changed, 130 insertions, 39 deletions
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index b4f3389c36..046973d193 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String return String::utf16(lower.ptr(), len); } -PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language; // Convert to UTF-16. Char16String utf16 = p_string.utf16(); @@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str HashSet<int> breaks; UErrorCode err = U_ZERO_ERROR; UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err); - if (U_FAILURE(err)) { - // No data loaded - use fallback. - for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (is_whitespace(c) || is_linebreak(c)) { - breaks.insert(i); - } - } - } else { + if (U_SUCCESS(err)) { while (ubrk_next(bi) != UBRK_DONE) { int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1; if (pos != p_string.length() - 1) { @@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str ubrk_close(bi); PackedInt32Array ret; + + int line_start = 0; + int line_end = 0; // End of last word on current line. + int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word. 
+ int word_length = 0; + for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (c == 0xfffc) { - continue; - } - if (u_ispunct(c) && c != 0x005F) { - ret.push_back(i); - continue; - } - if (is_underscore(c)) { - ret.push_back(i); - continue; - } - if (breaks.has(i)) { + const char32_t c = p_string[i]; + + if (is_linebreak(c)) { + // Force newline. + ret.push_back(line_start); ret.push_back(i); + line_start = i + 1; + line_end = line_start; + word_start = line_start; + word_length = 0; + } else if (c == 0xfffc) { continue; + } else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) { + // A whitespace ends current word. + if (word_length > 0) { + line_end = i - 1; + word_start = -1; + word_length = 0; + } + } else if (breaks.has(i)) { + // End current word, no space. + if (word_length > 0) { + line_end = i; + word_start = i + 1; + word_length = 0; + } + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } else { + if (word_start == -1) { + word_start = i; + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } + word_length += 1; + + if (p_chars_per_line > 0) { + if (word_length > p_chars_per_line) { + // Word too long: wrap before current character. + ret.push_back(line_start); + ret.push_back(i); + line_start = i; + line_end = i; + word_start = i; + word_length = 1; + } else if (i - line_start + 1 > p_chars_per_line) { + // Line too long: wrap after the last word. 
+ ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } } } + if (line_start < p_string.length()) { + ret.push_back(line_start); + ret.push_back(p_string.length()); + } return ret; } diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index 8a9aa4356b..59f44cf142 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -915,7 +915,7 @@ public: MODBIND2RC(String, parse_number, const String &, const String &); MODBIND1RC(String, percent_sign, const String &); - MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int); MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &); MODBIND1RC(bool, spoof_check, const String &); diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp index 19abcde1fd..2cee360f42 100644 --- a/modules/text_server_fb/text_server_fb.cpp +++ b/modules/text_server_fb/text_server_fb.cpp @@ -4099,26 +4099,69 @@ String TextServerFallback::_string_to_lower(const String &p_string, const String return lower; } -PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { PackedInt32Array ret; + + int line_start = 0; + int line_end = 0; // End of last word on current line. + int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word. 
+ int word_length = 0; + for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (c == 0xfffc) { - continue; - } - if (is_punct(c) && c != 0x005F) { - ret.push_back(i); - continue; - } - if (is_underscore(c)) { - ret.push_back(i); - continue; - } - if (is_whitespace(c) || is_linebreak(c)) { + const char32_t c = p_string[i]; + + if (is_linebreak(c)) { + // Force newline. + ret.push_back(line_start); ret.push_back(i); + line_start = i + 1; + line_end = line_start; + word_start = line_start; + word_length = 0; + } else if (c == 0xfffc) { continue; + } else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) { + // A whitespace ends current word. + if (word_length > 0) { + line_end = i - 1; + word_start = -1; + word_length = 0; + } + } else { + if (word_start == -1) { + word_start = i; + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } + word_length += 1; + + if (p_chars_per_line > 0) { + if (word_length > p_chars_per_line) { + // Word too long: wrap before current character. + ret.push_back(line_start); + ret.push_back(i); + line_start = i; + line_end = i; + word_start = i; + word_length = 1; + } else if (i - line_start + 1 > p_chars_per_line) { + // Line too long: wrap after the last word. 
+ ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } } } + if (line_start < p_string.length()) { + ret.push_back(line_start); + ret.push_back(p_string.length()); + } return ret; } diff --git a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h index 11f37ab6d1..49e89214ec 100644 --- a/modules/text_server_fb/text_server_fb.h +++ b/modules/text_server_fb/text_server_fb.h @@ -786,7 +786,7 @@ public: MODBIND1RC(double, shaped_text_get_underline_position, const RID &); MODBIND1RC(double, shaped_text_get_underline_thickness, const RID &); - MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int); MODBIND2RC(String, string_to_upper, const String &, const String &); MODBIND2RC(String, string_to_lower, const String &, const String &); |