summary | refs | log | tree | commit | diff
path: root/modules/text_server_adv
diff options
context:
space:
mode:
author: Haoyu Qiu <timothyqiu32@gmail.com> 2022-08-18 16:20:20 +0800
committer: Haoyu Qiu <timothyqiu32@gmail.com> 2022-12-16 13:18:57 +0800
commit: 207e52c161a44869f1af022030c3129b8c38a5f7 (patch)
tree: 3df831f12e9ded2b734b37d61837d2a35afbf685 /modules/text_server_adv
parent: f18f2740da9cce7383c2aa41fe8d081d56c8b6cf (diff)
Fix String::word_wrap() for long words
- Changes `TextServer.string_get_word_breaks()`
  - Returns pairs of boundary start and end offsets
  - Accepts `chars_per_line` to return line breaks
- Removes `String::word_wrap()`

Co-authored-by: bruvzg <7645683+bruvzg@users.noreply.github.com>
Diffstat (limited to 'modules/text_server_adv')
-rw-r--r-- modules/text_server_adv/text_server_adv.cpp 94
-rw-r--r-- modules/text_server_adv/text_server_adv.h 2
2 files changed, 72 insertions, 24 deletions
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index b4f3389c36..046973d193 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String
return String::utf16(lower.ptr(), len);
}
-PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const {
+PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language;
// Convert to UTF-16.
Char16String utf16 = p_string.utf16();
@@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
HashSet<int> breaks;
UErrorCode err = U_ZERO_ERROR;
UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
- if (U_FAILURE(err)) {
- // No data loaded - use fallback.
- for (int i = 0; i < p_string.length(); i++) {
- char32_t c = p_string[i];
- if (is_whitespace(c) || is_linebreak(c)) {
- breaks.insert(i);
- }
- }
- } else {
+ if (U_SUCCESS(err)) {
while (ubrk_next(bi) != UBRK_DONE) {
int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
if (pos != p_string.length() - 1) {
@@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
ubrk_close(bi);
PackedInt32Array ret;
+
+ int line_start = 0;
+ int line_end = 0; // End of last word on current line.
+ int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
+ int word_length = 0;
+
for (int i = 0; i < p_string.length(); i++) {
- char32_t c = p_string[i];
- if (c == 0xfffc) {
- continue;
- }
- if (u_ispunct(c) && c != 0x005F) {
- ret.push_back(i);
- continue;
- }
- if (is_underscore(c)) {
- ret.push_back(i);
- continue;
- }
- if (breaks.has(i)) {
+ const char32_t c = p_string[i];
+
+ if (is_linebreak(c)) {
+ // Force newline.
+ ret.push_back(line_start);
ret.push_back(i);
+ line_start = i + 1;
+ line_end = line_start;
+ word_start = line_start;
+ word_length = 0;
+ } else if (c == 0xfffc) {
continue;
+ } else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
+ // A whitespace ends current word.
+ if (word_length > 0) {
+ line_end = i - 1;
+ word_start = -1;
+ word_length = 0;
+ }
+ } else if (breaks.has(i)) {
+ // End current word, no space.
+ if (word_length > 0) {
+ line_end = i;
+ word_start = i + 1;
+ word_length = 0;
+ }
+ if (p_chars_per_line <= 0) {
+ ret.push_back(line_start);
+ ret.push_back(line_end + 1);
+ line_start = word_start;
+ line_end = line_start;
+ }
+ } else {
+ if (word_start == -1) {
+ word_start = i;
+ if (p_chars_per_line <= 0) {
+ ret.push_back(line_start);
+ ret.push_back(line_end + 1);
+ line_start = word_start;
+ line_end = line_start;
+ }
+ }
+ word_length += 1;
+
+ if (p_chars_per_line > 0) {
+ if (word_length > p_chars_per_line) {
+ // Word too long: wrap before current character.
+ ret.push_back(line_start);
+ ret.push_back(i);
+ line_start = i;
+ line_end = i;
+ word_start = i;
+ word_length = 1;
+ } else if (i - line_start + 1 > p_chars_per_line) {
+ // Line too long: wrap after the last word.
+ ret.push_back(line_start);
+ ret.push_back(line_end + 1);
+ line_start = word_start;
+ line_end = line_start;
+ }
+ }
}
}
+ if (line_start < p_string.length()) {
+ ret.push_back(line_start);
+ ret.push_back(p_string.length());
+ }
return ret;
}
diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h
index 8a9aa4356b..59f44cf142 100644
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -915,7 +915,7 @@ public:
MODBIND2RC(String, parse_number, const String &, const String &);
MODBIND1RC(String, percent_sign, const String &);
- MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
+ MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);
MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &);
MODBIND1RC(bool, spoof_check, const String &);