From 6ab672d1ef7ece5c3019d46aeb98df3686f37e26 Mon Sep 17 00:00:00 2001
From: bruvzg <7645683+bruvzg@users.noreply.github.com>
Date: Thu, 4 Nov 2021 14:33:37 +0200
Subject: Implement text-to-speech support on Android, iOS, HTML5, Linux, macOS and Windows.

Implement TextServer word break method.
---
 modules/text_server_adv/text_server_adv.cpp | 60 +++++++++++++++++++++++++++++
 modules/text_server_adv/text_server_adv.h   |  4 ++
 2 files changed, 64 insertions(+)

(limited to 'modules/text_server_adv')

diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index 0ae8219e23..437fbe76ab 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -3255,6 +3255,19 @@ void TextServerAdvanced::font_set_global_oversampling(double p_oversampling) {
 /* Shaped text buffer interface */
 /*************************************************************************/
 
+int64_t TextServerAdvanced::_convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const {
+	int64_t limit = p_pos;
+	if (p_utf32.length() != p_utf16.length()) {
+		const UChar *data = p_utf16.ptr();
+		for (int i = 0; i < p_pos; i++) {
+			if (U16_IS_LEAD(data[i])) {
+				limit--;
+			}
+		}
+	}
+	return limit;
+}
+
 int64_t TextServerAdvanced::_convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const {
 	int64_t limit = p_pos;
 	if (p_sd->text.length() != p_sd->utf16.length()) {
@@ -5555,6 +5568,53 @@ String TextServerAdvanced::string_to_lower(const String &p_string, const String
 	return String::utf16(lower.ptr(), len);
 }
 
+PackedInt32Array TextServerAdvanced::string_get_word_breaks(const String &p_string, const String &p_language) const {
+	// Convert to UTF-16.
+	Char16String utf16 = p_string.utf16();
+
+	Set<int> breaks;
+	UErrorCode err = U_ZERO_ERROR;
+	UBreakIterator *bi = ubrk_open(UBRK_LINE, p_language.ascii().get_data(), (const UChar *)utf16.ptr(), utf16.length(), &err);
+	if (U_FAILURE(err)) {
+		// No data loaded - use fallback.
+		for (int i = 0; i < p_string.length(); i++) {
+			char32_t c = p_string[i];
+			if (is_whitespace(c) || is_linebreak(c)) {
+				breaks.insert(i);
+			}
+		}
+	} else {
+		while (ubrk_next(bi) != UBRK_DONE) {
+			int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
+			if (pos != p_string.length() - 1) {
+				breaks.insert(pos);
+			}
+		}
+	}
+	ubrk_close(bi);
+
+	PackedInt32Array ret;
+	for (int i = 0; i < p_string.length(); i++) {
+		char32_t c = p_string[i];
+		if (c == 0xfffc) {
+			continue;
+		}
+		if (u_ispunct(c) && c != 0x005F) {
+			ret.push_back(i);
+			continue;
+		}
+		if (is_underscore(c)) {
+			ret.push_back(i);
+			continue;
+		}
+		if (breaks.has(i)) {
+			ret.push_back(i);
+			continue;
+		}
+	}
+	return ret;
+}
+
 TextServerAdvanced::TextServerAdvanced() {
 	_insert_num_systems_lang();
 	_insert_feature_sets();
diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h
index fa59566a94..1b4293aa72 100644
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -393,11 +393,13 @@ class TextServerAdvanced : public TextServerExtension {
 	mutable RID_PtrOwner<ShapedTextDataAdvanced> shaped_owner;
 
 	void _realign(ShapedTextDataAdvanced *p_sd) const;
+	int64_t _convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const;
 	int64_t _convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;
 	int64_t _convert_pos_inv(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;
 	bool _shape_substr(ShapedTextDataAdvanced *p_new_sd, const ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_length) const;
 	void _shape_run(ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_end, hb_script_t p_script, hb_direction_t p_direction, Array p_fonts, int64_t p_span, int64_t p_fb_index);
 	Glyph _shape_single_glyph(ShapedTextDataAdvanced *p_sd, char32_t p_char, hb_script_t p_script, hb_direction_t p_direction, const RID &p_font, int64_t p_font_size);
+
 	_FORCE_INLINE_ void _add_featuers(const Dictionary &p_source, Vector<hb_feature_t> &r_ftrs);
 
 	// HarfBuzz bitmap font interface.
@@ -686,6 +688,8 @@ public:
 	virtual String parse_number(const String &p_string, const String &p_language = "") const override;
 	virtual String percent_sign(const String &p_language = "") const override;
 
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
+
 	virtual String strip_diacritics(const String &p_string) const override;
 
 	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;
-- 
cgit v1.2.3