From 6ab672d1ef7ece5c3019d46aeb98df3686f37e26 Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Thu, 4 Nov 2021 14:33:37 +0200 Subject: Implement text-to-speech support on Android, iOS, HTML5, Linux, macOS and Windows. Implement TextServer word break method. --- servers/display_server.cpp | 94 ++++++++++++++++++++++++++++++++++ servers/display_server.h | 36 +++++++++++++ servers/text/text_server_extension.cpp | 10 ++++ servers/text/text_server_extension.h | 3 ++ servers/text_server.cpp | 2 + servers/text_server.h | 3 ++ 6 files changed, 148 insertions(+) (limited to 'servers') diff --git a/servers/display_server.cpp b/servers/display_server.cpp index 67bfc75426..8d97cd2543 100644 --- a/servers/display_server.cpp +++ b/servers/display_server.cpp @@ -220,6 +220,81 @@ void DisplayServer::global_menu_clear(const String &p_menu_root) { WARN_PRINT("Global menus not supported by this display server."); } +bool DisplayServer::tts_is_speaking() const { + WARN_PRINT("TTS is not supported by this display server."); + return false; +} + +bool DisplayServer::tts_is_paused() const { + WARN_PRINT("TTS is not supported by this display server."); + return false; +} + +void DisplayServer::tts_pause() { + WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_resume() { + WARN_PRINT("TTS is not supported by this display server."); +} + +Array DisplayServer::tts_get_voices() const { + WARN_PRINT("TTS is not supported by this display server."); + return Array(); +} + +PackedStringArray DisplayServer::tts_get_voices_for_language(const String &p_language) const { + PackedStringArray ret; + Array voices = tts_get_voices(); + for (int i = 0; i < voices.size(); i++) { + const Dictionary &voice = voices[i]; + if (voice.has("id") && voice.has("language") && voice["language"].operator String().begins_with(p_language)) { + ret.push_back(voice["id"]); + } + } + return ret; +} + +void DisplayServer::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_stop() { + WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable) { + ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX); + utterance_callback[p_event] = p_callable; +} + +void DisplayServer::tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos) { + ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX); + switch (p_event) { + case DisplayServer::TTS_UTTERANCE_STARTED: + case DisplayServer::TTS_UTTERANCE_ENDED: + case DisplayServer::TTS_UTTERANCE_CANCELED: { + if (utterance_callback[p_event].is_valid()) { + Variant args[1]; + args[0] = p_id; + const Variant *argp[] = { &args[0] }; + utterance_callback[p_event].call_deferred(argp, 1); // Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession. + } + } break; + case DisplayServer::TTS_UTTERANCE_BOUNDARY: { + if (utterance_callback[p_event].is_valid()) { + Variant args[2]; + args[0] = p_pos; + args[1] = p_id; + const Variant *argp[] = { &args[0], &args[1] }; + utterance_callback[p_event].call_deferred(argp, 2); // Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession. + } + } break; + default: + break; + } +} + void DisplayServer::mouse_set_mode(MouseMode p_mode) { WARN_PRINT("Mouse is not supported by this display server."); } @@ -478,6 +553,19 @@ void DisplayServer::_bind_methods() { ClassDB::bind_method(D_METHOD("global_menu_remove_item", "menu_root", "idx"), &DisplayServer::global_menu_remove_item); ClassDB::bind_method(D_METHOD("global_menu_clear", "menu_root"), &DisplayServer::global_menu_clear); + ClassDB::bind_method(D_METHOD("tts_is_speaking"), &DisplayServer::tts_is_speaking); + ClassDB::bind_method(D_METHOD("tts_is_paused"), &DisplayServer::tts_is_paused); + ClassDB::bind_method(D_METHOD("tts_get_voices"), &DisplayServer::tts_get_voices); + ClassDB::bind_method(D_METHOD("tts_get_voices_for_language", "language"), &DisplayServer::tts_get_voices_for_language); + + ClassDB::bind_method(D_METHOD("tts_speak", "text", "voice", "volume", "pitch", "rate", "utterance_id", "interrupt"), &DisplayServer::tts_speak, DEFVAL(50), DEFVAL(1.f), DEFVAL(1.f), DEFVAL(0), DEFVAL(false)); + ClassDB::bind_method(D_METHOD("tts_pause"), &DisplayServer::tts_pause); + ClassDB::bind_method(D_METHOD("tts_resume"), &DisplayServer::tts_resume); + ClassDB::bind_method(D_METHOD("tts_stop"), &DisplayServer::tts_stop); + + ClassDB::bind_method(D_METHOD("tts_set_utterance_callback", "event", "callable"), &DisplayServer::tts_set_utterance_callback); + ClassDB::bind_method(D_METHOD("_tts_post_utterance_event", "event", "id", "char_pos"), &DisplayServer::tts_post_utterance_event); + ClassDB::bind_method(D_METHOD("mouse_set_mode", "mouse_mode"), &DisplayServer::mouse_set_mode); ClassDB::bind_method(D_METHOD("mouse_get_mode"), &DisplayServer::mouse_get_mode); @@ -621,6 +709,7 @@ void DisplayServer::_bind_methods() { BIND_ENUM_CONSTANT(FEATURE_ORIENTATION); BIND_ENUM_CONSTANT(FEATURE_SWAP_BUFFERS); BIND_ENUM_CONSTANT(FEATURE_CLIPBOARD_PRIMARY); + BIND_ENUM_CONSTANT(FEATURE_TEXT_TO_SPEECH); BIND_ENUM_CONSTANT(MOUSE_MODE_VISIBLE); BIND_ENUM_CONSTANT(MOUSE_MODE_HIDDEN); @@ -689,6 +778,11 @@ void DisplayServer::_bind_methods() { BIND_ENUM_CONSTANT(DISPLAY_HANDLE); BIND_ENUM_CONSTANT(WINDOW_HANDLE); BIND_ENUM_CONSTANT(WINDOW_VIEW); + + BIND_ENUM_CONSTANT(TTS_UTTERANCE_STARTED); + BIND_ENUM_CONSTANT(TTS_UTTERANCE_ENDED); + BIND_ENUM_CONSTANT(TTS_UTTERANCE_CANCELED); + BIND_ENUM_CONSTANT(TTS_UTTERANCE_BOUNDARY); } void DisplayServer::register_create_function(const char *p_name, CreateFunction p_function, GetRenderingDriversFunction p_get_drivers) { diff --git a/servers/display_server.h b/servers/display_server.h index 4961b07ba3..19efcbd3dd 100644 --- a/servers/display_server.h +++ b/servers/display_server.h @@ -121,6 +121,7 @@ public: FEATURE_SWAP_BUFFERS, FEATURE_KEEP_SCREEN_ON, FEATURE_CLIPBOARD_PRIMARY, + FEATURE_TEXT_TO_SPEECH, }; virtual bool has_feature(Feature p_feature) const = 0; @@ -172,6 +173,40 @@ public: virtual void global_menu_remove_item(const String &p_menu_root, int p_idx); virtual void global_menu_clear(const String &p_menu_root); + struct TTSUtterance { + String text; + String voice; + int volume = 50; + float pitch = 1.f; + float rate = 1.f; + int id = 0; + }; + + enum TTSUtteranceEvent { + TTS_UTTERANCE_STARTED, + TTS_UTTERANCE_ENDED, + TTS_UTTERANCE_CANCELED, + TTS_UTTERANCE_BOUNDARY, + TTS_UTTERANCE_MAX, + }; + +private: + Callable utterance_callback[TTS_UTTERANCE_MAX]; + +public: + virtual bool tts_is_speaking() const; + virtual bool tts_is_paused() const; + virtual Array tts_get_voices() const; + virtual PackedStringArray tts_get_voices_for_language(const String &p_language) const; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); + virtual void tts_pause(); + virtual void tts_resume(); + virtual void tts_stop(); + + virtual void tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable); + virtual void tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos = 0); + enum MouseMode { MOUSE_MODE_VISIBLE, MOUSE_MODE_HIDDEN, @@ -431,5 +466,6 @@ VARIANT_ENUM_CAST(DisplayServer::WindowFlags) VARIANT_ENUM_CAST(DisplayServer::HandleType) VARIANT_ENUM_CAST(DisplayServer::CursorShape) VARIANT_ENUM_CAST(DisplayServer::VSyncMode) +VARIANT_ENUM_CAST(DisplayServer::TTSUtteranceEvent) #endif // DISPLAY_SERVER_H diff --git a/servers/text/text_server_extension.cpp b/servers/text/text_server_extension.cpp index 001706bb6f..005cb68302 100644 --- a/servers/text/text_server_extension.cpp +++ b/servers/text/text_server_extension.cpp @@ -293,6 +293,8 @@ void TextServerExtension::_bind_methods() { GDVIRTUAL_BIND(strip_diacritics, "string"); + GDVIRTUAL_BIND(string_get_word_breaks, "string", "language"); + GDVIRTUAL_BIND(string_to_upper, "string", "language"); GDVIRTUAL_BIND(string_to_lower, "string", "language"); @@ -1503,6 +1505,14 @@ Array TextServerExtension::parse_structured_text(StructuredTextParser p_parser_t return Array(); } +PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const { + PackedInt32Array ret; + if (GDVIRTUAL_CALL(string_get_word_breaks, p_string, p_language, ret)) { + return ret; + } + return PackedInt32Array(); +} + TextServerExtension::TextServerExtension() { //NOP } diff --git a/servers/text/text_server_extension.h b/servers/text/text_server_extension.h index ce781097f3..7b7fc61ed7 100644 --- a/servers/text/text_server_extension.h +++ b/servers/text/text_server_extension.h @@ -485,6 +485,9 @@ public: virtual String strip_diacritics(const String &p_string) const override; GDVIRTUAL1RC(String, strip_diacritics, const String &); + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; + GDVIRTUAL2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + virtual String string_to_upper(const String &p_string, const String &p_language = "") const override; virtual String string_to_lower(const String &p_string, const String &p_language = "") const override; GDVIRTUAL2RC(String, string_to_upper, const String &, const String &); diff --git a/servers/text_server.cpp b/servers/text_server.cpp index d66e769e3c..7d9945f5d7 100644 --- a/servers/text_server.cpp +++ b/servers/text_server.cpp @@ -439,6 +439,8 @@ void TextServer::_bind_methods() { ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL("")); ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL("")); + ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL("")); + ClassDB::bind_method(D_METHOD("strip_diacritics", "string"), &TextServer::strip_diacritics); ClassDB::bind_method(D_METHOD("string_to_upper", "string", "language"), &TextServer::string_to_upper, DEFVAL("")); diff --git a/servers/text_server.h b/servers/text_server.h index 7e7f26b32d..b08aa26917 100644 --- a/servers/text_server.h +++ b/servers/text_server.h @@ -431,6 +431,9 @@ public: virtual String parse_number(const String &p_string, const String &p_language = "") const = 0; virtual String percent_sign(const String &p_language = "") const = 0; + // String functions. + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0; + virtual String strip_diacritics(const String &p_string) const; // Other string operations. -- cgit v1.2.3