diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-11-04 14:33:37 +0200 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2022-04-28 14:35:41 +0300 |
commit | 6ab672d1ef7ece5c3019d46aeb98df3686f37e26 (patch) | |
tree | be10d088e90c6a9e60efef823f54f9aa0d70aa07 | |
parent | 3e1b824c050b765095285c67b3e4c8092e1f88c6 (diff) |
Implement text-to-speech support on Android, iOS, HTML5, Linux, macOS and Windows.
Implement TextServer word break method.
54 files changed, 3962 insertions, 2 deletions
diff --git a/.github/workflows/linux_builds.yml b/.github/workflows/linux_builds.yml index 7ac364c45f..338278f461 100644 --- a/.github/workflows/linux_builds.yml +++ b/.github/workflows/linux_builds.yml @@ -87,7 +87,7 @@ jobs: sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \ libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev \ libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip \ - llvm + llvm libspeechd-dev speech-dispatcher - name: Setup Godot build cache uses: ./.github/actions/godot-cache diff --git a/doc/classes/DisplayServer.xml b/doc/classes/DisplayServer.xml index 0d99c600d5..ede3a1e199 100644 --- a/doc/classes/DisplayServer.xml +++ b/doc/classes/DisplayServer.xml @@ -814,6 +814,93 @@ [b]Note:[/b] This method is implemented on Windows. </description> </method> + <method name="tts_get_voices" qualifiers="const"> + <return type="Array" /> + <description> + Returns an [Array] of voice information dictionaries. + Each [Dictionary] contains two [String] entries: + - [code]name[/code] is voice name. + - [code]id[/code] is voice identifier. + - [code]language[/code] is language code in [code]lang_Variant[/code] format. [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. And [code]Variant[/code] part is an engine dependent string describing country, region or/and dialect. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_get_voices_for_language" qualifiers="const"> + <return type="PackedStringArray" /> + <argument index="0" name="language" type="String" /> + <description> + Returns an [PackedStringArray] of voice identifiers for the [code]language[/code]. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_is_paused" qualifiers="const"> + <return type="bool" /> + <description> + Returns [code]true[/code] if the synthesizer is in a paused state. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_is_speaking" qualifiers="const"> + <return type="bool" /> + <description> + Returns [code]true[/code] if the synthesizer is generating speech, or have utterance waiting in the queue. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_pause"> + <return type="void" /> + <description> + Puts the synthesizer into a paused state. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_resume"> + <return type="void" /> + <description> + Resumes the synthesizer if it was paused. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_set_utterance_callback"> + <return type="void" /> + <argument index="0" name="event" type="int" enum="DisplayServer.TTSUtteranceEvent" /> + <argument index="1" name="callable" type="Callable" /> + <description> + Adds a callback, which is called when the utterance has started, finished, canceled or reached a text boundary. + - [code]TTS_UTTERANCE_STARTED[/code], [code]TTS_UTTERANCE_ENDED[/code], and [code]TTS_UTTERANCE_CANCELED[/code] callable's method should take one [int] parameter, the utterance id. + - [code]TTS_UTTERANCE_BOUNDARY[/code] callable's method should take two [int] parameters, the index of the character and the utterance id. + [b]Note:[/b] The granularity of the boundary callbacks is engine dependent. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_speak"> + <return type="void" /> + <argument index="0" name="text" type="String" /> + <argument index="1" name="voice" type="String" /> + <argument index="2" name="volume" type="int" default="50" /> + <argument index="3" name="pitch" type="float" default="1.0" /> + <argument index="4" name="rate" type="float" default="1.0" /> + <argument index="5" name="utterance_id" type="int" default="0" /> + <argument index="6" name="interrupt" type="bool" default="false" /> + <description> + Adds an utterance to the queue. If [code]interrupt[/code] is [code]true[/code], the queue is cleared first. + - [code]voice[/code] identifier is one of the [code]"id"[/code] values returned by [method tts_get_voices] or one of the values returned by [method tts_get_voices_for_language]. + - [code]volume[/code] ranges from [code]0[/code] (lowest) to [code]100[/code] (highest). + - [code]pitch[/code] ranges from [code]0.0[/code] (lowest) to [code]2.0[/code] (highest), [code]1.0[/code] is default pitch for the current voice. + - [code]rate[/code] ranges from [code]0.1[/code] (lowest) to [code]10.0[/code] (highest), [code]1.0[/code] is a normal speaking rate. Other values act as a percentage relative. + - [code]utterance_id[/code] is passed as a parameter to the callback functions. + [b]Note:[/b] On Windows and Linux, utterance [code]text[/code] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak]. + [b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> + <method name="tts_stop"> + <return type="void" /> + <description> + Stops synthesis in progress and removes all utterances from the queue. + [b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. + </description> + </method> <method name="virtual_keyboard_get_height" qualifiers="const"> <return type="int" /> <description> @@ -1184,6 +1271,9 @@ </constant> <constant name="FEATURE_CLIPBOARD_PRIMARY" value="18" enum="Feature"> </constant> + <constant name="FEATURE_TEXT_TO_SPEECH" value="19" enum="Feature"> + Display server supports text-to-speech. See [code]tts_*[/code] methods. + </constant> <constant name="MOUSE_MODE_VISIBLE" value="0" enum="MouseMode"> Makes the mouse cursor visible if it is hidden. </constant> @@ -1335,5 +1425,17 @@ - MacOS: [code]NSView*[/code] for the window main view. - iOS: [code]UIView*[/code] for the window main view. </constant> + <constant name="TTS_UTTERANCE_STARTED" value="0" enum="TTSUtteranceEvent"> + Utterance has begun to be spoken. + </constant> + <constant name="TTS_UTTERANCE_ENDED" value="1" enum="TTSUtteranceEvent"> + Utterance was successfully finished. + </constant> + <constant name="TTS_UTTERANCE_CANCELED" value="2" enum="TTSUtteranceEvent"> + Utterance was canceled, or TTS service was unable to process it. + </constant> + <constant name="TTS_UTTERANCE_BOUNDARY" value="3" enum="TTSUtteranceEvent"> + Utterance reached a word or sentence boundary. + </constant> </constants> </class> diff --git a/doc/classes/TextServer.xml b/doc/classes/TextServer.xml index dba4bd24a5..2f57b76374 100644 --- a/doc/classes/TextServer.xml +++ b/doc/classes/TextServer.xml @@ -1441,6 +1441,14 @@ Aligns shaped text to the given tab-stops. </description> </method> + <method name="string_get_word_breaks" qualifiers="const"> + <return type="PackedInt32Array" /> + <argument index="0" name="string" type="String" /> + <argument index="1" name="language" type="String" default="""" /> + <description> + Returns array of the word break character offsets. + </description> + </method> <method name="string_to_lower" qualifiers="const"> <return type="String" /> <argument index="0" name="string" type="String" /> diff --git a/doc/classes/TextServerExtension.xml b/doc/classes/TextServerExtension.xml index ef522d08a7..434d6f909c 100644 --- a/doc/classes/TextServerExtension.xml +++ b/doc/classes/TextServerExtension.xml @@ -1461,6 +1461,14 @@ [b]Note:[/b] This method is used by default line/word breaking methods, and its implementation might be omitted if custom line breaking in implemented. </description> </method> + <method name="string_get_word_breaks" qualifiers="virtual const"> + <return type="PackedInt32Array" /> + <argument index="0" name="string" type="String" /> + <argument index="1" name="language" type="String" /> + <description> + Returns array of the word break character offsets. + </description> + </method> <method name="string_to_lower" qualifiers="virtual const"> <return type="String" /> <argument index="0" name="string" type="String" /> diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index 0ae8219e23..437fbe76ab 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -3255,6 +3255,19 @@ void TextServerAdvanced::font_set_global_oversampling(double p_oversampling) { /* Shaped text buffer interface */ /*************************************************************************/ +int64_t TextServerAdvanced::_convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const { + int64_t limit = p_pos; + if (p_utf32.length() != p_utf16.length()) { + const UChar *data = p_utf16.ptr(); + for (int i = 0; i < p_pos; i++) { + if (U16_IS_LEAD(data[i])) { + limit--; + } + } + } + return limit; +} + int64_t TextServerAdvanced::_convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const { int64_t limit = p_pos; if (p_sd->text.length() != p_sd->utf16.length()) { @@ -5555,6 +5568,53 @@ String TextServerAdvanced::string_to_lower(const String &p_string, const String return String::utf16(lower.ptr(), len); } +PackedInt32Array TextServerAdvanced::string_get_word_breaks(const String &p_string, const String &p_language) const { + // Convert to UTF-16. + Char16String utf16 = p_string.utf16(); + + Set<int> breaks; + UErrorCode err = U_ZERO_ERROR; + UBreakIterator *bi = ubrk_open(UBRK_LINE, p_language.ascii().get_data(), (const UChar *)utf16.ptr(), utf16.length(), &err); + if (U_FAILURE(err)) { + // No data loaded - use fallback. + for (int i = 0; i < p_string.length(); i++) { + char32_t c = p_string[i]; + if (is_whitespace(c) || is_linebreak(c)) { + breaks.insert(i); + } + } + } else { + while (ubrk_next(bi) != UBRK_DONE) { + int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1; + if (pos != p_string.length() - 1) { + breaks.insert(pos); + } + } + } + ubrk_close(bi); + + PackedInt32Array ret; + for (int i = 0; i < p_string.length(); i++) { + char32_t c = p_string[i]; + if (c == 0xfffc) { + continue; + } + if (u_ispunct(c) && c != 0x005F) { + ret.push_back(i); + continue; + } + if (is_underscore(c)) { + ret.push_back(i); + continue; + } + if (breaks.has(i)) { + ret.push_back(i); + continue; + } + } + return ret; +} + TextServerAdvanced::TextServerAdvanced() { _insert_num_systems_lang(); _insert_feature_sets(); diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index fa59566a94..1b4293aa72 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -393,11 +393,13 @@ class TextServerAdvanced : public TextServerExtension { mutable RID_PtrOwner<ShapedTextDataAdvanced> shaped_owner; void _realign(ShapedTextDataAdvanced *p_sd) const; + int64_t _convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const; int64_t _convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const; int64_t _convert_pos_inv(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const; bool _shape_substr(ShapedTextDataAdvanced *p_new_sd, const ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_length) const; void _shape_run(ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_end, hb_script_t p_script, hb_direction_t p_direction, Array p_fonts, int64_t p_span, int64_t p_fb_index); Glyph _shape_single_glyph(ShapedTextDataAdvanced *p_sd, char32_t p_char, hb_script_t p_script, hb_direction_t p_direction, const RID &p_font, int64_t p_font_size); + _FORCE_INLINE_ void _add_featuers(const Dictionary &p_source, Vector<hb_feature_t> &r_ftrs); // HarfBuzz bitmap font interface. @@ -686,6 +688,8 @@ public: virtual String parse_number(const String &p_string, const String &p_language = "") const override; virtual String percent_sign(const String &p_language = "") const override; + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; + virtual String strip_diacritics(const String &p_string) const override; virtual String string_to_upper(const String &p_string, const String &p_language = "") const override; diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp index 1251aaf2b9..d84e9e581a 100644 --- a/modules/text_server_fb/text_server_fb.cpp +++ b/modules/text_server_fb/text_server_fb.cpp @@ -3079,7 +3079,7 @@ bool TextServerFallback::shaped_text_update_breaks(const RID &p_shaped) { if (sd_glyphs[i].count > 0) { char32_t c = sd->text[sd_glyphs[i].start - sd->start]; if (c_punct_size == 0) { - if (is_punct(c)) { + if (is_punct(c) && c != 0x005F) { sd_glyphs[i].flags |= GRAPHEME_IS_PUNCTUATION; } } else { @@ -3623,6 +3623,29 @@ String TextServerFallback::string_to_lower(const String &p_string, const String return lower; } +PackedInt32Array TextServerFallback::string_get_word_breaks(const String &p_string, const String &p_language) const { + PackedInt32Array ret; + for (int i = 0; i < p_string.length(); i++) { + char32_t c = p_string[i]; + if (c == 0xfffc) { + continue; + } + if (is_punct(c) && c != 0x005F) { + ret.push_back(i); + continue; + } + if (is_underscore(c)) { + ret.push_back(i); + continue; + } + if (is_whitespace(c) || is_linebreak(c)) { + ret.push_back(i); + continue; + } + } + return ret; +} + TextServerFallback::TextServerFallback() { _insert_feature_sets(); }; diff --git a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h index d6f61e02f8..c837029623 100644 --- a/modules/text_server_fb/text_server_fb.h +++ b/modules/text_server_fb/text_server_fb.h @@ -573,6 +573,8 @@ public: virtual double shaped_text_get_underline_position(const RID &p_shaped) const override; virtual double shaped_text_get_underline_thickness(const RID &p_shaped) const override; + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; + virtual String string_to_upper(const String &p_string, const String &p_language = "") const override; virtual String string_to_lower(const String &p_string, const String &p_language = "") const override; diff --git a/platform/android/SCsub b/platform/android/SCsub index 1a3c158d2e..ad226255bc 100644 --- a/platform/android/SCsub +++ b/platform/android/SCsub @@ -8,6 +8,7 @@ android_files = [ "file_access_android.cpp", "audio_driver_opensl.cpp", "dir_access_jandroid.cpp", + "tts_android.cpp", "thread_jandroid.cpp", "net_socket_android.cpp", "java_godot_lib_jni.cpp", diff --git a/platform/android/display_server_android.cpp b/platform/android/display_server_android.cpp index e7de287fc6..2eb7056a36 100644 --- a/platform/android/display_server_android.cpp +++ b/platform/android/display_server_android.cpp @@ -34,6 +34,7 @@ #include "java_godot_io_wrapper.h" #include "java_godot_wrapper.h" #include "os_android.h" +#include "tts_android.h" #if defined(VULKAN_ENABLED) #include "drivers/vulkan/rendering_device_vulkan.h" @@ -63,6 +64,7 @@ bool DisplayServerAndroid::has_feature(Feature p_feature) const { case FEATURE_ORIENTATION: case FEATURE_TOUCHSCREEN: case FEATURE_VIRTUAL_KEYBOARD: + case FEATURE_TEXT_TO_SPEECH: return true; default: return false; @@ -73,6 +75,34 @@ String DisplayServerAndroid::get_name() const { return "Android"; } +bool DisplayServerAndroid::tts_is_speaking() const { + return TTS_Android::is_speaking(); +} + +bool DisplayServerAndroid::tts_is_paused() const { + return TTS_Android::is_paused(); +} + +Array DisplayServerAndroid::tts_get_voices() const { + return TTS_Android::get_voices(); +} + +void DisplayServerAndroid::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + TTS_Android::speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); +} + +void DisplayServerAndroid::tts_pause() { + TTS_Android::pause(); +} + +void DisplayServerAndroid::tts_resume() { + TTS_Android::resume(); +} + +void DisplayServerAndroid::tts_stop() { + TTS_Android::stop(); +} + void DisplayServerAndroid::clipboard_set(const String &p_text) { GodotJavaWrapper *godot_java = OS_Android::get_singleton()->get_godot_java(); ERR_FAIL_COND(!godot_java); diff --git a/platform/android/display_server_android.h b/platform/android/display_server_android.h index 1d268bbcfd..2604214ac0 100644 --- a/platform/android/display_server_android.h +++ b/platform/android/display_server_android.h @@ -91,6 +91,15 @@ public: virtual bool has_feature(Feature p_feature) const override; virtual String get_name() const override; + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; + virtual void clipboard_set(const String &p_text) override; virtual String clipboard_get() const override; virtual bool clipboard_has() const override; diff --git a/platform/android/java/lib/src/org/godotengine/godot/Godot.java b/platform/android/java/lib/src/org/godotengine/godot/Godot.java index 6e597163ab..351e2e60a0 100644 --- a/platform/android/java/lib/src/org/godotengine/godot/Godot.java +++ b/platform/android/java/lib/src/org/godotengine/godot/Godot.java @@ -36,6 +36,7 @@ import static android.content.Context.WINDOW_SERVICE; import org.godotengine.godot.input.GodotEditText; import org.godotengine.godot.plugin.GodotPlugin; import org.godotengine.godot.plugin.GodotPluginRegistry; +import org.godotengine.godot.tts.GodotTTS; import org.godotengine.godot.utils.GodotNetUtils; import org.godotengine.godot.utils.PermissionsUtil; import org.godotengine.godot.xr.XRMode; @@ -165,6 +166,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC public static GodotIO io; public static GodotNetUtils netUtils; + public static GodotTTS tts; public interface ResultCallback { void callback(int requestCode, int resultCode, Intent data); @@ -458,6 +460,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC io = new GodotIO(activity); GodotLib.io = io; netUtils = new GodotNetUtils(activity); + tts = new GodotTTS(activity); mSensorManager = (SensorManager)activity.getSystemService(Context.SENSOR_SERVICE); mAccelerometer = mSensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER); mSensorManager.registerListener(this, mAccelerometer, SensorManager.SENSOR_DELAY_GAME); diff --git a/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java b/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java index 253a51b83c..1f8f8c82a6 100644 --- a/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java +++ b/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java @@ -92,6 +92,11 @@ public class GodotLib { public static native boolean step(); /** + * TTS callback. + */ + public static native void ttsCallback(int event, int id, int pos); + + /** * Forward touch events from the main thread to the GL thread. */ public static native void touch(int inputDevice, int event, int pointer, int pointerCount, float[] positions); diff --git a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java new file mode 100644 index 0000000000..2239ddac8e --- /dev/null +++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java @@ -0,0 +1,298 @@ +/*************************************************************************/ +/* GodotTTS.java */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +package org.godotengine.godot.tts; + +import org.godotengine.godot.GodotLib; + +import android.app.Activity; +import android.os.Bundle; +import android.speech.tts.TextToSpeech; +import android.speech.tts.UtteranceProgressListener; +import android.speech.tts.Voice; + +import androidx.annotation.Keep; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Set; + +/** + * Wrapper for Android Text to Speech API and custom utterance query implementation. + * <p> + * A [GodotTTS] provides the following features: + * <p> + * <ul> + * <li>Access to the Android Text to Speech API. + * <li>Utterance pause / resume functions, unsupported by Android TTS API. + * </ul> + */ +@Keep +public class GodotTTS extends UtteranceProgressListener { + // Note: These constants must be in sync with DisplayServer::TTSUtteranceEvent enum from "servers/display_server.h". + final private static int EVENT_START = 0; + final private static int EVENT_END = 1; + final private static int EVENT_CANCEL = 2; + final private static int EVENT_BOUNDARY = 3; + + final private TextToSpeech synth; + final private LinkedList<GodotUtterance> queue; + final private Object lock = new Object(); + private GodotUtterance lastUtterance; + + private boolean speaking; + private boolean paused; + + public GodotTTS(Activity p_activity) { + synth = new TextToSpeech(p_activity, null); + queue = new LinkedList<GodotUtterance>(); + + synth.setOnUtteranceProgressListener(this); + } + + private void updateTTS() { + if (!speaking && queue.size() > 0) { + int mode = TextToSpeech.QUEUE_FLUSH; + GodotUtterance message = queue.pollFirst(); + + Set<Voice> voices = synth.getVoices(); + for (Voice v : voices) { + if (v.getName().equals(message.voice)) { + synth.setVoice(v); + break; + } + } + synth.setPitch(message.pitch); + synth.setSpeechRate(message.rate); + + Bundle params = new Bundle(); + params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, message.volume / 100.f); + + lastUtterance = message; + lastUtterance.start = 0; + lastUtterance.offset = 0; + paused = false; + + synth.speak(message.text, mode, params, String.valueOf(message.id)); + speaking = true; + } + } + + /** + * Called by TTS engine when the TTS service is about to speak the specified range. + */ + @Override + public void onRangeStart(String utteranceId, int start, int end, int frame) { + synchronized (lock) { + if (lastUtterance != null && Integer.parseInt(utteranceId) == lastUtterance.id) { + lastUtterance.offset = start; + GodotLib.ttsCallback(EVENT_BOUNDARY, lastUtterance.id, start + lastUtterance.start); + } + } + } + + /** + * Called by TTS engine when an utterance was canceled in progress. + */ + @Override + public void onStop(String utteranceId, boolean interrupted) { + synchronized (lock) { + if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { + GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); + speaking = false; + updateTTS(); + } + } + } + + /** + * Called by TTS engine when an utterance has begun to be spoken.. + */ + @Override + public void onStart(String utteranceId) { + synchronized (lock) { + if (lastUtterance != null && lastUtterance.start == 0 && Integer.parseInt(utteranceId) == lastUtterance.id) { + GodotLib.ttsCallback(EVENT_START, lastUtterance.id, 0); + } + } + } + + /** + * Called by TTS engine when an utterance was successfully finished. + */ + @Override + public void onDone(String utteranceId) { + synchronized (lock) { + if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { + GodotLib.ttsCallback(EVENT_END, lastUtterance.id, 0); + speaking = false; + updateTTS(); + } + } + } + + /** + * Called by TTS engine when an error has occurred during processing. + */ + @Override + public void onError(String utteranceId, int errorCode) { + synchronized (lock) { + if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { + GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); + speaking = false; + updateTTS(); + } + } + } + + /** + * Called by TTS engine when an error has occurred during processing (pre API level 21 version). + */ + @Override + public void onError(String utteranceId) { + synchronized (lock) { + if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { + GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); + speaking = false; + updateTTS(); + } + } + } + + /** + * Adds an utterance to the queue. + */ + public void speak(String text, String voice, int volume, float pitch, float rate, int utterance_id, boolean interrupt) { + synchronized (lock) { + GodotUtterance message = new GodotUtterance(text, voice, volume, pitch, rate, utterance_id); + queue.addLast(message); + + if (isPaused()) { + resumeSpeaking(); + } else { + updateTTS(); + } + } + } + + /** + * Puts the synthesizer into a paused state. + */ + public void pauseSpeaking() { + synchronized (lock) { + if (!paused) { + paused = true; + synth.stop(); + } + } + } + + /** + * Resumes the synthesizer if it was paused. + */ + public void resumeSpeaking() { + synchronized (lock) { + if (lastUtterance != null && paused) { + int mode = TextToSpeech.QUEUE_FLUSH; + + Set<Voice> voices = synth.getVoices(); + for (Voice v : voices) { + if (v.getName().equals(lastUtterance.voice)) { + synth.setVoice(v); + break; + } + } + synth.setPitch(lastUtterance.pitch); + synth.setSpeechRate(lastUtterance.rate); + + Bundle params = new Bundle(); + params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, lastUtterance.volume / 100.f); + + lastUtterance.start = lastUtterance.offset; + lastUtterance.offset = 0; + paused = false; + + synth.speak(lastUtterance.text.substring(lastUtterance.start), mode, params, String.valueOf(lastUtterance.id)); + speaking = true; + } else { + paused = false; + } + } + } + + /** + * Stops synthesis in progress and removes all utterances from the queue. + */ + public void stopSpeaking() { + synchronized (lock) { + for (GodotUtterance u : queue) { + GodotLib.ttsCallback(EVENT_CANCEL, u.id, 0); + } + queue.clear(); + + if (lastUtterance != null) { + GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); + } + lastUtterance = null; + + paused = false; + speaking = false; + + synth.stop(); + } + } + + /** + * Returns voice information. + */ + public String[] getVoices() { + Set<Voice> voices = synth.getVoices(); + String[] list = new String[voices.size()]; + int i = 0; + for (Voice v : voices) { + list[i++] = v.getLocale().toString() + ";" + v.getName(); + } + return list; + } + + /** + * Returns true if the synthesizer is generating speech, or have utterance waiting in the queue. + */ + public boolean isSpeaking() { + return speaking; + } + + /** + * Returns true if the synthesizer is in a paused state. + */ + public boolean isPaused() { + return paused; + } +} diff --git a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java new file mode 100644 index 0000000000..bde37e7315 --- /dev/null +++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java @@ -0,0 +1,55 @@ +/*************************************************************************/ +/* GodotUtterance.java */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +package org.godotengine.godot.tts; + +/** + * A speech request for GodotTTS. + */ +class GodotUtterance { + final String text; + final String voice; + final int volume; + final float pitch; + final float rate; + final int id; + + int offset = -1; + int start = 0; + + GodotUtterance(String text, String voice, int volume, float pitch, float rate, int id) { + this.text = text; + this.voice = voice; + this.volume = volume; + this.pitch = pitch; + this.rate = rate; + this.id = id; + } +} diff --git a/platform/android/java_godot_lib_jni.cpp b/platform/android/java_godot_lib_jni.cpp index 5e0a9d967b..8ad72b499e 100644 --- a/platform/android/java_godot_lib_jni.cpp +++ b/platform/android/java_godot_lib_jni.cpp @@ -49,6 +49,7 @@ #include "os_android.h" #include "string_android.h" #include "thread_jandroid.h" +#include "tts_android.h" #include <android/input.h> #include <unistd.h> @@ -96,6 +97,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_initialize(JNIEnv *en DirAccessJAndroid::setup(godot_io_java->get_instance()); NetSocketAndroid::setup(godot_java->get_member_object("netUtils", "Lorg/godotengine/godot/utils/GodotNetUtils;", env)); + TTS_Android::setup(godot_java->get_member_object("tts", "Lorg/godotengine/godot/tts/GodotTTS;", env)); os_android = new OS_Android(godot_java, godot_io_java, p_use_apk_expansion); @@ -213,6 +215,10 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jcl } } +JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos) { + TTS_Android::_java_utterance_callback(event, id, pos); +} + JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz) { if (step.get() == -1) { return true; diff --git a/platform/android/java_godot_lib_jni.h b/platform/android/java_godot_lib_jni.h index e686ee5c09..4f2195942c 100644 --- a/platform/android/java_godot_lib_jni.h +++ b/platform/android/java_godot_lib_jni.h @@ -43,6 +43,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_setup(JNIEnv *env, jc JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_resize(JNIEnv *env, jclass clazz, jobject p_surface, jint p_width, jint p_height); JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_newcontext(JNIEnv *env, jclass clazz, jobject p_surface); JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz); +JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos); JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jclass clazz); void touch_preprocessing(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions, jint buttons_mask = 0, jfloat vertical_factor = 0, jfloat horizontal_factor = 0); JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_touch__IIII_3F(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions); diff --git a/platform/android/tts_android.cpp b/platform/android/tts_android.cpp new file mode 100644 index 0000000000..528878f14e --- /dev/null +++ b/platform/android/tts_android.cpp @@ -0,0 +1,189 @@ +/*************************************************************************/ +/* tts_android.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "tts_android.h" + +#include "java_godot_wrapper.h" +#include "os_android.h" +#include "string_android.h" +#include "thread_jandroid.h" + +jobject TTS_Android::tts = 0; +jclass TTS_Android::cls = 0; + +jmethodID TTS_Android::_is_speaking = 0; +jmethodID TTS_Android::_is_paused = 0; +jmethodID TTS_Android::_get_voices = 0; +jmethodID TTS_Android::_speak = 0; +jmethodID TTS_Android::_pause_speaking = 0; +jmethodID TTS_Android::_resume_speaking = 0; +jmethodID TTS_Android::_stop_speaking = 0; + +Map<int, Char16String> TTS_Android::ids; + +void TTS_Android::setup(jobject p_tts) { + JNIEnv *env = get_jni_env(); + + tts = env->NewGlobalRef(p_tts); + + jclass c = env->GetObjectClass(tts); + cls = (jclass)env->NewGlobalRef(c); + + _is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z"); + _is_paused = env->GetMethodID(cls, "isPaused", "()Z"); + _get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;"); + _speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V"); + _pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V"); + _resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V"); + _stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V"); +} + +void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) { + if (ids.has(p_id)) { + int pos = 0; + if ((DisplayServer::TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) { + // Convert position from UTF-16 to UTF-32. + const Char16String &string = ids[p_id]; + for (int i = 0; i < MIN(p_pos, string.length()); i++) { + char16_t c = string[i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + } else if ((DisplayServer::TTSUtteranceEvent)p_event != DisplayServer::TTS_UTTERANCE_STARTED) { + ids.erase(p_id); + } + DisplayServer::get_singleton()->tts_post_utterance_event((DisplayServer::TTSUtteranceEvent)p_event, p_id, pos); + } +} + +bool TTS_Android::is_speaking() { + if (_is_speaking) { + JNIEnv *env = get_jni_env(); + + ERR_FAIL_COND_V(env == nullptr, false); + return env->CallBooleanMethod(tts, _is_speaking); + } else { + return false; + } +} + +bool TTS_Android::is_paused() { + if (_is_paused) { + JNIEnv *env = get_jni_env(); + + ERR_FAIL_COND_V(env == nullptr, false); + return env->CallBooleanMethod(tts, _is_paused); + } else { + return false; + } +} + +Array TTS_Android::get_voices() { + Array list; + if (_get_voices) { + JNIEnv *env = get_jni_env(); + ERR_FAIL_COND_V(env == nullptr, list); + + jobject voices_object = env->CallObjectMethod(tts, _get_voices); + jobjectArray *arr = reinterpret_cast<jobjectArray *>(&voices_object); + + jsize len = env->GetArrayLength(*arr); + for (int i = 0; i < len; i++) { + jstring jStr = (jstring)env->GetObjectArrayElement(*arr, i); + String str = jstring_to_string(jStr, env); + Vector<String> tokens = str.split(";", true, 2); + if (tokens.size() == 2) { + Dictionary voice_d; + voice_d["name"] = tokens[1]; + voice_d["id"] = tokens[1]; + voice_d["language"] = tokens[0]; + list.push_back(voice_d); + } + env->DeleteLocalRef(jStr); + } + } + return list; +} + +void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + if (p_interrupt) { + stop(); + } + + if (p_text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); + return; + } + + ids[p_utterance_id] = p_text.utf16(); + + if (_speak) { + JNIEnv *env = get_jni_env(); + ERR_FAIL_COND(env == nullptr); + + jstring jStrT = env->NewStringUTF(p_text.utf8().get_data()); + jstring jStrV = env->NewStringUTF(p_voice.utf8().get_data()); + env->CallVoidMethod(tts, _speak, jStrT, jStrV, CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, p_interrupt); + } +} + +void TTS_Android::pause() { + if (_pause_speaking) { + JNIEnv *env = get_jni_env(); + + ERR_FAIL_COND(env == nullptr); + env->CallVoidMethod(tts, _pause_speaking); + } +} + +void TTS_Android::resume() { + if (_resume_speaking) { + JNIEnv *env = get_jni_env(); + + ERR_FAIL_COND(env == nullptr); + env->CallVoidMethod(tts, _resume_speaking); + } +} + +void TTS_Android::stop() { + for (Map<int, Char16String>::Element *E = ids.front(); E; E = E->next()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key()); + } + ids.clear(); + + if (_stop_speaking) { + JNIEnv *env = get_jni_env(); + + ERR_FAIL_COND(env == nullptr); + env->CallVoidMethod(tts, _stop_speaking); + } +} diff --git a/platform/android/tts_android.h b/platform/android/tts_android.h new file mode 100644 index 0000000000..efeed94856 --- /dev/null +++ b/platform/android/tts_android.h @@ -0,0 +1,67 @@ +/*************************************************************************/ +/* tts_android.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef TTS_ANDROID_H +#define TTS_ANDROID_H + +#include "core/string/ustring.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include <jni.h> + +class TTS_Android { + static jobject tts; + static jclass cls; + + static jmethodID _is_speaking; + static jmethodID _is_paused; + static jmethodID _get_voices; + static jmethodID _speak; + static jmethodID _pause_speaking; + static jmethodID _resume_speaking; + static jmethodID _stop_speaking; + + static Map<int, Char16String> ids; + +public: + static void setup(jobject p_tts); + static void _java_utterance_callback(int p_event, int p_id, int p_pos); + + static bool is_speaking(); + static bool is_paused(); + static Array get_voices(); + static void speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt); + static void pause(); + static void resume(); + static void stop(); +}; + +#endif // TTS_ANDROID_H diff --git a/platform/iphone/SCsub b/platform/iphone/SCsub index 58b574a72f..5e10bf5646 100644 --- a/platform/iphone/SCsub +++ b/platform/iphone/SCsub @@ -13,6 +13,7 @@ iphone_lib = [ "display_server_iphone.mm", "joypad_iphone.mm", "godot_view.mm", + "tts_ios.mm", "display_layer.mm", "godot_app_delegate.m", "godot_view_renderer.mm", diff --git a/platform/iphone/display_server_iphone.h b/platform/iphone/display_server_iphone.h index 7441550f67..6ae190b81a 100644 --- a/platform/iphone/display_server_iphone.h +++ b/platform/iphone/display_server_iphone.h @@ -58,6 +58,8 @@ class DisplayServerIPhone : public DisplayServer { RenderingDeviceVulkan *rendering_device_vulkan = nullptr; #endif + id tts = nullptr; + DisplayServer::ScreenOrientation screen_orientation; ObjectID window_attached_instance_id; @@ -123,6 +125,15 @@ public: virtual bool has_feature(Feature p_feature) const override; virtual String get_name() const override; + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; + virtual int get_screen_count() const override; virtual Point2i screen_get_position(int p_screen = SCREEN_OF_MAIN_WINDOW) const override; virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override; diff --git a/platform/iphone/display_server_iphone.mm b/platform/iphone/display_server_iphone.mm index a0f8daf5a0..ec58ab195a 100644 --- a/platform/iphone/display_server_iphone.mm +++ b/platform/iphone/display_server_iphone.mm @@ -38,6 +38,7 @@ #include "ios.h" #import "keyboard_input_view.h" #include "os_iphone.h" +#include "tts_ios.h" #import "view_controller.h" #import <Foundation/Foundation.h> @@ -52,6 +53,9 @@ DisplayServerIPhone *DisplayServerIPhone::get_singleton() { DisplayServerIPhone::DisplayServerIPhone(const String &p_rendering_driver, WindowMode p_mode, DisplayServer::VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error) { rendering_driver = p_rendering_driver; + // Init TTS + tts = [[TTS_IOS alloc] init]; + #if defined(GLES3_ENABLED) // FIXME: Add support for both OpenGL and Vulkan when OpenGL is implemented // again, @@ -310,6 +314,7 @@ bool DisplayServerIPhone::has_feature(Feature p_feature) const { case FEATURE_ORIENTATION: case FEATURE_TOUCHSCREEN: case FEATURE_VIRTUAL_KEYBOARD: + case FEATURE_TEXT_TO_SPEECH: return true; default: return false; @@ -320,6 +325,41 @@ String DisplayServerIPhone::get_name() const { return "iPhone"; } +bool DisplayServerIPhone::tts_is_speaking() const { + ERR_FAIL_COND_V(!tts, false); + return [tts isSpeaking]; +} + +bool DisplayServerIPhone::tts_is_paused() const { + ERR_FAIL_COND_V(!tts, false); + return [tts isPaused]; +} + +Array DisplayServerIPhone::tts_get_voices() const { + ERR_FAIL_COND_V(!tts, Array()); + return [tts getVoices]; +} + +void DisplayServerIPhone::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + ERR_FAIL_COND(!tts); + [tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt]; +} + +void DisplayServerIPhone::tts_pause() { + ERR_FAIL_COND(!tts); + [tts pauseSpeaking]; +} + +void DisplayServerIPhone::tts_resume() { + ERR_FAIL_COND(!tts); + [tts resumeSpeaking]; +} + +void DisplayServerIPhone::tts_stop() { + ERR_FAIL_COND(!tts); + [tts stopSpeaking]; +} + int DisplayServerIPhone::get_screen_count() const { return 1; } diff --git a/platform/iphone/tts_ios.h b/platform/iphone/tts_ios.h new file mode 100644 index 0000000000..c7defeb98f --- /dev/null +++ b/platform/iphone/tts_ios.h @@ -0,0 +1,59 @@ +/*************************************************************************/ +/* tts_ios.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef TTS_IOS_H +#define TTS_IOS_H + +#include <AVFAudio/AVSpeechSynthesis.h> + +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +@interface TTS_IOS : NSObject <AVSpeechSynthesizerDelegate> { + bool speaking; + Map<id, int> ids; + + AVSpeechSynthesizer *av_synth; + List<DisplayServer::TTSUtterance> queue; +} + +- (void)pauseSpeaking; +- (void)resumeSpeaking; +- (void)stopSpeaking; +- (bool)isSpeaking; +- (bool)isPaused; +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt; +- (Array)getVoices; +@end + +#endif // TTS_IOS_H diff --git a/platform/iphone/tts_ios.mm b/platform/iphone/tts_ios.mm new file mode 100644 index 0000000000..a079d02add --- /dev/null +++ b/platform/iphone/tts_ios.mm @@ -0,0 +1,164 @@ +/*************************************************************************/ +/* tts_ios.mm */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "tts_ios.h" + +@implementation TTS_IOS + +- (id)init { + self = [super init]; + self->speaking = false; + self->av_synth = [[AVSpeechSynthesizer alloc] init]; + [self->av_synth setDelegate:self]; + print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized."); + return self; +} + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance { + NSString *string = [utterance speechString]; + + // Convert from UTF-16 to UTF-32 position. + int pos = 0; + for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) { + unichar c = [string characterAtIndex:i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos); +} + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[utterance]); + ids.erase(utterance); + speaking = false; + [self update]; +} + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, ids[utterance]); + ids.erase(utterance); + speaking = false; + [self update]; +} + +- (void)update { + if (!speaking && queue.size() > 0) { + DisplayServer::TTSUtterance &message = queue.front()->get(); + + AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]]; + [new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]]; + if (message.rate > 1.f) { + [new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)]; + } else if (message.rate < 1.f) { + [new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)]; + } + [new_utterance setPitchMultiplier:message.pitch]; + [new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))]; + + ids[new_utterance] = message.id; + [av_synth speakUtterance:new_utterance]; + + queue.pop_front(); + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id); + speaking = true; + } +} + +- (void)pauseSpeaking { + [av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate]; +} + +- (void)resumeSpeaking { + [av_synth continueSpeaking]; +} + +- (void)stopSpeaking { + for (DisplayServer::TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); + } + queue.clear(); + [av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate]; + speaking = false; +} + +- (bool)isSpeaking { + return speaking || (queue.size() > 0); +} + +- (bool)isPaused { + return [av_synth isPaused]; +} + +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt { + if (interrupt) { + [self stopSpeaking]; + } + + if (text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id); + return; + } + + DisplayServer::TTSUtterance message; + message.text = text; + message.voice = voice; + message.volume = CLAMP(volume, 0, 100); + message.pitch = CLAMP(pitch, 0.f, 2.f); + message.rate = CLAMP(rate, 0.1f, 10.f); + message.id = utterance_id; + queue.push_back(message); + + if ([self isPaused]) { + [self resumeSpeaking]; + } else { + [self update]; + } +} + +- (Array)getVoices { + Array list; + for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) { + NSString *voiceIdentifierString = [voice identifier]; + NSString *voiceLocaleIdentifier = [voice language]; + NSString *voiceName = [voice name]; + Dictionary voice_d; + voice_d["name"] = String::utf8([voiceName UTF8String]); + voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]); + voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]); + list.push_back(voice_d); + } + return list; +} + +@end diff --git a/platform/javascript/display_server_javascript.cpp b/platform/javascript/display_server_javascript.cpp index a38040922d..cc77c8fcd5 100644 --- a/platform/javascript/display_server_javascript.cpp +++ b/platform/javascript/display_server_javascript.cpp @@ -274,6 +274,90 @@ const char *DisplayServerJavaScript::godot2dom_cursor(DisplayServer::CursorShape } } +bool DisplayServerJavaScript::tts_is_speaking() const { + return godot_js_tts_is_speaking(); +} + +bool DisplayServerJavaScript::tts_is_paused() const { + return godot_js_tts_is_paused(); +} + +void DisplayServerJavaScript::update_voices_callback(int p_size, const char **p_voice) { + get_singleton()->voices.clear(); + for (int i = 0; i < p_size; i++) { + Vector<String> tokens = String::utf8(p_voice[i]).split(";", true, 2); + if (tokens.size() == 2) { + Dictionary voice_d; + voice_d["name"] = tokens[1]; + voice_d["id"] = tokens[1]; + voice_d["language"] = tokens[0]; + get_singleton()->voices.push_back(voice_d); + } + } +} + +Array DisplayServerJavaScript::tts_get_voices() const { + godot_js_tts_get_voices(update_voices_callback); + return voices; +} + +void DisplayServerJavaScript::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + if (p_interrupt) { + tts_stop(); + } + + if (p_text.is_empty()) { + tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); + return; + } + + CharString string = p_text.utf8(); + utterance_ids[p_utterance_id] = string; + + godot_js_tts_speak(string.get_data(), p_voice.utf8().get_data(), CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, DisplayServerJavaScript::_js_utterance_callback); +} + +void DisplayServerJavaScript::tts_pause() { + godot_js_tts_pause(); +} + +void DisplayServerJavaScript::tts_resume() { + godot_js_tts_resume(); +} + +void DisplayServerJavaScript::tts_stop() { + for (Map<int, CharString>::Element *E = utterance_ids.front(); E; E = E->next()) { + tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key()); + } + utterance_ids.clear(); + godot_js_tts_stop(); +} + +void DisplayServerJavaScript::_js_utterance_callback(int p_event, int p_id, int p_pos) { + DisplayServerJavaScript *ds = (DisplayServerJavaScript *)DisplayServer::get_singleton(); + if (ds->utterance_ids.has(p_id)) { + int pos = 0; + if ((TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) { + // Convert position from UTF-8 to UTF-32. + const CharString &string = ds->utterance_ids[p_id]; + for (int i = 0; i < MIN(p_pos, string.length()); i++) { + uint8_t c = string[i]; + if ((c & 0xe0) == 0xc0) { + i += 1; + } else if ((c & 0xf0) == 0xe0) { + i += 2; + } else if ((c & 0xf8) == 0xf0) { + i += 3; + } + pos++; + } + } else if ((TTSUtteranceEvent)p_event != DisplayServer::TTS_UTTERANCE_STARTED) { + ds->utterance_ids.erase(p_id); + } + ds->tts_post_utterance_event((TTSUtteranceEvent)p_event, p_id, pos); + } +} + void DisplayServerJavaScript::cursor_set_shape(CursorShape p_shape) { ERR_FAIL_INDEX(p_shape, CURSOR_MAX); if (cursor_shape == p_shape) { @@ -755,6 +839,8 @@ bool DisplayServerJavaScript::has_feature(Feature p_feature) const { //case FEATURE_ORIENTATION: case FEATURE_VIRTUAL_KEYBOARD: return godot_js_display_vk_available() != 0; + case FEATURE_TEXT_TO_SPEECH: + return godot_js_display_tts_available() != 0; default: return false; } diff --git a/platform/javascript/display_server_javascript.h b/platform/javascript/display_server_javascript.h index b50956d91c..bbd0206087 100644 --- a/platform/javascript/display_server_javascript.h +++ b/platform/javascript/display_server_javascript.h @@ -55,6 +55,8 @@ private: EMSCRIPTEN_WEBGL_CONTEXT_HANDLE webgl_ctx = 0; #endif + Map<int, CharString> utterance_ids; + WindowMode window_mode = WINDOW_MODE_WINDOWED; ObjectID window_attached_instance_id = {}; @@ -66,6 +68,8 @@ private: String clipboard; Point2 touches[32]; + Array voices; + char canvas_id[256] = { 0 }; bool cursor_inside_canvas = true; CursorShape cursor_shape = CURSOR_ARROW; @@ -89,6 +93,7 @@ private: static void vk_input_text_callback(const char *p_text, int p_cursor); static void gamepad_callback(int p_index, int p_connected, const char *p_id, const char *p_guid); void process_joypads(); + static void _js_utterance_callback(int p_event, int p_id, int p_pos); static Vector<String> get_rendering_drivers_func(); static DisplayServer *create_func(const String &p_rendering_driver, WindowMode p_window_mode, VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error); @@ -97,6 +102,7 @@ private: static void request_quit_callback(); static void window_blur_callback(); + static void update_voices_callback(int p_size, const char **p_voice); static void update_clipboard_callback(const char *p_text); static void send_window_event_callback(int p_notification); static void drop_files_js_callback(char **p_filev, int p_filec); @@ -115,6 +121,16 @@ public: virtual bool has_feature(Feature p_feature) const override; virtual String get_name() const override; + // tts + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; + // cursor virtual void cursor_set_shape(CursorShape p_shape) override; virtual CursorShape cursor_get_shape() const override; diff --git a/platform/javascript/godot_js.h b/platform/javascript/godot_js.h index 2cb5c3025c..1a383c9799 100644 --- a/platform/javascript/godot_js.h +++ b/platform/javascript/godot_js.h @@ -67,6 +67,15 @@ extern int godot_js_input_gamepad_sample_get(int p_idx, float r_btns[16], int32_ extern void godot_js_input_paste_cb(void (*p_callback)(const char *p_text)); extern void godot_js_input_drop_files_cb(void (*p_callback)(char **p_filev, int p_filec)); +// TTS +extern int godot_js_tts_is_speaking(); +extern int godot_js_tts_is_paused(); +extern int godot_js_tts_get_voices(void (*p_callback)(int p_size, const char **p_voices)); +extern void godot_js_tts_speak(const char *p_text, const char *p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, void (*p_callback)(int p_event, int p_id, int p_pos)); +extern void godot_js_tts_pause(); +extern void godot_js_tts_resume(); +extern void godot_js_tts_stop(); + // Display extern int godot_js_display_screen_dpi_get(); extern double godot_js_display_pixel_ratio_get(); @@ -109,6 +118,7 @@ extern void godot_js_display_notification_cb(void (*p_callback)(int p_notificati // Display Virtual Keyboard extern int godot_js_display_vk_available(); +extern int godot_js_display_tts_available(); extern void godot_js_display_vk_cb(void (*p_input)(const char *p_text, int p_cursor)); extern void godot_js_display_vk_show(const char *p_text, int p_multiline, int p_start, int p_end); extern void godot_js_display_vk_hide(); diff --git a/platform/javascript/js/libs/library_godot_display.js b/platform/javascript/js/libs/library_godot_display.js index 2bdf4e56ad..54d48643db 100644 --- a/platform/javascript/js/libs/library_godot_display.js +++ b/platform/javascript/js/libs/library_godot_display.js @@ -330,6 +330,91 @@ const GodotDisplay = { return 0; }, + godot_js_tts_is_speaking__sig: 'i', + godot_js_tts_is_speaking: function () { + return window.speechSynthesis.speaking; + }, + + godot_js_tts_is_paused__sig: 'i', + godot_js_tts_is_paused: function () { + return window.speechSynthesis.paused; + }, + + godot_js_tts_get_voices__sig: 'vi', + godot_js_tts_get_voices: function (p_callback) { + const func = GodotRuntime.get_func(p_callback); + try { + const arr = []; + const voices = window.speechSynthesis.getVoices(); + for (let i = 0; i < voices.length; i++) { + arr.push(`${voices[i].lang};${voices[i].name}`); + } + const c_ptr = GodotRuntime.allocStringArray(arr); + func(arr.length, c_ptr); + GodotRuntime.freeStringArray(c_ptr, arr.length); + } catch (e) { + // Fail graciously. + } + }, + + godot_js_tts_speak__sig: 'viiiffii', + godot_js_tts_speak: function (p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_callback) { + const func = GodotRuntime.get_func(p_callback); + + function listener_end(evt) { + evt.currentTarget.cb(1 /*TTS_UTTERANCE_ENDED*/, evt.currentTarget.id, 0); + } + + function listener_start(evt) { + evt.currentTarget.cb(0 /*TTS_UTTERANCE_STARTED*/, evt.currentTarget.id, 0); + } + + function listener_error(evt) { + evt.currentTarget.cb(2 /*TTS_UTTERANCE_CANCELED*/, evt.currentTarget.id, 0); + } + + function listener_bound(evt) { + evt.currentTarget.cb(3 /*TTS_UTTERANCE_BOUNDARY*/, evt.currentTarget.id, evt.charIndex); + } + + const utterance = new SpeechSynthesisUtterance(GodotRuntime.parseString(p_text)); + utterance.rate = p_rate; + utterance.pitch = p_pitch; + utterance.volume = p_volume / 100.0; + utterance.addEventListener('end', listener_end); + utterance.addEventListener('start', listener_start); + utterance.addEventListener('error', listener_error); + utterance.addEventListener('boundary', listener_bound); + utterance.id = p_utterance_id; + utterance.cb = func; + const voice = GodotRuntime.parseString(p_voice); + const voices = window.speechSynthesis.getVoices(); + for (let i = 0; i < voices.length; i++) { + if (voices[i].name === voice) { + utterance.voice = voices[i]; + break; + } + } + window.speechSynthesis.resume(); + window.speechSynthesis.speak(utterance); + }, + + godot_js_tts_pause__sig: 'v', + godot_js_tts_pause: function () { + window.speechSynthesis.pause(); + }, + + godot_js_tts_resume__sig: 'v', + godot_js_tts_resume: function () { + window.speechSynthesis.resume(); + }, + + godot_js_tts_stop__sig: 'v', + godot_js_tts_stop: function () { + window.speechSynthesis.cancel(); + window.speechSynthesis.resume(); + }, + godot_js_display_alert__sig: 'vi', godot_js_display_alert: function (p_text) { window.alert(GodotRuntime.parseString(p_text)); // eslint-disable-line no-alert @@ -625,6 +710,11 @@ const GodotDisplay = { return GodotDisplayVK.available(); }, + godot_js_display_tts_available__sig: 'i', + godot_js_display_tts_available: function () { + return 'speechSynthesis' in window; + }, + godot_js_display_vk_cb__sig: 'vi', godot_js_display_vk_cb: function (p_input_cb) { const input_cb = GodotRuntime.get_func(p_input_cb); diff --git a/platform/linuxbsd/SCsub b/platform/linuxbsd/SCsub index cec8706fbc..09a432eae2 100644 --- a/platform/linuxbsd/SCsub +++ b/platform/linuxbsd/SCsub @@ -20,6 +20,9 @@ if "x11" in env and env["x11"]: "key_mapping_x11.cpp", ] +if "speechd" in env and env["speechd"]: + common_linuxbsd.append(["speechd-so_wrap.c", "tts_linux.cpp"]) + if "vulkan" in env and env["vulkan"]: common_linuxbsd.append("vulkan_context_x11.cpp") diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index 2fba58fc53..f8d69b4ca9 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -75,6 +75,7 @@ def get_opts(): BoolVariable("use_msan", "Use LLVM compiler memory sanitizer (MSAN)", False), BoolVariable("pulseaudio", "Detect and use PulseAudio", True), BoolVariable("dbus", "Detect and use D-Bus to handle screensaver", True), + BoolVariable("speechd", "Detect and use Speech Dispatcher for Text-to-Speech support", True), BoolVariable("udev", "Use udev for gamepad connection callbacks", True), BoolVariable("x11", "Enable X11 display", True), BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True), @@ -337,6 +338,13 @@ def configure(env): else: print("Warning: D-Bus development libraries not found. Disabling screensaver prevention.") + if env["speechd"]: + if os.system("pkg-config --exists speech-dispatcher") == 0: # 0 means found + env.Append(CPPDEFINES=["SPEECHD_ENABLED"]) + env.ParseConfig("pkg-config speech-dispatcher --cflags") # Only cflags, we dlopen the library. + else: + print("Warning: Speech Dispatcher development libraries not found. Disabling Text-to-Speech support.") + if platform.system() == "Linux": env.Append(CPPDEFINES=["JOYDEV_ENABLED"]) if env["udev"]: diff --git a/platform/linuxbsd/display_server_x11.cpp b/platform/linuxbsd/display_server_x11.cpp index a36fcabd91..027f8562eb 100644 --- a/platform/linuxbsd/display_server_x11.cpp +++ b/platform/linuxbsd/display_server_x11.cpp @@ -139,6 +139,7 @@ bool DisplayServerX11::has_feature(Feature p_feature) const { case FEATURE_KEEP_SCREEN_ON: #endif case FEATURE_CLIPBOARD_PRIMARY: + case FEATURE_TEXT_TO_SPEECH: return true; default: { } @@ -307,6 +308,45 @@ void DisplayServerX11::_flush_mouse_motion() { xi.relative_motion.y = 0; } +#ifdef SPEECHD_ENABLED + +bool DisplayServerX11::tts_is_speaking() const { + ERR_FAIL_COND_V(!tts, false); + return tts->is_speaking(); +} + +bool DisplayServerX11::tts_is_paused() const { + ERR_FAIL_COND_V(!tts, false); + return tts->is_paused(); +} + +Array DisplayServerX11::tts_get_voices() const { + ERR_FAIL_COND_V(!tts, Array()); + return tts->get_voices(); +} + +void DisplayServerX11::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + ERR_FAIL_COND(!tts); + tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); +} + +void DisplayServerX11::tts_pause() { + ERR_FAIL_COND(!tts); + tts->pause(); +} + +void DisplayServerX11::tts_resume() { + ERR_FAIL_COND(!tts); + tts->resume(); +} + +void DisplayServerX11::tts_stop() { + ERR_FAIL_COND(!tts); + tts->stop(); +} + +#endif + void DisplayServerX11::mouse_set_mode(MouseMode p_mode) { _THREAD_SAFE_METHOD_ @@ -4633,6 +4673,11 @@ DisplayServerX11::DisplayServerX11(const String &p_rendering_driver, WindowMode xdnd_finished = XInternAtom(x11_display, "XdndFinished", False); xdnd_selection = XInternAtom(x11_display, "XdndSelection", False); +#ifdef SPEECHD_ENABLED + // Init TTS + tts = memnew(TTS_Linux); +#endif + //!!!!!!!!!!!!!!!!!!!!!!!!!! //TODO - do Vulkan and OpenGL support checks, driver selection and fallback rendering_driver = p_rendering_driver; @@ -4985,6 +5030,10 @@ DisplayServerX11::~DisplayServerX11() { memfree(xmbstring); } +#ifdef SPEECHD_ENABLED + memdelete(tts); +#endif + #ifdef DBUS_ENABLED memdelete(screensaver); #endif diff --git a/platform/linuxbsd/display_server_x11.h b/platform/linuxbsd/display_server_x11.h index cd673d94d9..3d49886b94 100644 --- a/platform/linuxbsd/display_server_x11.h +++ b/platform/linuxbsd/display_server_x11.h @@ -46,6 +46,10 @@ #include "servers/rendering/renderer_compositor.h" #include "servers/rendering_server.h" +#if defined(SPEECHD_ENABLED) +#include "tts_linux.h" +#endif + #if defined(GLES3_ENABLED) #include "gl_manager_x11.h" #endif @@ -112,6 +116,10 @@ class DisplayServerX11 : public DisplayServer { bool keep_screen_on = false; #endif +#ifdef SPEECHD_ENABLED + TTS_Linux *tts = nullptr; +#endif + struct WindowData { Window x11_window; ::XIC xic; @@ -298,6 +306,17 @@ public: virtual bool has_feature(Feature p_feature) const override; virtual String get_name() const override; +#ifdef SPEECHD_ENABLED + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; +#endif + virtual void mouse_set_mode(MouseMode p_mode) override; virtual MouseMode mouse_get_mode() const override; diff --git a/platform/linuxbsd/speechd-so_wrap.c b/platform/linuxbsd/speechd-so_wrap.c new file mode 100644 index 0000000000..749474e181 --- /dev/null +++ b/platform/linuxbsd/speechd-so_wrap.c @@ -0,0 +1,881 @@ +// This file is generated. Do not edit! +// see https://github.com/hpvb/dynload-wrapper for details +// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21 +// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c +// +#include <stdint.h> + +#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd +#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd +#define spd_open spd_open_dylibloader_orig_speechd +#define spd_open2 spd_open2_dylibloader_orig_speechd +#define spd_close spd_close_dylibloader_orig_speechd +#define spd_say spd_say_dylibloader_orig_speechd +#define spd_sayf spd_sayf_dylibloader_orig_speechd +#define spd_stop spd_stop_dylibloader_orig_speechd +#define spd_stop_all spd_stop_all_dylibloader_orig_speechd +#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd +#define spd_cancel spd_cancel_dylibloader_orig_speechd +#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd +#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd +#define spd_pause spd_pause_dylibloader_orig_speechd +#define spd_pause_all spd_pause_all_dylibloader_orig_speechd +#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd +#define spd_resume spd_resume_dylibloader_orig_speechd +#define spd_resume_all spd_resume_all_dylibloader_orig_speechd +#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd +#define spd_key spd_key_dylibloader_orig_speechd +#define spd_char spd_char_dylibloader_orig_speechd +#define spd_wchar spd_wchar_dylibloader_orig_speechd +#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd +#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd +#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd +#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd +#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd +#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd +#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd +#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd +#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd +#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd +#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd +#define spd_set_notification spd_set_notification_dylibloader_orig_speechd +#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd +#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd +#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd +#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd +#define spd_set_volume spd_set_volume_dylibloader_orig_speechd +#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd +#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd +#define spd_get_volume spd_get_volume_dylibloader_orig_speechd +#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd +#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd +#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd +#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd +#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd +#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd +#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd +#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd +#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd +#define spd_set_language spd_set_language_dylibloader_orig_speechd +#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd +#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd +#define spd_get_language spd_get_language_dylibloader_orig_speechd +#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd +#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd +#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd +#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd +#define spd_list_modules spd_list_modules_dylibloader_orig_speechd +#define free_spd_modules free_spd_modules_dylibloader_orig_speechd +#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd +#define spd_list_voices spd_list_voices_dylibloader_orig_speechd +#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd +#define free_spd_voices free_spd_voices_dylibloader_orig_speechd +#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd +#define spd_execute_command spd_execute_command_dylibloader_orig_speechd +#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd +#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd +#define spd_send_data spd_send_data_dylibloader_orig_speechd +#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd +#include <libspeechd.h> +#undef SPDConnectionAddress__free +#undef spd_get_default_address +#undef spd_open +#undef spd_open2 +#undef spd_close +#undef spd_say +#undef spd_sayf +#undef spd_stop +#undef spd_stop_all +#undef spd_stop_uid +#undef spd_cancel +#undef spd_cancel_all +#undef spd_cancel_uid +#undef spd_pause +#undef spd_pause_all +#undef spd_pause_uid +#undef spd_resume +#undef spd_resume_all +#undef spd_resume_uid +#undef spd_key +#undef spd_char +#undef spd_wchar +#undef spd_sound_icon +#undef spd_set_voice_type +#undef spd_set_voice_type_all +#undef spd_set_voice_type_uid +#undef spd_get_voice_type +#undef spd_set_synthesis_voice +#undef spd_set_synthesis_voice_all +#undef spd_set_synthesis_voice_uid +#undef spd_set_data_mode +#undef spd_set_notification_on +#undef spd_set_notification_off +#undef spd_set_notification +#undef spd_set_voice_rate +#undef spd_set_voice_rate_all +#undef spd_set_voice_rate_uid +#undef spd_get_voice_rate +#undef spd_set_voice_pitch +#undef spd_set_voice_pitch_all +#undef spd_set_voice_pitch_uid +#undef spd_get_voice_pitch +#undef spd_set_voice_pitch_range +#undef spd_set_voice_pitch_range_all +#undef spd_set_voice_pitch_range_uid +#undef spd_set_volume +#undef spd_set_volume_all +#undef spd_set_volume_uid +#undef spd_get_volume +#undef spd_set_punctuation +#undef spd_set_punctuation_all +#undef spd_set_punctuation_uid +#undef spd_set_capital_letters +#undef spd_set_capital_letters_all +#undef spd_set_capital_letters_uid +#undef spd_set_spelling +#undef spd_set_spelling_all +#undef spd_set_spelling_uid +#undef spd_set_language +#undef spd_set_language_all +#undef spd_set_language_uid +#undef spd_get_language +#undef spd_set_output_module +#undef spd_set_output_module_all +#undef spd_set_output_module_uid +#undef spd_get_message_list_fd +#undef spd_list_modules +#undef free_spd_modules +#undef spd_get_output_module +#undef spd_list_voices +#undef spd_list_synthesis_voices +#undef free_spd_voices +#undef spd_execute_command_with_list_reply +#undef spd_execute_command +#undef spd_execute_command_with_reply +#undef spd_execute_command_wo_mutex +#undef spd_send_data +#undef spd_send_data_wo_mutex +#include <dlfcn.h> +#include <stdio.h> +void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*); +SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**); +SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode); +SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**); +void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...); +int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t); +int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int); +SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode); +int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*); +int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int); +int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int); +int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int); +int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**); +char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*); +void (*free_spd_modules_dylibloader_wrapper_speechd)( char**); +char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*); +char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*); +SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*); +void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**); +char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*); +int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*); +int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**); +int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*); +char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +int initialize_speechd(int verbose) { + void *handle; + char *error; + handle = dlopen("libspeechd.so.2", RTLD_LAZY); + if (!handle) { + if (verbose) { + fprintf(stderr, "%s\n", dlerror()); + } + return(1); + } + dlerror(); +// SPDConnectionAddress__free + *(void **) (&SPDConnectionAddress__free_dylibloader_wrapper_speechd) = dlsym(handle, "SPDConnectionAddress__free"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_default_address + *(void **) (&spd_get_default_address_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_default_address"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_open + *(void **) (&spd_open_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_open2 + *(void **) (&spd_open2_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open2"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_close + *(void **) (&spd_close_dylibloader_wrapper_speechd) = dlsym(handle, "spd_close"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_say + *(void **) (&spd_say_dylibloader_wrapper_speechd) = dlsym(handle, "spd_say"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_sayf + *(void **) (&spd_sayf_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sayf"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_stop + *(void **) (&spd_stop_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_stop_all + *(void **) (&spd_stop_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_stop_uid + *(void **) (&spd_stop_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_cancel + *(void **) (&spd_cancel_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_cancel_all + *(void **) (&spd_cancel_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_cancel_uid + *(void **) (&spd_cancel_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_pause + *(void **) (&spd_pause_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_pause_all + *(void **) (&spd_pause_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_pause_uid + *(void **) (&spd_pause_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_resume + *(void **) (&spd_resume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_resume_all + *(void **) (&spd_resume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_resume_uid + *(void **) (&spd_resume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_key + *(void **) (&spd_key_dylibloader_wrapper_speechd) = dlsym(handle, "spd_key"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_char + *(void **) (&spd_char_dylibloader_wrapper_speechd) = dlsym(handle, "spd_char"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_wchar + *(void **) (&spd_wchar_dylibloader_wrapper_speechd) = dlsym(handle, "spd_wchar"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_sound_icon + *(void **) (&spd_sound_icon_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sound_icon"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_type + *(void **) (&spd_set_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_type_all + *(void **) (&spd_set_voice_type_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_type_uid + *(void **) (&spd_set_voice_type_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_voice_type + *(void **) (&spd_get_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_type"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_synthesis_voice + *(void **) (&spd_set_synthesis_voice_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_synthesis_voice_all + *(void **) (&spd_set_synthesis_voice_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_synthesis_voice_uid + *(void **) (&spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_data_mode + *(void **) (&spd_set_data_mode_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_data_mode"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_notification_on + *(void **) (&spd_set_notification_on_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_on"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_notification_off + *(void **) (&spd_set_notification_off_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_off"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_notification + *(void **) (&spd_set_notification_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_rate + *(void **) (&spd_set_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_rate_all + *(void **) (&spd_set_voice_rate_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_rate_uid + *(void **) (&spd_set_voice_rate_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_voice_rate + *(void **) (&spd_get_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_rate"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_pitch + *(void **) (&spd_set_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_pitch_all + *(void **) (&spd_set_voice_pitch_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_pitch_uid + *(void **) (&spd_set_voice_pitch_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_voice_pitch + *(void **) (&spd_get_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_pitch"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_pitch_range + *(void **) (&spd_set_voice_pitch_range_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_pitch_range_all + *(void **) (&spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_voice_pitch_range_uid + *(void **) (&spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_volume + *(void **) (&spd_set_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_volume_all + *(void **) (&spd_set_volume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_volume_uid + *(void **) (&spd_set_volume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_volume + *(void **) (&spd_get_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_volume"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_punctuation + *(void **) (&spd_set_punctuation_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_punctuation_all + *(void **) (&spd_set_punctuation_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_punctuation_uid + *(void **) (&spd_set_punctuation_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_capital_letters + *(void **) (&spd_set_capital_letters_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_capital_letters_all + *(void **) (&spd_set_capital_letters_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_capital_letters_uid + *(void **) (&spd_set_capital_letters_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_spelling + *(void **) (&spd_set_spelling_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_spelling_all + *(void **) (&spd_set_spelling_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_spelling_uid + *(void **) (&spd_set_spelling_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_language + *(void **) (&spd_set_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_language_all + *(void **) (&spd_set_language_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_language_uid + *(void **) (&spd_set_language_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_language + *(void **) (&spd_get_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_language"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_output_module + *(void **) (&spd_set_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_output_module_all + *(void **) (&spd_set_output_module_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_all"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_set_output_module_uid + *(void **) (&spd_set_output_module_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_uid"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_message_list_fd + *(void **) (&spd_get_message_list_fd_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_message_list_fd"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_list_modules + *(void **) (&spd_list_modules_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_modules"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// free_spd_modules + *(void **) (&free_spd_modules_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_modules"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_get_output_module + *(void **) (&spd_get_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_output_module"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_list_voices + *(void **) (&spd_list_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_voices"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_list_synthesis_voices + *(void **) (&spd_list_synthesis_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_synthesis_voices"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// free_spd_voices + *(void **) (&free_spd_voices_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_voices"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_execute_command_with_list_reply + *(void **) (&spd_execute_command_with_list_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_list_reply"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_execute_command + *(void **) (&spd_execute_command_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_execute_command_with_reply + *(void **) (&spd_execute_command_with_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_reply"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_execute_command_wo_mutex + *(void **) (&spd_execute_command_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_wo_mutex"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_send_data + *(void **) (&spd_send_data_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +// spd_send_data_wo_mutex + *(void **) (&spd_send_data_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data_wo_mutex"); + if (verbose) { + error = dlerror(); + if (error != NULL) { + fprintf(stderr, "%s\n", error); + } + } +return 0; +} diff --git a/platform/linuxbsd/speechd-so_wrap.h b/platform/linuxbsd/speechd-so_wrap.h new file mode 100644 index 0000000000..8e1c053348 --- /dev/null +++ b/platform/linuxbsd/speechd-so_wrap.h @@ -0,0 +1,330 @@ +#ifndef DYLIBLOAD_WRAPPER_SPEECHD +#define DYLIBLOAD_WRAPPER_SPEECHD +// This file is generated. Do not edit! +// see https://github.com/hpvb/dynload-wrapper for details +// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21 +// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c +// +#include <stdint.h> + +#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd +#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd +#define spd_open spd_open_dylibloader_orig_speechd +#define spd_open2 spd_open2_dylibloader_orig_speechd +#define spd_close spd_close_dylibloader_orig_speechd +#define spd_say spd_say_dylibloader_orig_speechd +#define spd_sayf spd_sayf_dylibloader_orig_speechd +#define spd_stop spd_stop_dylibloader_orig_speechd +#define spd_stop_all spd_stop_all_dylibloader_orig_speechd +#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd +#define spd_cancel spd_cancel_dylibloader_orig_speechd +#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd +#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd +#define spd_pause spd_pause_dylibloader_orig_speechd +#define spd_pause_all spd_pause_all_dylibloader_orig_speechd +#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd +#define spd_resume spd_resume_dylibloader_orig_speechd +#define spd_resume_all spd_resume_all_dylibloader_orig_speechd +#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd +#define spd_key spd_key_dylibloader_orig_speechd +#define spd_char spd_char_dylibloader_orig_speechd +#define spd_wchar spd_wchar_dylibloader_orig_speechd +#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd +#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd +#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd +#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd +#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd +#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd +#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd +#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd +#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd +#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd +#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd +#define spd_set_notification spd_set_notification_dylibloader_orig_speechd +#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd +#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd +#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd +#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd +#define spd_set_volume spd_set_volume_dylibloader_orig_speechd +#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd +#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd +#define spd_get_volume spd_get_volume_dylibloader_orig_speechd +#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd +#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd +#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd +#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd +#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd +#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd +#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd +#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd +#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd +#define spd_set_language spd_set_language_dylibloader_orig_speechd +#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd +#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd +#define spd_get_language spd_get_language_dylibloader_orig_speechd +#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd +#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd +#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd +#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd +#define spd_list_modules spd_list_modules_dylibloader_orig_speechd +#define free_spd_modules free_spd_modules_dylibloader_orig_speechd +#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd +#define spd_list_voices spd_list_voices_dylibloader_orig_speechd +#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd +#define free_spd_voices free_spd_voices_dylibloader_orig_speechd +#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd +#define spd_execute_command spd_execute_command_dylibloader_orig_speechd +#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd +#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd +#define spd_send_data spd_send_data_dylibloader_orig_speechd +#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd +#include <libspeechd.h> +#undef SPDConnectionAddress__free +#undef spd_get_default_address +#undef spd_open +#undef spd_open2 +#undef spd_close +#undef spd_say +#undef spd_sayf +#undef spd_stop +#undef spd_stop_all +#undef spd_stop_uid +#undef spd_cancel +#undef spd_cancel_all +#undef spd_cancel_uid +#undef spd_pause +#undef spd_pause_all +#undef spd_pause_uid +#undef spd_resume +#undef spd_resume_all +#undef spd_resume_uid +#undef spd_key +#undef spd_char +#undef spd_wchar +#undef spd_sound_icon +#undef spd_set_voice_type +#undef spd_set_voice_type_all +#undef spd_set_voice_type_uid +#undef spd_get_voice_type +#undef spd_set_synthesis_voice +#undef spd_set_synthesis_voice_all +#undef spd_set_synthesis_voice_uid +#undef spd_set_data_mode +#undef spd_set_notification_on +#undef spd_set_notification_off +#undef spd_set_notification +#undef spd_set_voice_rate +#undef spd_set_voice_rate_all +#undef spd_set_voice_rate_uid +#undef spd_get_voice_rate +#undef spd_set_voice_pitch +#undef spd_set_voice_pitch_all +#undef spd_set_voice_pitch_uid +#undef spd_get_voice_pitch +#undef spd_set_voice_pitch_range +#undef spd_set_voice_pitch_range_all +#undef spd_set_voice_pitch_range_uid +#undef spd_set_volume +#undef spd_set_volume_all +#undef spd_set_volume_uid +#undef spd_get_volume +#undef spd_set_punctuation +#undef spd_set_punctuation_all +#undef spd_set_punctuation_uid +#undef spd_set_capital_letters +#undef spd_set_capital_letters_all +#undef spd_set_capital_letters_uid +#undef spd_set_spelling +#undef spd_set_spelling_all +#undef spd_set_spelling_uid +#undef spd_set_language +#undef spd_set_language_all +#undef spd_set_language_uid +#undef spd_get_language +#undef spd_set_output_module +#undef spd_set_output_module_all +#undef spd_set_output_module_uid +#undef spd_get_message_list_fd +#undef spd_list_modules +#undef free_spd_modules +#undef spd_get_output_module +#undef spd_list_voices +#undef spd_list_synthesis_voices +#undef free_spd_voices +#undef spd_execute_command_with_list_reply +#undef spd_execute_command +#undef spd_execute_command_with_reply +#undef spd_execute_command_wo_mutex +#undef spd_send_data +#undef spd_send_data_wo_mutex +#ifdef __cplusplus +extern "C" { +#endif +#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_wrapper_speechd +#define spd_get_default_address spd_get_default_address_dylibloader_wrapper_speechd +#define spd_open spd_open_dylibloader_wrapper_speechd +#define spd_open2 spd_open2_dylibloader_wrapper_speechd +#define spd_close spd_close_dylibloader_wrapper_speechd +#define spd_say spd_say_dylibloader_wrapper_speechd +#define spd_sayf spd_sayf_dylibloader_wrapper_speechd +#define spd_stop spd_stop_dylibloader_wrapper_speechd +#define spd_stop_all spd_stop_all_dylibloader_wrapper_speechd +#define spd_stop_uid spd_stop_uid_dylibloader_wrapper_speechd +#define spd_cancel spd_cancel_dylibloader_wrapper_speechd +#define spd_cancel_all spd_cancel_all_dylibloader_wrapper_speechd +#define spd_cancel_uid spd_cancel_uid_dylibloader_wrapper_speechd +#define spd_pause spd_pause_dylibloader_wrapper_speechd +#define spd_pause_all spd_pause_all_dylibloader_wrapper_speechd +#define spd_pause_uid spd_pause_uid_dylibloader_wrapper_speechd +#define spd_resume spd_resume_dylibloader_wrapper_speechd +#define spd_resume_all spd_resume_all_dylibloader_wrapper_speechd +#define spd_resume_uid spd_resume_uid_dylibloader_wrapper_speechd +#define spd_key spd_key_dylibloader_wrapper_speechd +#define spd_char spd_char_dylibloader_wrapper_speechd +#define spd_wchar spd_wchar_dylibloader_wrapper_speechd +#define spd_sound_icon spd_sound_icon_dylibloader_wrapper_speechd +#define spd_set_voice_type spd_set_voice_type_dylibloader_wrapper_speechd +#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_wrapper_speechd +#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_wrapper_speechd +#define spd_get_voice_type spd_get_voice_type_dylibloader_wrapper_speechd +#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_wrapper_speechd +#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_wrapper_speechd +#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd +#define spd_set_data_mode spd_set_data_mode_dylibloader_wrapper_speechd +#define spd_set_notification_on spd_set_notification_on_dylibloader_wrapper_speechd +#define spd_set_notification_off spd_set_notification_off_dylibloader_wrapper_speechd +#define spd_set_notification spd_set_notification_dylibloader_wrapper_speechd +#define spd_set_voice_rate spd_set_voice_rate_dylibloader_wrapper_speechd +#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_wrapper_speechd +#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_wrapper_speechd +#define spd_get_voice_rate spd_get_voice_rate_dylibloader_wrapper_speechd +#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_wrapper_speechd +#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd +#define spd_set_volume spd_set_volume_dylibloader_wrapper_speechd +#define spd_set_volume_all spd_set_volume_all_dylibloader_wrapper_speechd +#define spd_set_volume_uid spd_set_volume_uid_dylibloader_wrapper_speechd +#define spd_get_volume spd_get_volume_dylibloader_wrapper_speechd +#define spd_set_punctuation spd_set_punctuation_dylibloader_wrapper_speechd +#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_wrapper_speechd +#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_wrapper_speechd +#define spd_set_capital_letters spd_set_capital_letters_dylibloader_wrapper_speechd +#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_wrapper_speechd +#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_wrapper_speechd +#define spd_set_spelling spd_set_spelling_dylibloader_wrapper_speechd +#define spd_set_spelling_all spd_set_spelling_all_dylibloader_wrapper_speechd +#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_wrapper_speechd +#define spd_set_language spd_set_language_dylibloader_wrapper_speechd +#define spd_set_language_all spd_set_language_all_dylibloader_wrapper_speechd +#define spd_set_language_uid spd_set_language_uid_dylibloader_wrapper_speechd +#define spd_get_language spd_get_language_dylibloader_wrapper_speechd +#define spd_set_output_module spd_set_output_module_dylibloader_wrapper_speechd +#define spd_set_output_module_all spd_set_output_module_all_dylibloader_wrapper_speechd +#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_wrapper_speechd +#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_wrapper_speechd +#define spd_list_modules spd_list_modules_dylibloader_wrapper_speechd +#define free_spd_modules free_spd_modules_dylibloader_wrapper_speechd +#define spd_get_output_module spd_get_output_module_dylibloader_wrapper_speechd +#define spd_list_voices spd_list_voices_dylibloader_wrapper_speechd +#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_wrapper_speechd +#define free_spd_voices free_spd_voices_dylibloader_wrapper_speechd +#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_wrapper_speechd +#define spd_execute_command spd_execute_command_dylibloader_wrapper_speechd +#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_wrapper_speechd +#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_wrapper_speechd +#define spd_send_data spd_send_data_dylibloader_wrapper_speechd +#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_wrapper_speechd +extern void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*); +extern SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**); +extern SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode); +extern SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**); +extern void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...); +extern int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t); +extern int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +extern int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +extern int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int); +extern SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +extern int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode); +extern int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +extern int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +extern int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*); +extern int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +extern int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +extern int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int); +extern int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +extern int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +extern int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int); +extern int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +extern int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +extern int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int); +extern int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +extern char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +extern int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**); +extern char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*); +extern void (*free_spd_modules_dylibloader_wrapper_speechd)( char**); +extern char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*); +extern char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*); +extern SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*); +extern void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**); +extern char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*); +extern int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*); +extern int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**); +extern int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*); +extern char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +extern char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +int initialize_speechd(int verbose); +#ifdef __cplusplus +} +#endif +#endif diff --git a/platform/linuxbsd/tts_linux.cpp b/platform/linuxbsd/tts_linux.cpp new file mode 100644 index 0000000000..aea1183d3d --- /dev/null +++ b/platform/linuxbsd/tts_linux.cpp @@ -0,0 +1,261 @@ +/*************************************************************************/ +/* tts_linux.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "tts_linux.h" + +#include "core/config/project_settings.h" +#include "servers/text_server.h" + +TTS_Linux *TTS_Linux::singleton = nullptr; + +void TTS_Linux::speech_init_thread_func(void *p_userdata) { + TTS_Linux *tts = (TTS_Linux *)p_userdata; + if (tts) { + MutexLock thread_safe_method(tts->_thread_safe_); +#ifdef DEBUG_ENABLED + int dylibloader_verbose = 1; +#else + int dylibloader_verbose = 0; +#endif + if (initialize_speechd(dylibloader_verbose) == 0) { + CharString class_str; + String config_name = GLOBAL_GET("application/config/name"); + if (config_name.length() == 0) { + class_str = "Godot_Engine"; + } else { + class_str = config_name.utf8(); + } + tts->synth = spd_open(class_str, "Godot_Engine_Speech_API", "Godot_Engine", SPD_MODE_THREADED); + if (tts->synth) { + tts->synth->callback_end = &speech_event_callback; + tts->synth->callback_cancel = &speech_event_callback; + tts->synth->callback_im = &speech_event_index_mark; + spd_set_notification_on(tts->synth, SPD_END); + spd_set_notification_on(tts->synth, SPD_CANCEL); + + print_verbose("Text-to-Speech: Speech Dispatcher initialized."); + } else { + print_verbose("Text-to-Speech: Cannot initialize Speech Dispatcher synthesizer!"); + } + } else { + print_verbose("Text-to-Speech: Cannot load Speech Dispatcher library!"); + } + } +} + +void TTS_Linux::speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark) { + TTS_Linux *tts = TTS_Linux::get_singleton(); + if (tts && tts->ids.has(p_msg_id)) { + MutexLock thread_safe_method(tts->_thread_safe_); + // Get word offset from the index mark injected to the text stream. + String mark = String::utf8(p_index_mark); + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, tts->ids[p_msg_id], mark.to_int()); + } +} + +void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type) { + TTS_Linux *tts = TTS_Linux::get_singleton(); + if (tts) { + MutexLock thread_safe_method(tts->_thread_safe_); + List<DisplayServer::TTSUtterance> &queue = tts->queue; + if (!tts->paused && tts->ids.has(p_msg_id)) { + if (p_type == SPD_EVENT_END) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, tts->ids[p_msg_id]); + tts->ids.erase(p_msg_id); + tts->last_msg_id = -1; + tts->speaking = false; + } else if (p_type == SPD_EVENT_CANCEL) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, tts->ids[p_msg_id]); + tts->ids.erase(p_msg_id); + tts->last_msg_id = -1; + tts->speaking = false; + } + } + if (!tts->speaking && queue.size() > 0) { + DisplayServer::TTSUtterance &message = queue.front()->get(); + + // Inject index mark after each word. + String text; + String language; + SPDVoice **voices = spd_list_synthesis_voices(tts->synth); + if (voices != nullptr) { + SPDVoice **voices_ptr = voices; + while (*voices_ptr != nullptr) { + if (String::utf8((*voices_ptr)->name) == message.voice) { + language = String::utf8((*voices_ptr)->language); + break; + } + voices_ptr++; + } + free_spd_voices(voices); + } + PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language); + int prev = 0; + for (int i = 0; i < breaks.size(); i++) { + text += message.text.substr(prev, breaks[i] - prev); + text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>"; + prev = breaks[i]; + } + text += message.text.substr(prev, -1); + + spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data()); + spd_set_volume(tts->synth, message.volume * 2 - 100); + spd_set_voice_pitch(tts->synth, (message.pitch - 1) * 100); + float rate = 0; + if (message.rate > 1.f) { + rate = log10(MIN(message.rate, 2.5f)) / log10(2.5f) * 100; + } else if (message.rate < 1.f) { + rate = log10(MAX(message.rate, 0.5f)) / log10(0.5f) * -100; + } + spd_set_voice_rate(tts->synth, rate); + spd_set_data_mode(tts->synth, SPD_DATA_SSML); + tts->last_msg_id = spd_say(tts->synth, SPD_TEXT, text.utf8().get_data()); + tts->ids[tts->last_msg_id] = message.id; + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id); + + queue.pop_front(); + tts->speaking = true; + } + } +} + +bool TTS_Linux::is_speaking() const { + return speaking; +} + +bool TTS_Linux::is_paused() const { + return paused; +} + +Array TTS_Linux::get_voices() const { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND_V(!synth, Array()); + Array list; + SPDVoice **voices = spd_list_synthesis_voices(synth); + if (voices != nullptr) { + SPDVoice **voices_ptr = voices; + while (*voices_ptr != nullptr) { + Dictionary voice_d; + voice_d["name"] = String::utf8((*voices_ptr)->name); + voice_d["id"] = String::utf8((*voices_ptr)->name); + voice_d["language"] = String::utf8((*voices_ptr)->language) + "_" + String::utf8((*voices_ptr)->variant); + list.push_back(voice_d); + + voices_ptr++; + } + free_spd_voices(voices); + } + return list; +} + +void TTS_Linux::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND(!synth); + if (p_interrupt) { + stop(); + } + + if (p_text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); + return; + } + + DisplayServer::TTSUtterance message; + message.text = p_text; + message.voice = p_voice; + message.volume = CLAMP(p_volume, 0, 100); + message.pitch = CLAMP(p_pitch, 0.f, 2.f); + message.rate = CLAMP(p_rate, 0.1f, 10.f); + message.id = p_utterance_id; + queue.push_back(message); + + if (is_paused()) { + resume(); + } else { + speech_event_callback(0, 0, SPD_EVENT_BEGIN); + } +} + +void TTS_Linux::pause() { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND(!synth); + if (spd_pause(synth) == 0) { + paused = true; + } +} + +void TTS_Linux::resume() { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND(!synth); + spd_resume(synth); + paused = false; +} + +void TTS_Linux::stop() { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND(!synth); + for (DisplayServer::TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); + } + if ((last_msg_id != -1) && ids.has(last_msg_id)) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[last_msg_id]); + } + queue.clear(); + ids.clear(); + last_msg_id = -1; + spd_cancel(synth); + spd_resume(synth); + speaking = false; + paused = false; +} + +TTS_Linux *TTS_Linux::get_singleton() { + return singleton; +} + +TTS_Linux::TTS_Linux() { + singleton = this; + // Speech Dispatcher init can be slow, it might wait for helper process to start on background, so run it in the thread. + init_thread.start(speech_init_thread_func, this); +} + +TTS_Linux::~TTS_Linux() { + init_thread.wait_to_finish(); + if (synth) { + spd_close(synth); + } + + singleton = nullptr; +} diff --git a/platform/linuxbsd/tts_linux.h b/platform/linuxbsd/tts_linux.h new file mode 100644 index 0000000000..4d39af8970 --- /dev/null +++ b/platform/linuxbsd/tts_linux.h @@ -0,0 +1,78 @@ +/*************************************************************************/ +/* tts_linux.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef TTS_LINUX_H +#define TTS_LINUX_H + +#include "core/os/thread.h" +#include "core/os/thread_safe.h" +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include "speechd-so_wrap.h" + +class TTS_Linux { + _THREAD_SAFE_CLASS_ + + List<DisplayServer::TTSUtterance> queue; + SPDConnection *synth = nullptr; + bool speaking = false; + bool paused = false; + int last_msg_id = -1; + Map<int, int> ids; + + Thread init_thread; + + static void speech_init_thread_func(void *p_userdata); + static void speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type); + static void speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark); + + static TTS_Linux *singleton; + +public: + static TTS_Linux *get_singleton(); + + bool is_speaking() const; + bool is_paused() const; + Array get_voices() const; + + void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); + void pause(); + void resume(); + void stop(); + + TTS_Linux(); + ~TTS_Linux(); +}; + +#endif // TTS_LINUX_H diff --git a/platform/osx/SCsub b/platform/osx/SCsub index d72a75af04..3a4c95613d 100644 --- a/platform/osx/SCsub +++ b/platform/osx/SCsub @@ -18,6 +18,7 @@ files = [ "key_mapping_osx.mm", "godot_main_osx.mm", "dir_access_osx.mm", + "tts_osx.mm", "joypad_osx.cpp", "vulkan_context_osx.mm", "gl_manager_osx_legacy.mm", diff --git a/platform/osx/display_server_osx.h b/platform/osx/display_server_osx.h index fa3091ff81..fcb3a62bec 100644 --- a/platform/osx/display_server_osx.h +++ b/platform/osx/display_server_osx.h @@ -137,6 +137,8 @@ private: Vector<KeyEvent> key_event_buffer; int key_event_pos = 0; + id tts = nullptr; + Point2i im_selection; String im_text; @@ -264,6 +266,15 @@ public: virtual void global_menu_remove_item(const String &p_menu_root, int p_idx) override; virtual void global_menu_clear(const String &p_menu_root) override; + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; + virtual Error dialog_show(String p_title, String p_description, Vector<String> p_buttons, const Callable &p_callback) override; virtual Error dialog_input_text(String p_title, String p_description, String p_partial, const Callable &p_callback) override; diff --git a/platform/osx/display_server_osx.mm b/platform/osx/display_server_osx.mm index d209c90d87..17a44a3fbd 100644 --- a/platform/osx/display_server_osx.mm +++ b/platform/osx/display_server_osx.mm @@ -37,6 +37,8 @@ #include "key_mapping_osx.h" #include "os_osx.h" +#include "tts_osx.h" + #include "core/io/marshalls.h" #include "core/math/geometry_2d.h" #include "core/os/keyboard.h" @@ -702,6 +704,7 @@ bool DisplayServerOSX::has_feature(Feature p_feature) const { case FEATURE_NATIVE_ICON: //case FEATURE_KEEP_SCREEN_ON: case FEATURE_SWAP_BUFFERS: + case FEATURE_TEXT_TO_SPEECH: return true; default: { } @@ -1458,6 +1461,41 @@ void DisplayServerOSX::global_menu_clear(const String &p_menu_root) { } } +bool DisplayServerOSX::tts_is_speaking() const { + ERR_FAIL_COND_V(!tts, false); + return [tts isSpeaking]; +} + +bool DisplayServerOSX::tts_is_paused() const { + ERR_FAIL_COND_V(!tts, false); + return [tts isPaused]; +} + +Array DisplayServerOSX::tts_get_voices() const { + ERR_FAIL_COND_V(!tts, Array()); + return [tts getVoices]; +} + +void DisplayServerOSX::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + ERR_FAIL_COND(!tts); + [tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt]; +} + +void DisplayServerOSX::tts_pause() { + ERR_FAIL_COND(!tts); + [tts pauseSpeaking]; +} + +void DisplayServerOSX::tts_resume() { + ERR_FAIL_COND(!tts); + [tts resumeSpeaking]; +} + +void DisplayServerOSX::tts_stop() { + ERR_FAIL_COND(!tts); + [tts stopSpeaking]; +} + Error DisplayServerOSX::dialog_show(String p_title, String p_description, Vector<String> p_buttons, const Callable &p_callback) { _THREAD_SAFE_METHOD_ @@ -3121,6 +3159,9 @@ DisplayServerOSX::DisplayServerOSX(const String &p_rendering_driver, WindowMode // Register to be notified on displays arrangement changes. CGDisplayRegisterReconfigurationCallback(_displays_arrangement_changed, nullptr); + // Init TTS + tts = [[TTS_OSX alloc] init]; + NSMenuItem *menu_item; NSString *title; diff --git a/platform/osx/tts_osx.h b/platform/osx/tts_osx.h new file mode 100644 index 0000000000..2cf6d21c18 --- /dev/null +++ b/platform/osx/tts_osx.h @@ -0,0 +1,66 @@ +/*************************************************************************/ +/* tts_osx.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef TTS_OSX_H +#define TTS_OSX_H + +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include <AVFAudio/AVSpeechSynthesis.h> +#include <AppKit/AppKit.h> + +@interface TTS_OSX : NSObject <AVSpeechSynthesizerDelegate> { + // AVSpeechSynthesizer + bool speaking; + Map<id, int> ids; + + // NSSpeechSynthesizer + bool paused; + bool have_utterance; + int last_utterance; + + id synth; // NSSpeechSynthesizer or AVSpeechSynthesizer + List<DisplayServer::TTSUtterance> queue; +} + +- (void)pauseSpeaking; +- (void)resumeSpeaking; +- (void)stopSpeaking; +- (bool)isSpeaking; +- (bool)isPaused; +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt; +- (Array)getVoices; +@end + +#endif // TTS_OSX_H diff --git a/platform/osx/tts_osx.mm b/platform/osx/tts_osx.mm new file mode 100644 index 0000000000..e6a5236cd9 --- /dev/null +++ b/platform/osx/tts_osx.mm @@ -0,0 +1,266 @@ +/*************************************************************************/ +/* tts_osx.mm */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "tts_osx.h" + +@implementation TTS_OSX + +- (id)init { + self = [super init]; + self->speaking = false; + self->have_utterance = false; + self->last_utterance = -1; + self->paused = false; + if (@available(macOS 10.14, *)) { + self->synth = [[AVSpeechSynthesizer alloc] init]; + [self->synth setDelegate:self]; + print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized."); + } else { + self->synth = [[NSSpeechSynthesizer alloc] init]; + [self->synth setDelegate:self]; + print_verbose("Text-to-Speech: NSSpeechSynthesizer initialized."); + } + return self; +} + +// AVSpeechSynthesizer callback (macOS 10.14+) + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) { + NSString *string = [utterance speechString]; + + // Convert from UTF-16 to UTF-32 position. + int pos = 0; + for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) { + unichar c = [string characterAtIndex:i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos); +} + +// AVSpeechSynthesizer callback (macOS 10.14+) + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[utterance]); + ids.erase(utterance); + speaking = false; + [self update]; +} + +// AVSpeechSynthesizer callback (macOS 10.14+) + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, ids[utterance]); + ids.erase(utterance); + speaking = false; + [self update]; +} + +// NSSpeechSynthesizer callback (macOS 10.4+) + +- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth willSpeakWord:(NSRange)characterRange ofString:(NSString *)string { + if (!paused && have_utterance) { + // Convert from UTF-16 to UTF-32 position. + int pos = 0; + for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) { + unichar c = [string characterAtIndex:i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, last_utterance, pos); + } +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth didFinishSpeaking:(BOOL)success { + if (!paused && have_utterance) { + if (success) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, last_utterance); + } else { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, last_utterance); + } + have_utterance = false; + } + speaking = false; + [self update]; +} + +- (void)update { + if (!speaking && queue.size() > 0) { + DisplayServer::TTSUtterance &message = queue.front()->get(); + + if (@available(macOS 10.14, *)) { + AVSpeechSynthesizer *av_synth = synth; + AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]]; + [new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]]; + if (message.rate > 1.f) { + [new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)]; + } else if (message.rate < 1.f) { + [new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)]; + } + [new_utterance setPitchMultiplier:message.pitch]; + [new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))]; + + ids[new_utterance] = message.id; + [av_synth speakUtterance:new_utterance]; + } else { + NSSpeechSynthesizer *ns_synth = synth; + [ns_synth setObject:nil forProperty:NSSpeechResetProperty error:nil]; + [ns_synth setVoice:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]; + int base_pitch = [[ns_synth objectForProperty:NSSpeechPitchBaseProperty error:nil] intValue]; + [ns_synth setObject:[NSNumber numberWithInt:(base_pitch * (message.pitch / 2.f + 0.5f))] forProperty:NSSpeechPitchBaseProperty error:nullptr]; + [ns_synth setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))]; + [ns_synth setRate:(message.rate * 200)]; + + last_utterance = message.id; + have_utterance = true; + [ns_synth startSpeakingString:[NSString stringWithUTF8String:message.text.utf8().get_data()]]; + } + queue.pop_front(); + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id); + speaking = true; + } +} + +- (void)pauseSpeaking { + if (@available(macOS 10.14, *)) { + AVSpeechSynthesizer *av_synth = synth; + [av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate]; + } else { + NSSpeechSynthesizer *ns_synth = synth; + [ns_synth pauseSpeakingAtBoundary:NSSpeechImmediateBoundary]; + } + paused = true; +} + +- (void)resumeSpeaking { + if (@available(macOS 10.14, *)) { + AVSpeechSynthesizer *av_synth = synth; + [av_synth continueSpeaking]; + } else { + NSSpeechSynthesizer *ns_synth = synth; + [ns_synth continueSpeaking]; + } + paused = false; +} + +- (void)stopSpeaking { + for (DisplayServer::TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); + } + queue.clear(); + if (@available(macOS 10.14, *)) { + AVSpeechSynthesizer *av_synth = synth; + [av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate]; + } else { + NSSpeechSynthesizer *ns_synth = synth; + if (have_utterance) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, last_utterance); + } + [ns_synth stopSpeaking]; + } + have_utterance = false; + speaking = false; + paused = false; +} + +- (bool)isSpeaking { + return speaking || (queue.size() > 0); +} + +- (bool)isPaused { + if (@available(macOS 10.14, *)) { + AVSpeechSynthesizer *av_synth = synth; + return [av_synth isPaused]; + } else { + return paused; + } +} + +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt { + if (interrupt) { + [self stopSpeaking]; + } + + if (text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id); + return; + } + + DisplayServer::TTSUtterance message; + message.text = text; + message.voice = voice; + message.volume = CLAMP(volume, 0, 100); + message.pitch = CLAMP(pitch, 0.f, 2.f); + message.rate = CLAMP(rate, 0.1f, 10.f); + message.id = utterance_id; + queue.push_back(message); + + if ([self isPaused]) { + [self resumeSpeaking]; + } else { + [self update]; + } +} + +- (Array)getVoices { + Array list; + if (@available(macOS 10.14, *)) { + for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) { + NSString *voiceIdentifierString = [voice identifier]; + NSString *voiceLocaleIdentifier = [voice language]; + NSString *voiceName = [voice name]; + Dictionary voice_d; + voice_d["name"] = String::utf8([voiceName UTF8String]); + voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]); + voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]); + list.push_back(voice_d); + } + } else { + for (NSString *voiceIdentifierString in [NSSpeechSynthesizer availableVoices]) { + NSString *voiceLocaleIdentifier = [[NSSpeechSynthesizer attributesForVoice:voiceIdentifierString] objectForKey:NSVoiceLocaleIdentifier]; + NSString *voiceName = [[NSSpeechSynthesizer attributesForVoice:voiceIdentifierString] objectForKey:NSVoiceName]; + Dictionary voice_d; + voice_d["name"] = String([voiceName UTF8String]); + voice_d["id"] = String([voiceIdentifierString UTF8String]); + voice_d["language"] = String([voiceLocaleIdentifier UTF8String]); + list.push_back(voice_d); + } + } + return list; +} + +@end diff --git a/platform/windows/SCsub b/platform/windows/SCsub index 76234c3065..7e412b140f 100644 --- a/platform/windows/SCsub +++ b/platform/windows/SCsub @@ -13,6 +13,7 @@ common_win = [ "display_server_windows.cpp", "key_mapping_windows.cpp", "joypad_windows.cpp", + "tts_windows.cpp", "windows_terminal_logger.cpp", "vulkan_context_win.cpp", "gl_manager_windows.cpp", diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 249a0d2e79..0b18fb74fb 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -252,6 +252,7 @@ def configure_msvc(env, manual_msvc_config): "kernel32", "ole32", "oleaut32", + "sapi", "user32", "gdi32", "IPHLPAPI", @@ -426,6 +427,7 @@ def configure_mingw(env): "ws2_32", "kernel32", "oleaut32", + "sapi", "dinput8", "dxguid", "ksuser", diff --git a/platform/windows/display_server_windows.cpp b/platform/windows/display_server_windows.cpp index 31bad0f053..0412eb2d9c 100644 --- a/platform/windows/display_server_windows.cpp +++ b/platform/windows/display_server_windows.cpp @@ -84,6 +84,7 @@ bool DisplayServerWindows::has_feature(Feature p_feature) const { case FEATURE_NATIVE_ICON: case FEATURE_SWAP_BUFFERS: case FEATURE_KEEP_SCREEN_ON: + case FEATURE_TEXT_TO_SPEECH: return true; default: return false; @@ -133,6 +134,41 @@ void DisplayServerWindows::_set_mouse_mode_impl(MouseMode p_mode) { } } +bool DisplayServerWindows::tts_is_speaking() const { + ERR_FAIL_COND_V(!tts, false); + return tts->is_speaking(); +} + +bool DisplayServerWindows::tts_is_paused() const { + ERR_FAIL_COND_V(!tts, false); + return tts->is_paused(); +} + +Array DisplayServerWindows::tts_get_voices() const { + ERR_FAIL_COND_V(!tts, Array()); + return tts->get_voices(); +} + +void DisplayServerWindows::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + ERR_FAIL_COND(!tts); + tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); +} + +void DisplayServerWindows::tts_pause() { + ERR_FAIL_COND(!tts); + tts->pause(); +} + +void DisplayServerWindows::tts_resume() { + ERR_FAIL_COND(!tts); + tts->resume(); +} + +void DisplayServerWindows::tts_stop() { + ERR_FAIL_COND(!tts); + tts->stop(); +} + void DisplayServerWindows::mouse_set_mode(MouseMode p_mode) { _THREAD_SAFE_METHOD_ @@ -3497,6 +3533,9 @@ DisplayServerWindows::DisplayServerWindows(const String &p_rendering_driver, Win rendering_driver = p_rendering_driver; + // Init TTS + tts = memnew(TTS_Windows); + // Note: Wacom WinTab driver API for pen input, for devices incompatible with Windows Ink. HMODULE wintab_lib = LoadLibraryW(L"wintab32.dll"); if (wintab_lib) { @@ -3739,4 +3778,8 @@ DisplayServerWindows::~DisplayServerWindows() { gl_manager = nullptr; } #endif + if (tts) { + memdelete(tts); + } + CoUninitialize(); } diff --git a/platform/windows/display_server_windows.h b/platform/windows/display_server_windows.h index fcf4b5a728..80faf71bd4 100644 --- a/platform/windows/display_server_windows.h +++ b/platform/windows/display_server_windows.h @@ -46,6 +46,7 @@ #include "servers/rendering/renderer_compositor.h" #include "servers/rendering/renderer_rd/renderer_compositor_rd.h" #include "servers/rendering_server.h" +#include "tts_windows.h" #ifdef XAUDIO2_ENABLED #include "drivers/xaudio2/audio_driver_xaudio2.h" @@ -320,6 +321,8 @@ class DisplayServerWindows : public DisplayServer { String rendering_driver; bool app_focused = false; + TTS_Windows *tts = nullptr; + struct WindowData { HWND hWnd; //layered window @@ -454,6 +457,15 @@ public: virtual bool has_feature(Feature p_feature) const override; virtual String get_name() const override; + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; + virtual void mouse_set_mode(MouseMode p_mode) override; virtual MouseMode mouse_get_mode() const override; diff --git a/platform/windows/tts_windows.cpp b/platform/windows/tts_windows.cpp new file mode 100644 index 0000000000..05249934ba --- /dev/null +++ b/platform/windows/tts_windows.cpp @@ -0,0 +1,269 @@ +/*************************************************************************/ +/* tts_windows.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "tts_windows.h" + +TTS_Windows *TTS_Windows::singleton = nullptr; + +void __stdcall TTS_Windows::speech_event_callback(WPARAM wParam, LPARAM lParam) { + TTS_Windows *tts = TTS_Windows::get_singleton(); + SPEVENT event; + while (tts->synth->GetEvents(1, &event, NULL) == S_OK) { + if (tts->ids.has(event.ulStreamNum)) { + if (event.eEventId == SPEI_START_INPUT_STREAM) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, tts->ids[event.ulStreamNum].id); + } else if (event.eEventId == SPEI_END_INPUT_STREAM) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, tts->ids[event.ulStreamNum].id); + tts->ids.erase(event.ulStreamNum); + tts->_update_tts(); + } else if (event.eEventId == SPEI_WORD_BOUNDARY) { + const Char16String &string = tts->ids[event.ulStreamNum].string; + int pos = 0; + for (int i = 0; i < MIN(event.lParam, string.length()); i++) { + char16_t c = string[i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, tts->ids[event.ulStreamNum].id, pos - tts->ids[event.ulStreamNum].offset); + } + } + } +} + +void TTS_Windows::_update_tts() { + if (!is_speaking() && !paused && queue.size() > 0) { + DisplayServer::TTSUtterance &message = queue.front()->get(); + + String text; + DWORD flags = SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML; + String pitch_tag = String("<pitch absmiddle=\"") + String::num_int64(message.pitch * 10 - 10, 10) + String("\">"); + text = pitch_tag + message.text + String("</pitch>"); + + IEnumSpObjectTokens *cpEnum; + ISpObjectToken *cpVoiceToken; + ULONG ulCount = 0; + ULONG stream_number = 0; + ISpObjectTokenCategory *cpCategory; + HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); + if (SUCCEEDED(hr)) { + hr = cpCategory->SetId(SPCAT_VOICES, false); + if (SUCCEEDED(hr)) { + hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); + if (SUCCEEDED(hr)) { + hr = cpEnum->GetCount(&ulCount); + while (SUCCEEDED(hr) && ulCount--) { + wchar_t *w_id = 0L; + hr = cpEnum->Next(1, &cpVoiceToken, nullptr); + cpVoiceToken->GetId(&w_id); + if (String::utf16((const char16_t *)w_id) == message.voice) { + synth->SetVoice(cpVoiceToken); + cpVoiceToken->Release(); + break; + } + cpVoiceToken->Release(); + } + cpEnum->Release(); + } + } + cpCategory->Release(); + } + + UTData ut; + ut.string = text.utf16(); + ut.offset = pitch_tag.length(); // Substract injected <pitch> tag offset. + ut.id = message.id; + + synth->SetVolume(message.volume); + synth->SetRate(10.f * log10(message.rate) / log10(3.f)); + synth->Speak((LPCWSTR)ut.string.get_data(), flags, &stream_number); + + ids[stream_number] = ut; + + queue.pop_front(); + } +} + +bool TTS_Windows::is_speaking() const { + ERR_FAIL_COND_V(!synth, false); + + SPVOICESTATUS status; + synth->GetStatus(&status, nullptr); + return (status.dwRunningState == SPRS_IS_SPEAKING); +} + +bool TTS_Windows::is_paused() const { + ERR_FAIL_COND_V(!synth, false); + return paused; +} + +Array TTS_Windows::get_voices() const { + Array list; + IEnumSpObjectTokens *cpEnum; + ISpObjectToken *cpVoiceToken; + ISpDataKey *cpDataKeyAttribs; + ULONG ulCount = 0; + ISpObjectTokenCategory *cpCategory; + HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); + if (SUCCEEDED(hr)) { + hr = cpCategory->SetId(SPCAT_VOICES, false); + if (SUCCEEDED(hr)) { + hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); + if (SUCCEEDED(hr)) { + hr = cpEnum->GetCount(&ulCount); + while (SUCCEEDED(hr) && ulCount--) { + hr = cpEnum->Next(1, &cpVoiceToken, nullptr); + HRESULT hr_attr = cpVoiceToken->OpenKey(SPTOKENKEY_ATTRIBUTES, &cpDataKeyAttribs); + if (SUCCEEDED(hr_attr)) { + wchar_t *w_id = nullptr; + wchar_t *w_lang = nullptr; + wchar_t *w_name = nullptr; + cpVoiceToken->GetId(&w_id); + cpDataKeyAttribs->GetStringValue(L"Language", &w_lang); + cpDataKeyAttribs->GetStringValue(nullptr, &w_name); + LCID locale = wcstol(w_lang, nullptr, 16); + + int locale_chars = GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, nullptr, 0); + int region_chars = GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, nullptr, 0); + wchar_t *w_lang_code = new wchar_t[locale_chars]; + wchar_t *w_reg_code = new wchar_t[region_chars]; + GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, w_lang_code, locale_chars); + GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, w_reg_code, region_chars); + + Dictionary voice_d; + voice_d["id"] = String::utf16((const char16_t *)w_id); + if (w_name) { + voice_d["name"] = String::utf16((const char16_t *)w_name); + } else { + voice_d["name"] = voice_d["id"].operator String().replace("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\", ""); + } + voice_d["language"] = String::utf16((const char16_t *)w_lang_code) + "_" + String::utf16((const char16_t *)w_reg_code); + list.push_back(voice_d); + + delete[] w_lang_code; + delete[] w_reg_code; + + cpDataKeyAttribs->Release(); + } + cpVoiceToken->Release(); + } + cpEnum->Release(); + } + } + cpCategory->Release(); + } + return list; +} + +void TTS_Windows::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + ERR_FAIL_COND(!synth); + if (p_interrupt) { + stop(); + } + + if (p_text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); + return; + } + + DisplayServer::TTSUtterance message; + message.text = p_text; + message.voice = p_voice; + message.volume = CLAMP(p_volume, 0, 100); + message.pitch = CLAMP(p_pitch, 0.f, 2.f); + message.rate = CLAMP(p_rate, 0.1f, 10.f); + message.id = p_utterance_id; + queue.push_back(message); + + if (is_paused()) { + resume(); + } else { + _update_tts(); + } +} + +void TTS_Windows::pause() { + ERR_FAIL_COND(!synth); + if (!paused) { + if (synth->Pause() == S_OK) { + paused = true; + } + } +} + +void TTS_Windows::resume() { + ERR_FAIL_COND(!synth); + synth->Resume(); + paused = false; +} + +void TTS_Windows::stop() { + ERR_FAIL_COND(!synth); + + SPVOICESTATUS status; + synth->GetStatus(&status, nullptr); + if (ids.has(status.ulCurrentStream)) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[status.ulCurrentStream].id); + ids.erase(status.ulCurrentStream); + } + for (DisplayServer::TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); + } + queue.clear(); + synth->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr); + synth->Resume(); + paused = false; +} + +TTS_Windows *TTS_Windows::get_singleton() { + return singleton; +} + +TTS_Windows::TTS_Windows() { + singleton = this; + CoInitialize(nullptr); + + if (SUCCEEDED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&synth))) { + ULONGLONG event_mask = SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_WORD_BOUNDARY); + synth->SetInterest(event_mask, event_mask); + synth->SetNotifyCallbackFunction(&speech_event_callback, (WPARAM)(this), 0); + print_verbose("Text-to-Speech: SAPI initialized."); + } else { + print_verbose("Text-to-Speech: Cannot initialize ISpVoice!"); + } +} + +TTS_Windows::~TTS_Windows() { + if (synth) { + synth->Release(); + } + singleton = nullptr; +} diff --git a/platform/windows/tts_windows.h b/platform/windows/tts_windows.h new file mode 100644 index 0000000000..5da404baf9 --- /dev/null +++ b/platform/windows/tts_windows.h @@ -0,0 +1,80 @@ +/*************************************************************************/ +/* tts_windows.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef TTS_WINDOWS_H +#define TTS_WINDOWS_H + +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include <objbase.h> +#include <sapi.h> +#include <wchar.h> +#include <winnls.h> + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +class TTS_Windows { + List<DisplayServer::TTSUtterance> queue; + ISpVoice *synth = nullptr; + bool paused = false; + struct UTData { + Char16String string; + int offset; + int id; + }; + Map<ULONG, UTData> ids; + + static void __stdcall speech_event_callback(WPARAM wParam, LPARAM lParam); + void _update_tts(); + + static TTS_Windows *singleton; + +public: + static TTS_Windows *get_singleton(); + + bool is_speaking() const; + bool is_paused() const; + Array get_voices() const; + + void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); + void pause(); + void resume(); + void stop(); + + TTS_Windows(); + ~TTS_Windows(); +}; + +#endif // TTS_WINDOWS_H diff --git a/servers/display_server.cpp b/servers/display_server.cpp index 67bfc75426..8d97cd2543 100644 --- a/servers/display_server.cpp +++ b/servers/display_server.cpp @@ -220,6 +220,81 @@ void DisplayServer::global_menu_clear(const String &p_menu_root) { WARN_PRINT("Global menus not supported by this display server."); } +bool DisplayServer::tts_is_speaking() const { + WARN_PRINT("TTS is not supported by this display server."); + return false; +} + +bool DisplayServer::tts_is_paused() const { + WARN_PRINT("TTS is not supported by this display server."); + return false; +} + +void DisplayServer::tts_pause() { + WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_resume() { + WARN_PRINT("TTS is not supported by this display server."); +} + +Array DisplayServer::tts_get_voices() const { + WARN_PRINT("TTS is not supported by this display server."); + return Array(); +} + +PackedStringArray DisplayServer::tts_get_voices_for_language(const String &p_language) const { + PackedStringArray ret; + Array voices = tts_get_voices(); + for (int i = 0; i < voices.size(); i++) { + const Dictionary &voice = voices[i]; + if (voice.has("id") && voice.has("language") && voice["language"].operator String().begins_with(p_language)) { + ret.push_back(voice["id"]); + } + } + return ret; +} + +void DisplayServer::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_stop() { + WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable) { + ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX); + utterance_callback[p_event] = p_callable; +} + +void DisplayServer::tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos) { + ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX); + switch (p_event) { + case DisplayServer::TTS_UTTERANCE_STARTED: + case DisplayServer::TTS_UTTERANCE_ENDED: + case DisplayServer::TTS_UTTERANCE_CANCELED: { + if (utterance_callback[p_event].is_valid()) { + Variant args[1]; + args[0] = p_id; + const Variant *argp[] = { &args[0] }; + utterance_callback[p_event].call_deferred(argp, 1); // Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession. + } + } break; + case DisplayServer::TTS_UTTERANCE_BOUNDARY: { + if (utterance_callback[p_event].is_valid()) { + Variant args[2]; + args[0] = p_pos; + args[1] = p_id; + const Variant *argp[] = { &args[0], &args[1] }; + utterance_callback[p_event].call_deferred(argp, 2); // Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession. + } + } break; + default: + break; + } +} + void DisplayServer::mouse_set_mode(MouseMode p_mode) { WARN_PRINT("Mouse is not supported by this display server."); } @@ -478,6 +553,19 @@ void DisplayServer::_bind_methods() { ClassDB::bind_method(D_METHOD("global_menu_remove_item", "menu_root", "idx"), &DisplayServer::global_menu_remove_item); ClassDB::bind_method(D_METHOD("global_menu_clear", "menu_root"), &DisplayServer::global_menu_clear); + ClassDB::bind_method(D_METHOD("tts_is_speaking"), &DisplayServer::tts_is_speaking); + ClassDB::bind_method(D_METHOD("tts_is_paused"), &DisplayServer::tts_is_paused); + ClassDB::bind_method(D_METHOD("tts_get_voices"), &DisplayServer::tts_get_voices); + ClassDB::bind_method(D_METHOD("tts_get_voices_for_language", "language"), &DisplayServer::tts_get_voices_for_language); + + ClassDB::bind_method(D_METHOD("tts_speak", "text", "voice", "volume", "pitch", "rate", "utterance_id", "interrupt"), &DisplayServer::tts_speak, DEFVAL(50), DEFVAL(1.f), DEFVAL(1.f), DEFVAL(0), DEFVAL(false)); + ClassDB::bind_method(D_METHOD("tts_pause"), &DisplayServer::tts_pause); + ClassDB::bind_method(D_METHOD("tts_resume"), &DisplayServer::tts_resume); + ClassDB::bind_method(D_METHOD("tts_stop"), &DisplayServer::tts_stop); + + ClassDB::bind_method(D_METHOD("tts_set_utterance_callback", "event", "callable"), &DisplayServer::tts_set_utterance_callback); + ClassDB::bind_method(D_METHOD("_tts_post_utterance_event", "event", "id", "char_pos"), &DisplayServer::tts_post_utterance_event); + ClassDB::bind_method(D_METHOD("mouse_set_mode", "mouse_mode"), &DisplayServer::mouse_set_mode); ClassDB::bind_method(D_METHOD("mouse_get_mode"), &DisplayServer::mouse_get_mode); @@ -621,6 +709,7 @@ void DisplayServer::_bind_methods() { BIND_ENUM_CONSTANT(FEATURE_ORIENTATION); BIND_ENUM_CONSTANT(FEATURE_SWAP_BUFFERS); BIND_ENUM_CONSTANT(FEATURE_CLIPBOARD_PRIMARY); + BIND_ENUM_CONSTANT(FEATURE_TEXT_TO_SPEECH); BIND_ENUM_CONSTANT(MOUSE_MODE_VISIBLE); BIND_ENUM_CONSTANT(MOUSE_MODE_HIDDEN); @@ -689,6 +778,11 @@ void DisplayServer::_bind_methods() { BIND_ENUM_CONSTANT(DISPLAY_HANDLE); BIND_ENUM_CONSTANT(WINDOW_HANDLE); BIND_ENUM_CONSTANT(WINDOW_VIEW); + + BIND_ENUM_CONSTANT(TTS_UTTERANCE_STARTED); + BIND_ENUM_CONSTANT(TTS_UTTERANCE_ENDED); + BIND_ENUM_CONSTANT(TTS_UTTERANCE_CANCELED); + BIND_ENUM_CONSTANT(TTS_UTTERANCE_BOUNDARY); } void DisplayServer::register_create_function(const char *p_name, CreateFunction p_function, GetRenderingDriversFunction p_get_drivers) { diff --git a/servers/display_server.h b/servers/display_server.h index 4961b07ba3..19efcbd3dd 100644 --- a/servers/display_server.h +++ b/servers/display_server.h @@ -121,6 +121,7 @@ public: FEATURE_SWAP_BUFFERS, FEATURE_KEEP_SCREEN_ON, FEATURE_CLIPBOARD_PRIMARY, + FEATURE_TEXT_TO_SPEECH, }; virtual bool has_feature(Feature p_feature) const = 0; @@ -172,6 +173,40 @@ public: virtual void global_menu_remove_item(const String &p_menu_root, int p_idx); virtual void global_menu_clear(const String &p_menu_root); + struct TTSUtterance { + String text; + String voice; + int volume = 50; + float pitch = 1.f; + float rate = 1.f; + int id = 0; + }; + + enum TTSUtteranceEvent { + TTS_UTTERANCE_STARTED, + TTS_UTTERANCE_ENDED, + TTS_UTTERANCE_CANCELED, + TTS_UTTERANCE_BOUNDARY, + TTS_UTTERANCE_MAX, + }; + +private: + Callable utterance_callback[TTS_UTTERANCE_MAX]; + +public: + virtual bool tts_is_speaking() const; + virtual bool tts_is_paused() const; + virtual Array tts_get_voices() const; + virtual PackedStringArray tts_get_voices_for_language(const String &p_language) const; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); + virtual void tts_pause(); + virtual void tts_resume(); + virtual void tts_stop(); + + virtual void tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable); + virtual void tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos = 0); + enum MouseMode { MOUSE_MODE_VISIBLE, MOUSE_MODE_HIDDEN, @@ -431,5 +466,6 @@ VARIANT_ENUM_CAST(DisplayServer::WindowFlags) VARIANT_ENUM_CAST(DisplayServer::HandleType) VARIANT_ENUM_CAST(DisplayServer::CursorShape) VARIANT_ENUM_CAST(DisplayServer::VSyncMode) +VARIANT_ENUM_CAST(DisplayServer::TTSUtteranceEvent) #endif // DISPLAY_SERVER_H diff --git a/servers/text/text_server_extension.cpp b/servers/text/text_server_extension.cpp index 001706bb6f..005cb68302 100644 --- a/servers/text/text_server_extension.cpp +++ b/servers/text/text_server_extension.cpp @@ -293,6 +293,8 @@ void TextServerExtension::_bind_methods() { GDVIRTUAL_BIND(strip_diacritics, "string"); + GDVIRTUAL_BIND(string_get_word_breaks, "string", "language"); + GDVIRTUAL_BIND(string_to_upper, "string", "language"); GDVIRTUAL_BIND(string_to_lower, "string", "language"); @@ -1503,6 +1505,14 @@ Array TextServerExtension::parse_structured_text(StructuredTextParser p_parser_t return Array(); } +PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const { + PackedInt32Array ret; + if (GDVIRTUAL_CALL(string_get_word_breaks, p_string, p_language, ret)) { + return ret; + } + return PackedInt32Array(); +} + TextServerExtension::TextServerExtension() { //NOP } diff --git a/servers/text/text_server_extension.h b/servers/text/text_server_extension.h index ce781097f3..7b7fc61ed7 100644 --- a/servers/text/text_server_extension.h +++ b/servers/text/text_server_extension.h @@ -485,6 +485,9 @@ public: virtual String strip_diacritics(const String &p_string) const override; GDVIRTUAL1RC(String, strip_diacritics, const String &); + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; + GDVIRTUAL2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + virtual String string_to_upper(const String &p_string, const String &p_language = "") const override; virtual String string_to_lower(const String &p_string, const String &p_language = "") const override; GDVIRTUAL2RC(String, string_to_upper, const String &, const String &); diff --git a/servers/text_server.cpp b/servers/text_server.cpp index d66e769e3c..7d9945f5d7 100644 --- a/servers/text_server.cpp +++ b/servers/text_server.cpp @@ -439,6 +439,8 @@ void TextServer::_bind_methods() { ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL("")); ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL("")); + ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL("")); + ClassDB::bind_method(D_METHOD("strip_diacritics", "string"), &TextServer::strip_diacritics); ClassDB::bind_method(D_METHOD("string_to_upper", "string", "language"), &TextServer::string_to_upper, DEFVAL("")); diff --git a/servers/text_server.h b/servers/text_server.h index 7e7f26b32d..b08aa26917 100644 --- a/servers/text_server.h +++ b/servers/text_server.h @@ -431,6 +431,9 @@ public: virtual String parse_number(const String &p_string, const String &p_language = "") const = 0; virtual String percent_sign(const String &p_language = "") const = 0; + // String functions. + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0; + virtual String strip_diacritics(const String &p_string) const; // Other string operations. diff --git a/tests/servers/test_text_server.h b/tests/servers/test_text_server.h index d7de94516f..066c280fd5 100644 --- a/tests/servers/test_text_server.h +++ b/tests/servers/test_text_server.h @@ -514,6 +514,49 @@ TEST_SUITE("[[TextServer]") { CHECK(ts->strip_diacritics(U"ṽṿ ẁẃẅẇẉ ẋẍ ẏ ẑẓẕ ẖ ẗẘẙẛ") == U"vv wwwww xx y zzz h twys"); } } + + SUBCASE("[TextServer] Word break") { + for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) { + Ref<TextServer> ts = TextServerManager::get_singleton()->get_interface(i); + + if (!ts->has_feature(TextServer::FEATURE_SIMPLE_LAYOUT)) { + continue; + } + + TEST_FAIL_COND(ts.is_null(), "Invalid TS interface."); + { + String text1 = U"linguistically similar and effectively form"; + // 14^ 22^ 26^ 38^ + PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en"); + CHECK(breaks.size() == 4); + if (breaks.size() == 4) { + CHECK(breaks[0] == 14); + CHECK(breaks[1] == 22); + CHECK(breaks[2] == 26); + CHECK(breaks[3] == 38); + } + } + + if (ts->has_feature(TextServer::FEATURE_BREAK_ITERATORS)) { + String text2 = U"เป็นภาษาราชการและภาษาประจำชาติของประเทศไทย"; + // เป็น ภาษา ราชการ และ ภาษา ประจำ ชาติ ของ ประเทศไทย + // 3^ 7^ 13^ 16^ 20^ 25^ 29^ 32^ + + PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th"); + CHECK(breaks.size() == 8); + if (breaks.size() == 8) { + CHECK(breaks[0] == 3); + CHECK(breaks[1] == 7); + CHECK(breaks[2] == 13); + CHECK(breaks[3] == 16); + CHECK(breaks[4] == 20); + CHECK(breaks[5] == 25); + CHECK(breaks[6] == 29); + CHECK(breaks[7] == 32); + } + } + } + } } } }; // namespace TestTextServer |