diff options
54 files changed, 3962 insertions, 2 deletions
diff --git a/.github/workflows/linux_builds.yml b/.github/workflows/linux_builds.yml index 7ac364c45f..338278f461 100644 --- a/.github/workflows/linux_builds.yml +++ b/.github/workflows/linux_builds.yml @@ -87,7 +87,7 @@ jobs:            sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \                libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev \                libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip \ -              llvm +              llvm libspeechd-dev speech-dispatcher        - name: Setup Godot build cache          uses: ./.github/actions/godot-cache diff --git a/doc/classes/DisplayServer.xml b/doc/classes/DisplayServer.xml index 0d99c600d5..ede3a1e199 100644 --- a/doc/classes/DisplayServer.xml +++ b/doc/classes/DisplayServer.xml @@ -814,6 +814,93 @@  				[b]Note:[/b] This method is implemented on Windows.  			</description>  		</method> +		<method name="tts_get_voices" qualifiers="const"> +			<return type="Array" /> +			<description> +				Returns an [Array] of voice information dictionaries. +				Each [Dictionary] contains three [String] entries: +				- [code]name[/code] is voice name. +				- [code]id[/code] is voice identifier. +				- [code]language[/code] is language code in [code]lang_Variant[/code] format. [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. And [code]Variant[/code] part is an engine dependent string describing country, region and/or dialect. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_get_voices_for_language" qualifiers="const"> +			<return type="PackedStringArray" /> +			<argument index="0" name="language" type="String" /> +			<description> +				Returns a [PackedStringArray] of voice identifiers for the [code]language[/code]. 
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_is_paused" qualifiers="const"> +			<return type="bool" /> +			<description> +				Returns [code]true[/code] if the synthesizer is in a paused state. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_is_speaking" qualifiers="const"> +			<return type="bool" /> +			<description> +				Returns [code]true[/code] if the synthesizer is generating speech, or has utterances waiting in the queue. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_pause"> +			<return type="void" /> +			<description> +				Puts the synthesizer into a paused state. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_resume"> +			<return type="void" /> +			<description> +				Resumes the synthesizer if it was paused. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_set_utterance_callback"> +			<return type="void" /> +			<argument index="0" name="event" type="int" enum="DisplayServer.TTSUtteranceEvent" /> +			<argument index="1" name="callable" type="Callable" /> +			<description> +				Adds a callback, which is called when the utterance has started, finished, canceled or reached a text boundary. +				- [code]TTS_UTTERANCE_STARTED[/code], [code]TTS_UTTERANCE_ENDED[/code], and [code]TTS_UTTERANCE_CANCELED[/code] callable's method should take one [int] parameter, the utterance id. +				- [code]TTS_UTTERANCE_BOUNDARY[/code] callable's method should take two [int] parameters, the index of the character and the utterance id. 
+				[b]Note:[/b] The granularity of the boundary callbacks is engine dependent. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method> +		<method name="tts_speak"> +			<return type="void" /> +			<argument index="0" name="text" type="String" /> +			<argument index="1" name="voice" type="String" /> +			<argument index="2" name="volume" type="int" default="50" /> +			<argument index="3" name="pitch" type="float" default="1.0" /> +			<argument index="4" name="rate" type="float" default="1.0" /> +			<argument index="5" name="utterance_id" type="int" default="0" /> +			<argument index="6" name="interrupt" type="bool" default="false" /> +			<description> +				Adds an utterance to the queue. If [code]interrupt[/code] is [code]true[/code], the queue is cleared first. +				- [code]voice[/code] identifier is one of the [code]"id"[/code] values returned by [method tts_get_voices] or one of the values returned by [method tts_get_voices_for_language]. +				- [code]volume[/code] ranges from [code]0[/code] (lowest) to [code]100[/code] (highest). +				- [code]pitch[/code] ranges from [code]0.0[/code] (lowest) to [code]2.0[/code] (highest), [code]1.0[/code] is default pitch for the current voice. +				- [code]rate[/code] ranges from [code]0.1[/code] (lowest) to [code]10.0[/code] (highest), [code]1.0[/code] is a normal speaking rate. Other values act as a percentage relative. +				- [code]utterance_id[/code] is passed as a parameter to the callback functions. +				[b]Note:[/b] On Windows and Linux, utterance [code]text[/code] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak]. +				[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. 
+			</description> +		</method> +		<method name="tts_stop"> +			<return type="void" /> +			<description> +				Stops synthesis in progress and removes all utterances from the queue. +				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows. +			</description> +		</method>  		<method name="virtual_keyboard_get_height" qualifiers="const">  			<return type="int" />  			<description> @@ -1184,6 +1271,9 @@  		</constant>  		<constant name="FEATURE_CLIPBOARD_PRIMARY" value="18" enum="Feature">  		</constant> +		<constant name="FEATURE_TEXT_TO_SPEECH" value="19" enum="Feature"> +			Display server supports text-to-speech. See [code]tts_*[/code] methods. +		</constant>  		<constant name="MOUSE_MODE_VISIBLE" value="0" enum="MouseMode">  			Makes the mouse cursor visible if it is hidden.  		</constant> @@ -1335,5 +1425,17 @@  			- MacOS: [code]NSView*[/code] for the window main view.  			- iOS: [code]UIView*[/code] for the window main view.  		</constant> +		<constant name="TTS_UTTERANCE_STARTED" value="0" enum="TTSUtteranceEvent"> +			Utterance has begun to be spoken. +		</constant> +		<constant name="TTS_UTTERANCE_ENDED" value="1" enum="TTSUtteranceEvent"> +			Utterance was successfully finished. +		</constant> +		<constant name="TTS_UTTERANCE_CANCELED" value="2" enum="TTSUtteranceEvent"> +			Utterance was canceled, or TTS service was unable to process it. +		</constant> +		<constant name="TTS_UTTERANCE_BOUNDARY" value="3" enum="TTSUtteranceEvent"> +			Utterance reached a word or sentence boundary. +		</constant>  	</constants>  </class> diff --git a/doc/classes/TextServer.xml b/doc/classes/TextServer.xml index dba4bd24a5..2f57b76374 100644 --- a/doc/classes/TextServer.xml +++ b/doc/classes/TextServer.xml @@ -1441,6 +1441,14 @@  				Aligns shaped text to the given tab-stops.  			
</description>  		</method> +		<method name="string_get_word_breaks" qualifiers="const"> +			<return type="PackedInt32Array" /> +			<argument index="0" name="string" type="String" /> +			<argument index="1" name="language" type="String" default="""" /> +			<description> +				Returns array of the word break character offsets. +			</description> +		</method>  		<method name="string_to_lower" qualifiers="const">  			<return type="String" />  			<argument index="0" name="string" type="String" /> diff --git a/doc/classes/TextServerExtension.xml b/doc/classes/TextServerExtension.xml index ef522d08a7..434d6f909c 100644 --- a/doc/classes/TextServerExtension.xml +++ b/doc/classes/TextServerExtension.xml @@ -1461,6 +1461,14 @@  				[b]Note:[/b] This method is used by default line/word breaking methods, and its implementation might be omitted if custom line breaking in implemented.  			</description>  		</method> +		<method name="string_get_word_breaks" qualifiers="virtual const"> +			<return type="PackedInt32Array" /> +			<argument index="0" name="string" type="String" /> +			<argument index="1" name="language" type="String" /> +			<description> +				Returns array of the word break character offsets. 
+			</description> +		</method>  		<method name="string_to_lower" qualifiers="virtual const">  			<return type="String" />  			<argument index="0" name="string" type="String" /> diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index 0ae8219e23..437fbe76ab 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -3255,6 +3255,19 @@ void TextServerAdvanced::font_set_global_oversampling(double p_oversampling) {  /* Shaped text buffer interface                                          */  /*************************************************************************/ +int64_t TextServerAdvanced::_convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const { +	int64_t limit = p_pos; +	if (p_utf32.length() != p_utf16.length()) { +		const UChar *data = p_utf16.ptr(); +		for (int i = 0; i < p_pos; i++) { +			if (U16_IS_LEAD(data[i])) { +				limit--; +			} +		} +	} +	return limit; +} +  int64_t TextServerAdvanced::_convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const {  	int64_t limit = p_pos;  	if (p_sd->text.length() != p_sd->utf16.length()) { @@ -5555,6 +5568,53 @@ String TextServerAdvanced::string_to_lower(const String &p_string, const String  	return String::utf16(lower.ptr(), len);  } +PackedInt32Array TextServerAdvanced::string_get_word_breaks(const String &p_string, const String &p_language) const { +	// Convert to UTF-16. +	Char16String utf16 = p_string.utf16(); + +	Set<int> breaks; +	UErrorCode err = U_ZERO_ERROR; +	UBreakIterator *bi = ubrk_open(UBRK_LINE, p_language.ascii().get_data(), (const UChar *)utf16.ptr(), utf16.length(), &err); +	if (U_FAILURE(err)) { +		// No data loaded - use fallback. 
+		for (int i = 0; i < p_string.length(); i++) { +			char32_t c = p_string[i]; +			if (is_whitespace(c) || is_linebreak(c)) { +				breaks.insert(i); +			} +		} +	} else { +		while (ubrk_next(bi) != UBRK_DONE) { +			int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1; +			if (pos != p_string.length() - 1) { +				breaks.insert(pos); +			} +		} +	} +	ubrk_close(bi); + +	PackedInt32Array ret; +	for (int i = 0; i < p_string.length(); i++) { +		char32_t c = p_string[i]; +		if (c == 0xfffc) { +			continue; +		} +		if (u_ispunct(c) && c != 0x005F) { +			ret.push_back(i); +			continue; +		} +		if (is_underscore(c)) { +			ret.push_back(i); +			continue; +		} +		if (breaks.has(i)) { +			ret.push_back(i); +			continue; +		} +	} +	return ret; +} +  TextServerAdvanced::TextServerAdvanced() {  	_insert_num_systems_lang();  	_insert_feature_sets(); diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index fa59566a94..1b4293aa72 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -393,11 +393,13 @@ class TextServerAdvanced : public TextServerExtension {  	mutable RID_PtrOwner<ShapedTextDataAdvanced> shaped_owner;  	void _realign(ShapedTextDataAdvanced *p_sd) const; +	int64_t _convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const;  	int64_t _convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;  	int64_t _convert_pos_inv(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;  	bool _shape_substr(ShapedTextDataAdvanced *p_new_sd, const ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_length) const;  	void _shape_run(ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_end, hb_script_t p_script, hb_direction_t p_direction, Array p_fonts, int64_t p_span, int64_t p_fb_index);  	Glyph _shape_single_glyph(ShapedTextDataAdvanced *p_sd, char32_t p_char, hb_script_t p_script, hb_direction_t p_direction, const RID &p_font, 
int64_t p_font_size); +  	_FORCE_INLINE_ void _add_featuers(const Dictionary &p_source, Vector<hb_feature_t> &r_ftrs);  	// HarfBuzz bitmap font interface. @@ -686,6 +688,8 @@ public:  	virtual String parse_number(const String &p_string, const String &p_language = "") const override;  	virtual String percent_sign(const String &p_language = "") const override; +	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; +  	virtual String strip_diacritics(const String &p_string) const override;  	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override; diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp index 1251aaf2b9..d84e9e581a 100644 --- a/modules/text_server_fb/text_server_fb.cpp +++ b/modules/text_server_fb/text_server_fb.cpp @@ -3079,7 +3079,7 @@ bool TextServerFallback::shaped_text_update_breaks(const RID &p_shaped) {  		if (sd_glyphs[i].count > 0) {  			char32_t c = sd->text[sd_glyphs[i].start - sd->start];  			if (c_punct_size == 0) { -				if (is_punct(c)) { +				if (is_punct(c) && c != 0x005F) {  					sd_glyphs[i].flags |= GRAPHEME_IS_PUNCTUATION;  				}  			} else { @@ -3623,6 +3623,29 @@ String TextServerFallback::string_to_lower(const String &p_string, const String  	return lower;  } +PackedInt32Array TextServerFallback::string_get_word_breaks(const String &p_string, const String &p_language) const { +	PackedInt32Array ret; +	for (int i = 0; i < p_string.length(); i++) { +		char32_t c = p_string[i]; +		if (c == 0xfffc) { +			continue; +		} +		if (is_punct(c) && c != 0x005F) { +			ret.push_back(i); +			continue; +		} +		if (is_underscore(c)) { +			ret.push_back(i); +			continue; +		} +		if (is_whitespace(c) || is_linebreak(c)) { +			ret.push_back(i); +			continue; +		} +	} +	return ret; +} +  TextServerFallback::TextServerFallback() {  	_insert_feature_sets();  }; diff --git 
a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h index d6f61e02f8..c837029623 100644 --- a/modules/text_server_fb/text_server_fb.h +++ b/modules/text_server_fb/text_server_fb.h @@ -573,6 +573,8 @@ public:  	virtual double shaped_text_get_underline_position(const RID &p_shaped) const override;  	virtual double shaped_text_get_underline_thickness(const RID &p_shaped) const override; +	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; +  	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;  	virtual String string_to_lower(const String &p_string, const String &p_language = "") const override; diff --git a/platform/android/SCsub b/platform/android/SCsub index 1a3c158d2e..ad226255bc 100644 --- a/platform/android/SCsub +++ b/platform/android/SCsub @@ -8,6 +8,7 @@ android_files = [      "file_access_android.cpp",      "audio_driver_opensl.cpp",      "dir_access_jandroid.cpp", +    "tts_android.cpp",      "thread_jandroid.cpp",      "net_socket_android.cpp",      "java_godot_lib_jni.cpp", diff --git a/platform/android/display_server_android.cpp b/platform/android/display_server_android.cpp index e7de287fc6..2eb7056a36 100644 --- a/platform/android/display_server_android.cpp +++ b/platform/android/display_server_android.cpp @@ -34,6 +34,7 @@  #include "java_godot_io_wrapper.h"  #include "java_godot_wrapper.h"  #include "os_android.h" +#include "tts_android.h"  #if defined(VULKAN_ENABLED)  #include "drivers/vulkan/rendering_device_vulkan.h" @@ -63,6 +64,7 @@ bool DisplayServerAndroid::has_feature(Feature p_feature) const {  		case FEATURE_ORIENTATION:  		case FEATURE_TOUCHSCREEN:  		case FEATURE_VIRTUAL_KEYBOARD: +		case FEATURE_TEXT_TO_SPEECH:  			return true;  		default:  			return false; @@ -73,6 +75,34 @@ String DisplayServerAndroid::get_name() const {  	return "Android";  } +bool DisplayServerAndroid::tts_is_speaking() 
const { +	return TTS_Android::is_speaking(); +} + +bool DisplayServerAndroid::tts_is_paused() const { +	return TTS_Android::is_paused(); +} + +Array DisplayServerAndroid::tts_get_voices() const { +	return TTS_Android::get_voices(); +} + +void DisplayServerAndroid::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	TTS_Android::speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); +} + +void DisplayServerAndroid::tts_pause() { +	TTS_Android::pause(); +} + +void DisplayServerAndroid::tts_resume() { +	TTS_Android::resume(); +} + +void DisplayServerAndroid::tts_stop() { +	TTS_Android::stop(); +} +  void DisplayServerAndroid::clipboard_set(const String &p_text) {  	GodotJavaWrapper *godot_java = OS_Android::get_singleton()->get_godot_java();  	ERR_FAIL_COND(!godot_java); diff --git a/platform/android/display_server_android.h b/platform/android/display_server_android.h index 1d268bbcfd..2604214ac0 100644 --- a/platform/android/display_server_android.h +++ b/platform/android/display_server_android.h @@ -91,6 +91,15 @@ public:  	virtual bool has_feature(Feature p_feature) const override;  	virtual String get_name() const override; +	virtual bool tts_is_speaking() const override; +	virtual bool tts_is_paused() const override; +	virtual Array tts_get_voices() const override; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; +	virtual void tts_pause() override; +	virtual void tts_resume() override; +	virtual void tts_stop() override; +  	virtual void clipboard_set(const String &p_text) override;  	virtual String clipboard_get() const override;  	virtual bool clipboard_has() const override; diff --git a/platform/android/java/lib/src/org/godotengine/godot/Godot.java 
b/platform/android/java/lib/src/org/godotengine/godot/Godot.java index 6e597163ab..351e2e60a0 100644 --- a/platform/android/java/lib/src/org/godotengine/godot/Godot.java +++ b/platform/android/java/lib/src/org/godotengine/godot/Godot.java @@ -36,6 +36,7 @@ import static android.content.Context.WINDOW_SERVICE;  import org.godotengine.godot.input.GodotEditText;  import org.godotengine.godot.plugin.GodotPlugin;  import org.godotengine.godot.plugin.GodotPluginRegistry; +import org.godotengine.godot.tts.GodotTTS;  import org.godotengine.godot.utils.GodotNetUtils;  import org.godotengine.godot.utils.PermissionsUtil;  import org.godotengine.godot.xr.XRMode; @@ -165,6 +166,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC  	public static GodotIO io;  	public static GodotNetUtils netUtils; +	public static GodotTTS tts;  	public interface ResultCallback {  		void callback(int requestCode, int resultCode, Intent data); @@ -458,6 +460,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC  		io = new GodotIO(activity);  		GodotLib.io = io;  		netUtils = new GodotNetUtils(activity); +		tts = new GodotTTS(activity);  		mSensorManager = (SensorManager)activity.getSystemService(Context.SENSOR_SERVICE);  		mAccelerometer = mSensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER);  		mSensorManager.registerListener(this, mAccelerometer, SensorManager.SENSOR_DELAY_GAME); diff --git a/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java b/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java index 253a51b83c..1f8f8c82a6 100644 --- a/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java +++ b/platform/android/java/lib/src/org/godotengine/godot/GodotLib.java @@ -92,6 +92,11 @@ public class GodotLib {  	public static native boolean step();  	/** +	 * TTS callback. 
+	 */ +	public static native void ttsCallback(int event, int id, int pos); + +	/**  	 * Forward touch events from the main thread to the GL thread.  	 */  	public static native void touch(int inputDevice, int event, int pointer, int pointerCount, float[] positions); diff --git a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java new file mode 100644 index 0000000000..2239ddac8e --- /dev/null +++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java @@ -0,0 +1,298 @@ +/*************************************************************************/ +/*  GodotTTS.java                                                        */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   
*/ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/ + +package org.godotengine.godot.tts; + +import org.godotengine.godot.GodotLib; + +import android.app.Activity; +import android.os.Bundle; +import android.speech.tts.TextToSpeech; +import android.speech.tts.UtteranceProgressListener; +import android.speech.tts.Voice; + +import androidx.annotation.Keep; + +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Set; + +/** + * Wrapper for Android Text to Speech API and custom utterance query implementation. + * <p> + * A [GodotTTS] provides the following features: + * <p> + * <ul> + * <li>Access to the Android Text to Speech API. + * <li>Utterance pause / resume functions, unsupported by Android TTS API. + * </ul> + */ +@Keep +public class GodotTTS extends UtteranceProgressListener { +	// Note: These constants must be in sync with DisplayServer::TTSUtteranceEvent enum from "servers/display_server.h". +	final private static int EVENT_START = 0; +	final private static int EVENT_END = 1; +	final private static int EVENT_CANCEL = 2; +	final private static int EVENT_BOUNDARY = 3; + +	final private TextToSpeech synth; +	final private LinkedList<GodotUtterance> queue; +	final private Object lock = new Object(); +	private GodotUtterance lastUtterance; + +	private boolean speaking; +	private boolean paused; + +	public GodotTTS(Activity p_activity) { +		synth = new TextToSpeech(p_activity, null); +		queue = new LinkedList<GodotUtterance>(); + +		synth.setOnUtteranceProgressListener(this); +	} + +	private void updateTTS() { +		if (!speaking && queue.size() > 0) { +			int mode = TextToSpeech.QUEUE_FLUSH; +			GodotUtterance message = queue.pollFirst(); + +			Set<Voice> voices = synth.getVoices(); +			for (Voice v : voices) { +				if (v.getName().equals(message.voice)) { +					synth.setVoice(v); +					break; +				} +			} +			synth.setPitch(message.pitch); +			synth.setSpeechRate(message.rate); + +			Bundle params = new Bundle(); +			
params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, message.volume / 100.f); + +			lastUtterance = message; +			lastUtterance.start = 0; +			lastUtterance.offset = 0; +			paused = false; + +			synth.speak(message.text, mode, params, String.valueOf(message.id)); +			speaking = true; +		} +	} + +	/** +	 * Called by TTS engine when the TTS service is about to speak the specified range. +	 */ +	@Override +	public void onRangeStart(String utteranceId, int start, int end, int frame) { +		synchronized (lock) { +			if (lastUtterance != null && Integer.parseInt(utteranceId) == lastUtterance.id) { +				lastUtterance.offset = start; +				GodotLib.ttsCallback(EVENT_BOUNDARY, lastUtterance.id, start + lastUtterance.start); +			} +		} +	} + +	/** +	 * Called by TTS engine when an utterance was canceled in progress. +	 */ +	@Override +	public void onStop(String utteranceId, boolean interrupted) { +		synchronized (lock) { +			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { +				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); +				speaking = false; +				updateTTS(); +			} +		} +	} + +	/** +	 * Called by TTS engine when an utterance has begun to be spoken. +	 */ +	@Override +	public void onStart(String utteranceId) { +		synchronized (lock) { +			if (lastUtterance != null && lastUtterance.start == 0 && Integer.parseInt(utteranceId) == lastUtterance.id) { +				GodotLib.ttsCallback(EVENT_START, lastUtterance.id, 0); +			} +		} +	} + +	/** +	 * Called by TTS engine when an utterance was successfully finished. +	 */ +	@Override +	public void onDone(String utteranceId) { +		synchronized (lock) { +			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { +				GodotLib.ttsCallback(EVENT_END, lastUtterance.id, 0); +				speaking = false; +				updateTTS(); +			} +		} +	} + +	/** +	 * Called by TTS engine when an error has occurred during processing. 
+	 */ +	@Override +	public void onError(String utteranceId, int errorCode) { +		synchronized (lock) { +			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { +				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); +				speaking = false; +				updateTTS(); +			} +		} +	} + +	/** +	 * Called by TTS engine when an error has occurred during processing (pre API level 21 version). +	 */ +	@Override +	public void onError(String utteranceId) { +		synchronized (lock) { +			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) { +				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); +				speaking = false; +				updateTTS(); +			} +		} +	} + +	/** +	 * Adds an utterance to the queue. +	 */ +	public void speak(String text, String voice, int volume, float pitch, float rate, int utterance_id, boolean interrupt) { +		synchronized (lock) { +			GodotUtterance message = new GodotUtterance(text, voice, volume, pitch, rate, utterance_id); +			queue.addLast(message); + +			if (isPaused()) { +				resumeSpeaking(); +			} else { +				updateTTS(); +			} +		} +	} + +	/** +	 * Puts the synthesizer into a paused state. +	 */ +	public void pauseSpeaking() { +		synchronized (lock) { +			if (!paused) { +				paused = true; +				synth.stop(); +			} +		} +	} + +	/** +	 * Resumes the synthesizer if it was paused. 
+	 */ +	public void resumeSpeaking() { +		synchronized (lock) { +			if (lastUtterance != null && paused) { +				int mode = TextToSpeech.QUEUE_FLUSH; + +				Set<Voice> voices = synth.getVoices(); +				for (Voice v : voices) { +					if (v.getName().equals(lastUtterance.voice)) { +						synth.setVoice(v); +						break; +					} +				} +				synth.setPitch(lastUtterance.pitch); +				synth.setSpeechRate(lastUtterance.rate); + +				Bundle params = new Bundle(); +				params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, lastUtterance.volume / 100.f); + +				lastUtterance.start = lastUtterance.offset; +				lastUtterance.offset = 0; +				paused = false; + +				synth.speak(lastUtterance.text.substring(lastUtterance.start), mode, params, String.valueOf(lastUtterance.id)); +				speaking = true; +			} else { +				paused = false; +			} +		} +	} + +	/** +	 * Stops synthesis in progress and removes all utterances from the queue. +	 */ +	public void stopSpeaking() { +		synchronized (lock) { +			for (GodotUtterance u : queue) { +				GodotLib.ttsCallback(EVENT_CANCEL, u.id, 0); +			} +			queue.clear(); + +			if (lastUtterance != null) { +				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0); +			} +			lastUtterance = null; + +			paused = false; +			speaking = false; + +			synth.stop(); +		} +	} + +	/** +	 * Returns voice information. +	 */ +	public String[] getVoices() { +		Set<Voice> voices = synth.getVoices(); +		String[] list = new String[voices.size()]; +		int i = 0; +		for (Voice v : voices) { +			list[i++] = v.getLocale().toString() + ";" + v.getName(); +		} +		return list; +	} + +	/** +	 * Returns true if the synthesizer is generating speech, or has utterances waiting in the queue. +	 */ +	public boolean isSpeaking() { +		return speaking; +	} + +	/** +	 * Returns true if the synthesizer is in a paused state. 
+	 */ +	public boolean isPaused() { +		return paused; +	} +} diff --git a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java new file mode 100644 index 0000000000..bde37e7315 --- /dev/null +++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java @@ -0,0 +1,55 @@ +/*************************************************************************/ +/*  GodotUtterance.java                                                  */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       
*/ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */ +/*************************************************************************/
+
+package org.godotengine.godot.tts;
+
+/**
+ * A speech request for GodotTTS: the text to speak plus the voice, volume, pitch, rate and id it was submitted with.
+ */
+class GodotUtterance {
+	final String text; // Full text of the request.
+	final String voice; // Voice name; GodotTTS.resumeSpeaking() compares it with Voice.getName().
+	final int volume; // Divided by 100 before being handed to the engine in GodotTTS.resumeSpeaking().
+	final float pitch; // Passed to setPitch() in GodotTTS.resumeSpeaking().
+	final float rate; // Passed to setSpeechRate() in GodotTTS.resumeSpeaking().
+	final int id; // Utterance id reported back through GodotLib.ttsCallback().
+
+	int offset = -1; // Position GodotTTS.resumeSpeaking() resumes from; stays -1 until something assigns it.
+	int start = 0; // Index into text at which the most recent speak request began.
+
+	GodotUtterance(String text, String voice, int volume, float pitch, float rate, int id) { // Stores the arguments as-is; offset and start keep their defaults.
+		this.text = text;
+		this.voice = voice;
+		this.volume = volume;
+		this.pitch = pitch;
+		this.rate = rate;
+		this.id = id;
+	}
+}
diff --git a/platform/android/java_godot_lib_jni.cpp b/platform/android/java_godot_lib_jni.cpp index 5e0a9d967b..8ad72b499e 100644 --- a/platform/android/java_godot_lib_jni.cpp +++ b/platform/android/java_godot_lib_jni.cpp @@ -49,6 +49,7 @@  #include "os_android.h"  #include "string_android.h"  #include "thread_jandroid.h" +#include "tts_android.h"  #include <android/input.h>  #include <unistd.h> @@ -96,6 +97,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_initialize(JNIEnv *en  	DirAccessJAndroid::setup(godot_io_java->get_instance());  	NetSocketAndroid::setup(godot_java->get_member_object("netUtils", "Lorg/godotengine/godot/utils/GodotNetUtils;", env)); +	TTS_Android::setup(godot_java->get_member_object("tts", "Lorg/godotengine/godot/tts/GodotTTS;", env));  	os_android = new
OS_Android(godot_java, godot_io_java, p_use_apk_expansion); @@ -213,6 +215,10 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jcl  	}  }
+JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos) { // JNI entry point for GodotLib.ttsCallback(event, id, pos) on the Java side.
+	TTS_Android::_java_utterance_callback(event, id, pos); // Forwards the event unchanged; env and clazz are not used.
+}
+
 JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz) {  	if (step.get() == -1) {  		return true; diff --git a/platform/android/java_godot_lib_jni.h b/platform/android/java_godot_lib_jni.h index e686ee5c09..4f2195942c 100644 --- a/platform/android/java_godot_lib_jni.h +++ b/platform/android/java_godot_lib_jni.h @@ -43,6 +43,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_setup(JNIEnv *env, jc  JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_resize(JNIEnv *env, jclass clazz, jobject p_surface, jint p_width, jint p_height);  JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_newcontext(JNIEnv *env, jclass clazz, jobject p_surface);  JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz); +JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos);  JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jclass clazz);  void touch_preprocessing(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions, jint buttons_mask = 0, jfloat vertical_factor = 0, jfloat horizontal_factor = 0);  JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_touch__IIII_3F(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions); diff --git a/platform/android/tts_android.cpp b/platform/android/tts_android.cpp new file mode 100644 index 0000000000..528878f14e --- /dev/null +++ b/platform/android/tts_android.cpp @@ -0,0 
+1,189 @@ +/*************************************************************************/ +/*  tts_android.cpp                                                      */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       
*/ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */ +/*************************************************************************/ + +#include "tts_android.h" + +#include "java_godot_wrapper.h" +#include "os_android.h" +#include "string_android.h" +#include "thread_jandroid.h" + +jobject TTS_Android::tts = 0; +jclass TTS_Android::cls = 0; + +jmethodID TTS_Android::_is_speaking = 0; +jmethodID TTS_Android::_is_paused = 0; +jmethodID TTS_Android::_get_voices = 0; +jmethodID TTS_Android::_speak = 0; +jmethodID TTS_Android::_pause_speaking = 0; +jmethodID TTS_Android::_resume_speaking = 0; +jmethodID TTS_Android::_stop_speaking = 0; + +Map<int, Char16String> TTS_Android::ids; + +void TTS_Android::setup(jobject p_tts) { +	JNIEnv *env = get_jni_env(); + +	tts = env->NewGlobalRef(p_tts); + +	jclass c = env->GetObjectClass(tts); +	cls = (jclass)env->NewGlobalRef(c); + +	_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z"); +	_is_paused = env->GetMethodID(cls, "isPaused", "()Z"); +	_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;"); +	_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V"); +	_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V"); +	_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V"); +	_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V"); +} + +void TTS_Android::_java_utterance_callback(int p_event, int p_id, 
int p_pos) { +	if (ids.has(p_id)) { +		int pos = 0; +		if ((DisplayServer::TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) { +			// Convert position from UTF-16 to UTF-32. +			const Char16String &string = ids[p_id]; +			for (int i = 0; i < MIN(p_pos, string.length()); i++) { +				char16_t c = string[i]; +				if ((c & 0xfffffc00) == 0xd800) { +					i++; +				} +				pos++; +			} +		} else if ((DisplayServer::TTSUtteranceEvent)p_event != DisplayServer::TTS_UTTERANCE_STARTED) { +			ids.erase(p_id); +		} +		DisplayServer::get_singleton()->tts_post_utterance_event((DisplayServer::TTSUtteranceEvent)p_event, p_id, pos); +	} +} + +bool TTS_Android::is_speaking() { +	if (_is_speaking) { +		JNIEnv *env = get_jni_env(); + +		ERR_FAIL_COND_V(env == nullptr, false); +		return env->CallBooleanMethod(tts, _is_speaking); +	} else { +		return false; +	} +} + +bool TTS_Android::is_paused() { +	if (_is_paused) { +		JNIEnv *env = get_jni_env(); + +		ERR_FAIL_COND_V(env == nullptr, false); +		return env->CallBooleanMethod(tts, _is_paused); +	} else { +		return false; +	} +} + +Array TTS_Android::get_voices() { +	Array list; +	if (_get_voices) { +		JNIEnv *env = get_jni_env(); +		ERR_FAIL_COND_V(env == nullptr, list); + +		jobject voices_object = env->CallObjectMethod(tts, _get_voices); +		jobjectArray *arr = reinterpret_cast<jobjectArray *>(&voices_object); + +		jsize len = env->GetArrayLength(*arr); +		for (int i = 0; i < len; i++) { +			jstring jStr = (jstring)env->GetObjectArrayElement(*arr, i); +			String str = jstring_to_string(jStr, env); +			Vector<String> tokens = str.split(";", true, 2); +			if (tokens.size() == 2) { +				Dictionary voice_d; +				voice_d["name"] = tokens[1]; +				voice_d["id"] = tokens[1]; +				voice_d["language"] = tokens[0]; +				list.push_back(voice_d); +			} +			env->DeleteLocalRef(jStr); +		} +	} +	return list; +} + +void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int 
p_utterance_id, bool p_interrupt) { +	if (p_interrupt) { +		stop(); +	} + +	if (p_text.is_empty()) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); +		return; +	} + +	ids[p_utterance_id] = p_text.utf16(); + +	if (_speak) { +		JNIEnv *env = get_jni_env(); +		ERR_FAIL_COND(env == nullptr); + +		jstring jStrT = env->NewStringUTF(p_text.utf8().get_data()); +		jstring jStrV = env->NewStringUTF(p_voice.utf8().get_data()); +		env->CallVoidMethod(tts, _speak, jStrT, jStrV, CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, p_interrupt); +	} +} + +void TTS_Android::pause() { +	if (_pause_speaking) { +		JNIEnv *env = get_jni_env(); + +		ERR_FAIL_COND(env == nullptr); +		env->CallVoidMethod(tts, _pause_speaking); +	} +} + +void TTS_Android::resume() { +	if (_resume_speaking) { +		JNIEnv *env = get_jni_env(); + +		ERR_FAIL_COND(env == nullptr); +		env->CallVoidMethod(tts, _resume_speaking); +	} +} + +void TTS_Android::stop() { +	for (Map<int, Char16String>::Element *E = ids.front(); E; E = E->next()) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key()); +	} +	ids.clear(); + +	if (_stop_speaking) { +		JNIEnv *env = get_jni_env(); + +		ERR_FAIL_COND(env == nullptr); +		env->CallVoidMethod(tts, _stop_speaking); +	} +} diff --git a/platform/android/tts_android.h b/platform/android/tts_android.h new file mode 100644 index 0000000000..efeed94856 --- /dev/null +++ b/platform/android/tts_android.h @@ -0,0 +1,67 @@ +/*************************************************************************/ +/*  tts_android.h                                                        */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      
https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/
+
+#ifndef TTS_ANDROID_H
+#define TTS_ANDROID_H
+
+#include "core/string/ustring.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+#include <jni.h>
+
+class TTS_Android { // Static-only JNI bridge between the engine and the Java GodotTTS object.
+	static jobject tts; // Global reference to the Java GodotTTS instance passed to setup().
+	static jclass cls; // Global reference to that instance's class.
+
+	static jmethodID _is_speaking; // Method ids resolved in setup(); a wrapper does nothing while its id is 0.
+	static jmethodID _is_paused;
+	static jmethodID _get_voices;
+	static jmethodID _speak;
+	static jmethodID _pause_speaking;
+	static jmethodID _resume_speaking;
+	static jmethodID _stop_speaking;
+
+	static Map<int, Char16String> ids; // Utterance id -> UTF-16 text of the utterance.
+
+public:
+	static void setup(jobject p_tts); // Stores the Java object and resolves the method ids.
+	static void _java_utterance_callback(int p_event, int p_id, int p_pos); // Invoked from the JNI ttsCallback entry point.
+
+	static bool is_speaking();
+	static bool is_paused();
+	static Array get_voices(); // Array of Dictionaries with "name", "id" and "language" entries.
+	static void speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt);
+	static void pause();
+	static void resume();
+	static void stop();
+};
+
+#endif // TTS_ANDROID_H
diff --git a/platform/iphone/SCsub b/platform/iphone/SCsub index 58b574a72f..5e10bf5646 100644 --- a/platform/iphone/SCsub +++ b/platform/iphone/SCsub @@ -13,6 +13,7 @@ iphone_lib = [      "display_server_iphone.mm",      "joypad_iphone.mm",      "godot_view.mm", +    "tts_ios.mm",      "display_layer.mm",      "godot_app_delegate.m",      "godot_view_renderer.mm", diff --git a/platform/iphone/display_server_iphone.h b/platform/iphone/display_server_iphone.h index 7441550f67..6ae190b81a 100644 --- a/platform/iphone/display_server_iphone.h +++ b/platform/iphone/display_server_iphone.h @@ -58,6 +58,8 @@ class DisplayServerIPhone : public DisplayServer {  	RenderingDeviceVulkan *rendering_device_vulkan = nullptr;  #endif +	id tts = nullptr; +  	DisplayServer::ScreenOrientation screen_orientation;  	ObjectID window_attached_instance_id; @@ -123,6 +125,15 @@ public:  	virtual bool has_feature(Feature p_feature) const override;  	virtual String
get_name() const override; +	virtual bool tts_is_speaking() const override; +	virtual bool tts_is_paused() const override; +	virtual Array tts_get_voices() const override; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; +	virtual void tts_pause() override; +	virtual void tts_resume() override; +	virtual void tts_stop() override; +  	virtual int get_screen_count() const override;  	virtual Point2i screen_get_position(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;  	virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override; diff --git a/platform/iphone/display_server_iphone.mm b/platform/iphone/display_server_iphone.mm index a0f8daf5a0..ec58ab195a 100644 --- a/platform/iphone/display_server_iphone.mm +++ b/platform/iphone/display_server_iphone.mm @@ -38,6 +38,7 @@  #include "ios.h"  #import "keyboard_input_view.h"  #include "os_iphone.h" +#include "tts_ios.h"  #import "view_controller.h"  #import <Foundation/Foundation.h> @@ -52,6 +53,9 @@ DisplayServerIPhone *DisplayServerIPhone::get_singleton() {  DisplayServerIPhone::DisplayServerIPhone(const String &p_rendering_driver, WindowMode p_mode, DisplayServer::VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error) {  	rendering_driver = p_rendering_driver; +	// Init TTS +	tts = [[TTS_IOS alloc] init]; +  #if defined(GLES3_ENABLED)  	// FIXME: Add support for both OpenGL and Vulkan when OpenGL is implemented  	// again, @@ -310,6 +314,7 @@ bool DisplayServerIPhone::has_feature(Feature p_feature) const {  		case FEATURE_ORIENTATION:  		case FEATURE_TOUCHSCREEN:  		case FEATURE_VIRTUAL_KEYBOARD: +		case FEATURE_TEXT_TO_SPEECH:  			return true;  		default:  			return false; @@ -320,6 +325,41 @@ String DisplayServerIPhone::get_name() const {  	return "iPhone";  }
+bool DisplayServerIPhone::tts_is_speaking() const { // Forwards to the TTS_IOS object created in the constructor.
+	ERR_FAIL_COND_V(!tts, false); // Every wrapper below reports an error and bails out when no TTS object exists.
+	return [tts isSpeaking];
+}
+
+bool DisplayServerIPhone::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isPaused];
+}
+
+Array DisplayServerIPhone::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array()); // An empty Array is returned when TTS is unavailable.
+	return [tts getVoices];
+}
+
+void DisplayServerIPhone::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt]; // Arguments are passed through unchanged.
+}
+
+void DisplayServerIPhone::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	[tts pauseSpeaking];
+}
+
+void DisplayServerIPhone::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	[tts resumeSpeaking];
+}
+
+void DisplayServerIPhone::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	[tts stopSpeaking];
+}
+
 int DisplayServerIPhone::get_screen_count() const {  	return 1;  } diff --git a/platform/iphone/tts_ios.h b/platform/iphone/tts_ios.h new file mode 100644 index 0000000000..c7defeb98f --- /dev/null +++ b/platform/iphone/tts_ios.h @@ -0,0 +1,59 @@ +/*************************************************************************/ +/*  tts_ios.h                                                            */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   
*/ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/
+
+#ifndef TTS_IOS_H
+#define TTS_IOS_H
+
+#include <AVFAudio/AVSpeechSynthesis.h>
+
+#include "core/string/ustring.h"
+#include "core/templates/list.h"
+#include "core/templates/map.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+@interface TTS_IOS : NSObject <AVSpeechSynthesizerDelegate> { // Text-to-speech backend; also declared as the synthesizer's delegate type.
+	bool speaking; // True while an utterance handed to av_synth has not finished or been canceled.
+	Map<id, int> ids; // AVSpeechUtterance object -> engine utterance id.
+
+	AVSpeechSynthesizer *av_synth;
+	List<DisplayServer::TTSUtterance> queue; // Requests not yet handed to av_synth.
+}
+
+- (void)pauseSpeaking;
+- (void)resumeSpeaking;
+- (void)stopSpeaking;
+- (bool)isSpeaking;
+- (bool)isPaused;
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt;
+- (Array)getVoices;
+@end
+
+#endif // TTS_IOS_H
diff --git a/platform/iphone/tts_ios.mm b/platform/iphone/tts_ios.mm new file mode 100644 index 0000000000..a079d02add --- /dev/null +++ b/platform/iphone/tts_ios.mm @@ -0,0 +1,164 @@ +/*************************************************************************/ +/*  tts_ios.mm                                                           */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   
*/ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/
+
+#include "tts_ios.h"
+
+@implementation TTS_IOS
+
+// Creates the synthesizer and registers this object as its delegate.
+- (id)init {
+	self = [super init];
+	if (self) {
+		// Only touch instance variables when the superclass initializer succeeded.
+		self->speaking = false;
+		self->av_synth = [[AVSpeechSynthesizer alloc] init];
+		[self->av_synth setDelegate:self];
+		print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized.");
+	}
+	return self;
+}
+
+// Delegate callback: reports a boundary event with the position expressed in
+// code points rather than UTF-16 code units.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance {
+	NSString *string = [utterance speechString];
+
+	// Convert from UTF-16 to UTF-32 position.
+	int pos = 0;
+	for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) {
+		unichar c = [string characterAtIndex:i];
+		if ((c & 0xfffffc00) == 0xd800) {
+			// High surrogate: the pair counts as a single character.
+			i++;
+		}
+		pos++;
+	}
+
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos);
+}
+
+// Delegate callback: reports the cancellation, forgets the utterance and starts the next queued one.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance {
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[utterance]);
+	ids.erase(utterance);
+	speaking = false;
+	[self update];
+}
+
+// Delegate callback: reports completion, forgets the utterance and starts the next queued one.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance {
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, ids[utterance]);
+	ids.erase(utterance);
+	speaking = false;
+	[self update];
+}
+
+// Hands the first queued request to the synthesizer if nothing is being spoken.
+- (void)update {
+	if (!speaking && queue.size() > 0) {
+		DisplayServer::TTSUtterance &message = queue.front()->get();
+
+		AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+		[new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]];
+		// Map the engine's 0.1..10 rate (1 = default) onto the platform's
+		// minimum..default..maximum range; a rate of exactly 1 keeps the default.
+		if (message.rate > 1.f) {
+			[new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)];
+		} else if (message.rate < 1.f) {
+			[new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)];
+		}
+		[new_utterance setPitchMultiplier:message.pitch];
+		[new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+
+		// Copy the id out now: `message` refers to the list element that
+		// pop_front() destroys below.
+		const int utterance_id = message.id;
+		ids[new_utterance] = utterance_id;
+		[av_synth speakUtterance:new_utterance];
+
+		queue.pop_front();
+
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, utterance_id);
+		speaking = true;
+	}
+}
+
+- (void)pauseSpeaking {
+	[av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+}
+
+- (void)resumeSpeaking {
+	[av_synth continueSpeaking];
+}
+
+// Cancels every queued request and stops the synthesizer immediately.
+- (void)stopSpeaking {
+	for (DisplayServer::TTSUtterance &message : queue) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id);
+	}
+	queue.clear();
+	[av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	speaking = false;
+}
+
+// True while an utterance is active or requests are still queued.
+- (bool)isSpeaking {
+	return speaking || (queue.size() > 0);
+}
+
+- (bool)isPaused {
+	return [av_synth isPaused];
+}
+
+// Queues a request. Empty text is reported as canceled right away. Volume,
+// pitch and rate are clamped to 0..100, 0..2 and 0.1..10.
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt {
+	if (interrupt) {
+		[self stopSpeaking];
+	}
+
+	if (text.is_empty()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id);
+		return;
+	}
+
+	DisplayServer::TTSUtterance message;
+	message.text = text;
+	message.voice = voice;
+	message.volume = CLAMP(volume, 0, 100);
+	message.pitch = CLAMP(pitch, 0.f, 2.f);
+	message.rate = CLAMP(rate, 0.1f, 10.f);
+	message.id = utterance_id;
+	queue.push_back(message);
+
+	if ([self isPaused]) {
+		[self resumeSpeaking];
+	} else {
+		[self update];
+	}
+}
+
+// Returns one Dictionary ("name", "id", "language") per installed voice.
+- (Array)getVoices {
+	Array list;
+	for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) {
+		NSString *voiceIdentifierString = [voice identifier];
+		NSString *voiceLocaleIdentifier = [voice language];
+		NSString *voiceName = [voice name];
+		Dictionary voice_d;
+		voice_d["name"] = String::utf8([voiceName UTF8String]);
+		voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]);
+		voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]);
+		list.push_back(voice_d);
+	}
+	return list;
+}
+
+@end
diff --git a/platform/javascript/display_server_javascript.cpp b/platform/javascript/display_server_javascript.cpp index a38040922d..cc77c8fcd5 100644 --- a/platform/javascript/display_server_javascript.cpp +++ b/platform/javascript/display_server_javascript.cpp @@ -274,6 +274,90 @@ const char *DisplayServerJavaScript::godot2dom_cursor(DisplayServer::CursorShape  	}  }
+bool DisplayServerJavaScript::tts_is_speaking() const {
+	return godot_js_tts_is_speaking();
+}
+
+bool DisplayServerJavaScript::tts_is_paused() const {
+	return godot_js_tts_is_paused();
+}
+
+void DisplayServerJavaScript::update_voices_callback(int p_size, const char **p_voice) {
+	get_singleton()->voices.clear();
+	for (int i = 0; i < p_size; i++) {
+		Vector<String> tokens = String::utf8(p_voice[i]).split(";", true, 2);
+		if (tokens.size() == 2) {
+			Dictionary voice_d;
+			voice_d["name"] = tokens[1];
+			voice_d["id"] = tokens[1];
+			voice_d["language"] = tokens[0];
+			get_singleton()->voices.push_back(voice_d);
+		}
+	}
+}
+
+Array DisplayServerJavaScript::tts_get_voices() const {
+	godot_js_tts_get_voices(update_voices_callback);
+	return voices;
+}
+
+void DisplayServerJavaScript::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	if (p_interrupt) {
+		tts_stop();
+	}
+
+	if (p_text.is_empty()) {
+		tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	CharString string = p_text.utf8();
+	utterance_ids[p_utterance_id] = string;
+
+	godot_js_tts_speak(string.get_data(), p_voice.utf8().get_data(), CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, DisplayServerJavaScript::_js_utterance_callback);
+}
+
+void DisplayServerJavaScript::tts_pause() {
+	godot_js_tts_pause();
+}
+
+void DisplayServerJavaScript::tts_resume() {
+	godot_js_tts_resume();
+}
+
+void DisplayServerJavaScript::tts_stop() {
+	for (Map<int, CharString>::Element *E = utterance_ids.front(); E; E = E->next()) {
+		tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key());
+	}
+	utterance_ids.clear();
+	godot_js_tts_stop();
+}
+
+void DisplayServerJavaScript::_js_utterance_callback(int p_event, int p_id, int p_pos) {
+	DisplayServerJavaScript *ds = (DisplayServerJavaScript *)DisplayServer::get_singleton();
+	if (ds->utterance_ids.has(p_id)) {
+		int pos = 0;
+		if ((TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
+			// Convert position from UTF-8 to UTF-32. NOTE(review): the JS side passes evt.charIndex, presumably an index into a JS (UTF-16) string rather than a UTF-8 byte offset - confirm for non-ASCII text.
+			const CharString &string = ds->utterance_ids[p_id];
+			for (int i = 0; i < MIN(p_pos, string.length()); i++) {
+				uint8_t c = string[i];
+				if ((c & 0xe0) == 0xc0) {
+					i += 1;
+				} else if ((c & 0xf0) == 0xe0) {
+					i += 2;
+				} else if ((c & 0xf8) == 0xf0) {
+					i += 3;
+				}
+				pos++;
+			}
+		} else if ((TTSUtteranceEvent)p_event != DisplayServer::TTS_UTTERANCE_STARTED) {
+			ds->utterance_ids.erase(p_id);
+		}
+		ds->tts_post_utterance_event((TTSUtteranceEvent)p_event, p_id, pos);
+	}
+}
+
 void DisplayServerJavaScript::cursor_set_shape(CursorShape p_shape) {  	ERR_FAIL_INDEX(p_shape, CURSOR_MAX);  	if (cursor_shape == p_shape) { @@ -755,6 +839,8 @@ bool DisplayServerJavaScript::has_feature(Feature p_feature) const {  		//case FEATURE_ORIENTATION:  		case FEATURE_VIRTUAL_KEYBOARD:  			return godot_js_display_vk_available() != 0; +		case FEATURE_TEXT_TO_SPEECH: +			return godot_js_display_tts_available() != 0;  		default:  			return false;  	} diff --git a/platform/javascript/display_server_javascript.h b/platform/javascript/display_server_javascript.h index b50956d91c..bbd0206087 100644 --- a/platform/javascript/display_server_javascript.h +++ b/platform/javascript/display_server_javascript.h @@ -55,6 +55,8 @@ private:  	EMSCRIPTEN_WEBGL_CONTEXT_HANDLE webgl_ctx = 0;  #endif +	Map<int, CharString> utterance_ids; +  	WindowMode window_mode = WINDOW_MODE_WINDOWED;  	ObjectID window_attached_instance_id = {}; @@ -66,6 +68,8 @@ private:  	String clipboard;  	Point2 touches[32]; +	Array voices; +  	char canvas_id[256] = { 0 };  	bool cursor_inside_canvas = true;  	CursorShape cursor_shape = CURSOR_ARROW; @@ -89,6 +93,7 @@ private:  	static void vk_input_text_callback(const char *p_text, int p_cursor);  	static void gamepad_callback(int p_index, int p_connected, const char *p_id, const char *p_guid);  	void process_joypads(); +	static void _js_utterance_callback(int p_event, int p_id, int p_pos);  	static Vector<String>
get_rendering_drivers_func();  	static DisplayServer *create_func(const String &p_rendering_driver, WindowMode p_window_mode, VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error); @@ -97,6 +102,7 @@ private:  	static void request_quit_callback();  	static void window_blur_callback(); +	static void update_voices_callback(int p_size, const char **p_voice);  	static void update_clipboard_callback(const char *p_text);  	static void send_window_event_callback(int p_notification);  	static void drop_files_js_callback(char **p_filev, int p_filec); @@ -115,6 +121,16 @@ public:  	virtual bool has_feature(Feature p_feature) const override;  	virtual String get_name() const override; +	// tts +	virtual bool tts_is_speaking() const override; +	virtual bool tts_is_paused() const override; +	virtual Array tts_get_voices() const override; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; +	virtual void tts_pause() override; +	virtual void tts_resume() override; +	virtual void tts_stop() override; +  	// cursor  	virtual void cursor_set_shape(CursorShape p_shape) override;  	virtual CursorShape cursor_get_shape() const override; diff --git a/platform/javascript/godot_js.h b/platform/javascript/godot_js.h index 2cb5c3025c..1a383c9799 100644 --- a/platform/javascript/godot_js.h +++ b/platform/javascript/godot_js.h @@ -67,6 +67,15 @@ extern int godot_js_input_gamepad_sample_get(int p_idx, float r_btns[16], int32_  extern void godot_js_input_paste_cb(void (*p_callback)(const char *p_text));  extern void godot_js_input_drop_files_cb(void (*p_callback)(char **p_filev, int p_filec)); +// TTS +extern int godot_js_tts_is_speaking(); +extern int godot_js_tts_is_paused(); +extern int godot_js_tts_get_voices(void (*p_callback)(int p_size, const char **p_voices)); +extern void godot_js_tts_speak(const char *p_text, 
const char *p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, void (*p_callback)(int p_event, int p_id, int p_pos)); +extern void godot_js_tts_pause(); +extern void godot_js_tts_resume(); +extern void godot_js_tts_stop(); +  // Display  extern int godot_js_display_screen_dpi_get();  extern double godot_js_display_pixel_ratio_get(); @@ -109,6 +118,7 @@ extern void godot_js_display_notification_cb(void (*p_callback)(int p_notificati  // Display Virtual Keyboard  extern int godot_js_display_vk_available(); +extern int godot_js_display_tts_available();  extern void godot_js_display_vk_cb(void (*p_input)(const char *p_text, int p_cursor));  extern void godot_js_display_vk_show(const char *p_text, int p_multiline, int p_start, int p_end);  extern void godot_js_display_vk_hide(); diff --git a/platform/javascript/js/libs/library_godot_display.js b/platform/javascript/js/libs/library_godot_display.js index 2bdf4e56ad..54d48643db 100644 --- a/platform/javascript/js/libs/library_godot_display.js +++ b/platform/javascript/js/libs/library_godot_display.js @@ -330,6 +330,91 @@ const GodotDisplay = {  		return 0;  	}, +	godot_js_tts_is_speaking__sig: 'i', +	godot_js_tts_is_speaking: function () { +		return window.speechSynthesis.speaking; +	}, + +	godot_js_tts_is_paused__sig: 'i', +	godot_js_tts_is_paused: function () { +		return window.speechSynthesis.paused; +	}, + +	godot_js_tts_get_voices__sig: 'vi', +	godot_js_tts_get_voices: function (p_callback) { +		const func = GodotRuntime.get_func(p_callback); +		try { +			const arr = []; +			const voices = window.speechSynthesis.getVoices(); +			for (let i = 0; i < voices.length; i++) { +				arr.push(`${voices[i].lang};${voices[i].name}`); +			} +			const c_ptr = GodotRuntime.allocStringArray(arr); +			func(arr.length, c_ptr); +			GodotRuntime.freeStringArray(c_ptr, arr.length); +		} catch (e) { +			// Fail graciously. 
+		} +	}, + +	godot_js_tts_speak__sig: 'viiiffii', +	godot_js_tts_speak: function (p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_callback) { +		const func = GodotRuntime.get_func(p_callback); + +		function listener_end(evt) { +			evt.currentTarget.cb(1 /*TTS_UTTERANCE_ENDED*/, evt.currentTarget.id, 0); +		} + +		function listener_start(evt) { +			evt.currentTarget.cb(0 /*TTS_UTTERANCE_STARTED*/, evt.currentTarget.id, 0); +		} + +		function listener_error(evt) { +			evt.currentTarget.cb(2 /*TTS_UTTERANCE_CANCELED*/, evt.currentTarget.id, 0); +		} + +		function listener_bound(evt) { +			evt.currentTarget.cb(3 /*TTS_UTTERANCE_BOUNDARY*/, evt.currentTarget.id, evt.charIndex); +		} + +		const utterance = new SpeechSynthesisUtterance(GodotRuntime.parseString(p_text)); +		utterance.rate = p_rate; +		utterance.pitch = p_pitch; +		utterance.volume = p_volume / 100.0; +		utterance.addEventListener('end', listener_end); +		utterance.addEventListener('start', listener_start); +		utterance.addEventListener('error', listener_error); +		utterance.addEventListener('boundary', listener_bound); +		utterance.id = p_utterance_id; +		utterance.cb = func; +		const voice = GodotRuntime.parseString(p_voice); +		const voices = window.speechSynthesis.getVoices(); +		for (let i = 0; i < voices.length; i++) { +			if (voices[i].name === voice) { +				utterance.voice = voices[i]; +				break; +			} +		} +		window.speechSynthesis.resume(); +		window.speechSynthesis.speak(utterance); +	}, + +	godot_js_tts_pause__sig: 'v', +	godot_js_tts_pause: function () { +		window.speechSynthesis.pause(); +	}, + +	godot_js_tts_resume__sig: 'v', +	godot_js_tts_resume: function () { +		window.speechSynthesis.resume(); +	}, + +	godot_js_tts_stop__sig: 'v', +	godot_js_tts_stop: function () { +		window.speechSynthesis.cancel(); +		window.speechSynthesis.resume(); +	}, +  	godot_js_display_alert__sig: 'vi',  	godot_js_display_alert: function (p_text) {  		
window.alert(GodotRuntime.parseString(p_text)); // eslint-disable-line no-alert @@ -625,6 +710,11 @@ const GodotDisplay = {  		return GodotDisplayVK.available();  	}, +	godot_js_display_tts_available__sig: 'i', +	godot_js_display_tts_available: function () { +		return 'speechSynthesis' in window; +	}, +  	godot_js_display_vk_cb__sig: 'vi',  	godot_js_display_vk_cb: function (p_input_cb) {  		const input_cb = GodotRuntime.get_func(p_input_cb); diff --git a/platform/linuxbsd/SCsub b/platform/linuxbsd/SCsub index cec8706fbc..09a432eae2 100644 --- a/platform/linuxbsd/SCsub +++ b/platform/linuxbsd/SCsub @@ -20,6 +20,9 @@ if "x11" in env and env["x11"]:          "key_mapping_x11.cpp",      ] +if "speechd" in env and env["speechd"]: +    common_linuxbsd.append(["speechd-so_wrap.c", "tts_linux.cpp"]) +  if "vulkan" in env and env["vulkan"]:      common_linuxbsd.append("vulkan_context_x11.cpp") diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index 2fba58fc53..f8d69b4ca9 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -75,6 +75,7 @@ def get_opts():          BoolVariable("use_msan", "Use LLVM compiler memory sanitizer (MSAN)", False),          BoolVariable("pulseaudio", "Detect and use PulseAudio", True),          BoolVariable("dbus", "Detect and use D-Bus to handle screensaver", True), +        BoolVariable("speechd", "Detect and use Speech Dispatcher for Text-to-Speech support", True),          BoolVariable("udev", "Use udev for gamepad connection callbacks", True),          BoolVariable("x11", "Enable X11 display", True),          BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True), @@ -337,6 +338,13 @@ def configure(env):          else:              print("Warning: D-Bus development libraries not found. 
Disabling screensaver prevention.") +    if env["speechd"]: +        if os.system("pkg-config --exists speech-dispatcher") == 0:  # 0 means found +            env.Append(CPPDEFINES=["SPEECHD_ENABLED"]) +            env.ParseConfig("pkg-config speech-dispatcher --cflags")  # Only cflags, we dlopen the library. +        else: +            print("Warning: Speech Dispatcher development libraries not found. Disabling Text-to-Speech support.") +      if platform.system() == "Linux":          env.Append(CPPDEFINES=["JOYDEV_ENABLED"])          if env["udev"]: diff --git a/platform/linuxbsd/display_server_x11.cpp b/platform/linuxbsd/display_server_x11.cpp index a36fcabd91..027f8562eb 100644 --- a/platform/linuxbsd/display_server_x11.cpp +++ b/platform/linuxbsd/display_server_x11.cpp @@ -139,6 +139,7 @@ bool DisplayServerX11::has_feature(Feature p_feature) const {  		case FEATURE_KEEP_SCREEN_ON:  #endif  		case FEATURE_CLIPBOARD_PRIMARY: +		case FEATURE_TEXT_TO_SPEECH:  			return true;  		default: {  		} @@ -307,6 +308,45 @@ void DisplayServerX11::_flush_mouse_motion() {  	xi.relative_motion.y = 0;  } +#ifdef SPEECHD_ENABLED + +bool DisplayServerX11::tts_is_speaking() const { +	ERR_FAIL_COND_V(!tts, false); +	return tts->is_speaking(); +} + +bool DisplayServerX11::tts_is_paused() const { +	ERR_FAIL_COND_V(!tts, false); +	return tts->is_paused(); +} + +Array DisplayServerX11::tts_get_voices() const { +	ERR_FAIL_COND_V(!tts, Array()); +	return tts->get_voices(); +} + +void DisplayServerX11::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	ERR_FAIL_COND(!tts); +	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); +} + +void DisplayServerX11::tts_pause() { +	ERR_FAIL_COND(!tts); +	tts->pause(); +} + +void DisplayServerX11::tts_resume() { +	ERR_FAIL_COND(!tts); +	tts->resume(); +} + +void DisplayServerX11::tts_stop() { +	ERR_FAIL_COND(!tts); +	tts->stop(); 
+} + +#endif +  void DisplayServerX11::mouse_set_mode(MouseMode p_mode) {  	_THREAD_SAFE_METHOD_ @@ -4633,6 +4673,11 @@ DisplayServerX11::DisplayServerX11(const String &p_rendering_driver, WindowMode  	xdnd_finished = XInternAtom(x11_display, "XdndFinished", False);  	xdnd_selection = XInternAtom(x11_display, "XdndSelection", False); +#ifdef SPEECHD_ENABLED +	// Init TTS +	tts = memnew(TTS_Linux); +#endif +  	//!!!!!!!!!!!!!!!!!!!!!!!!!!  	//TODO - do Vulkan and OpenGL support checks, driver selection and fallback  	rendering_driver = p_rendering_driver; @@ -4985,6 +5030,10 @@ DisplayServerX11::~DisplayServerX11() {  		memfree(xmbstring);  	} +#ifdef SPEECHD_ENABLED +	memdelete(tts); +#endif +  #ifdef DBUS_ENABLED  	memdelete(screensaver);  #endif diff --git a/platform/linuxbsd/display_server_x11.h b/platform/linuxbsd/display_server_x11.h index cd673d94d9..3d49886b94 100644 --- a/platform/linuxbsd/display_server_x11.h +++ b/platform/linuxbsd/display_server_x11.h @@ -46,6 +46,10 @@  #include "servers/rendering/renderer_compositor.h"  #include "servers/rendering_server.h" +#if defined(SPEECHD_ENABLED) +#include "tts_linux.h" +#endif +  #if defined(GLES3_ENABLED)  #include "gl_manager_x11.h"  #endif @@ -112,6 +116,10 @@ class DisplayServerX11 : public DisplayServer {  	bool keep_screen_on = false;  #endif +#ifdef SPEECHD_ENABLED +	TTS_Linux *tts = nullptr; +#endif +  	struct WindowData {  		Window x11_window;  		::XIC xic; @@ -298,6 +306,17 @@ public:  	virtual bool has_feature(Feature p_feature) const override;  	virtual String get_name() const override; +#ifdef SPEECHD_ENABLED +	virtual bool tts_is_speaking() const override; +	virtual bool tts_is_paused() const override; +	virtual Array tts_get_voices() const override; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; +	virtual void tts_pause() override; +	virtual void 
tts_resume() override; +	virtual void tts_stop() override; +#endif +  	virtual void mouse_set_mode(MouseMode p_mode) override;  	virtual MouseMode mouse_get_mode() const override; diff --git a/platform/linuxbsd/speechd-so_wrap.c b/platform/linuxbsd/speechd-so_wrap.c new file mode 100644 index 0000000000..749474e181 --- /dev/null +++ b/platform/linuxbsd/speechd-so_wrap.c @@ -0,0 +1,881 @@ +// This file is generated. Do not edit! +// see https://github.com/hpvb/dynload-wrapper for details +// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21 +// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c +// +#include <stdint.h> + +#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd +#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd +#define spd_open spd_open_dylibloader_orig_speechd +#define spd_open2 spd_open2_dylibloader_orig_speechd +#define spd_close spd_close_dylibloader_orig_speechd +#define spd_say spd_say_dylibloader_orig_speechd +#define spd_sayf spd_sayf_dylibloader_orig_speechd +#define spd_stop spd_stop_dylibloader_orig_speechd +#define spd_stop_all spd_stop_all_dylibloader_orig_speechd +#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd +#define spd_cancel spd_cancel_dylibloader_orig_speechd +#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd +#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd +#define spd_pause spd_pause_dylibloader_orig_speechd +#define spd_pause_all spd_pause_all_dylibloader_orig_speechd +#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd +#define spd_resume spd_resume_dylibloader_orig_speechd +#define spd_resume_all spd_resume_all_dylibloader_orig_speechd +#define spd_resume_uid 
spd_resume_uid_dylibloader_orig_speechd +#define spd_key spd_key_dylibloader_orig_speechd +#define spd_char spd_char_dylibloader_orig_speechd +#define spd_wchar spd_wchar_dylibloader_orig_speechd +#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd +#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd +#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd +#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd +#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd +#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd +#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd +#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd +#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd +#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd +#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd +#define spd_set_notification spd_set_notification_dylibloader_orig_speechd +#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd +#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd +#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd +#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_uid 
spd_set_voice_pitch_range_uid_dylibloader_orig_speechd +#define spd_set_volume spd_set_volume_dylibloader_orig_speechd +#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd +#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd +#define spd_get_volume spd_get_volume_dylibloader_orig_speechd +#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd +#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd +#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd +#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd +#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd +#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd +#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd +#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd +#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd +#define spd_set_language spd_set_language_dylibloader_orig_speechd +#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd +#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd +#define spd_get_language spd_get_language_dylibloader_orig_speechd +#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd +#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd +#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd +#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd +#define spd_list_modules spd_list_modules_dylibloader_orig_speechd +#define free_spd_modules free_spd_modules_dylibloader_orig_speechd +#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd +#define spd_list_voices spd_list_voices_dylibloader_orig_speechd +#define spd_list_synthesis_voices 
spd_list_synthesis_voices_dylibloader_orig_speechd +#define free_spd_voices free_spd_voices_dylibloader_orig_speechd +#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd +#define spd_execute_command spd_execute_command_dylibloader_orig_speechd +#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd +#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd +#define spd_send_data spd_send_data_dylibloader_orig_speechd +#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd +#include <libspeechd.h> +#undef SPDConnectionAddress__free +#undef spd_get_default_address +#undef spd_open +#undef spd_open2 +#undef spd_close +#undef spd_say +#undef spd_sayf +#undef spd_stop +#undef spd_stop_all +#undef spd_stop_uid +#undef spd_cancel +#undef spd_cancel_all +#undef spd_cancel_uid +#undef spd_pause +#undef spd_pause_all +#undef spd_pause_uid +#undef spd_resume +#undef spd_resume_all +#undef spd_resume_uid +#undef spd_key +#undef spd_char +#undef spd_wchar +#undef spd_sound_icon +#undef spd_set_voice_type +#undef spd_set_voice_type_all +#undef spd_set_voice_type_uid +#undef spd_get_voice_type +#undef spd_set_synthesis_voice +#undef spd_set_synthesis_voice_all +#undef spd_set_synthesis_voice_uid +#undef spd_set_data_mode +#undef spd_set_notification_on +#undef spd_set_notification_off +#undef spd_set_notification +#undef spd_set_voice_rate +#undef spd_set_voice_rate_all +#undef spd_set_voice_rate_uid +#undef spd_get_voice_rate +#undef spd_set_voice_pitch +#undef spd_set_voice_pitch_all +#undef spd_set_voice_pitch_uid +#undef spd_get_voice_pitch +#undef spd_set_voice_pitch_range +#undef spd_set_voice_pitch_range_all +#undef spd_set_voice_pitch_range_uid +#undef spd_set_volume +#undef spd_set_volume_all +#undef spd_set_volume_uid +#undef spd_get_volume +#undef spd_set_punctuation +#undef spd_set_punctuation_all +#undef 
spd_set_punctuation_uid +#undef spd_set_capital_letters +#undef spd_set_capital_letters_all +#undef spd_set_capital_letters_uid +#undef spd_set_spelling +#undef spd_set_spelling_all +#undef spd_set_spelling_uid +#undef spd_set_language +#undef spd_set_language_all +#undef spd_set_language_uid +#undef spd_get_language +#undef spd_set_output_module +#undef spd_set_output_module_all +#undef spd_set_output_module_uid +#undef spd_get_message_list_fd +#undef spd_list_modules +#undef free_spd_modules +#undef spd_get_output_module +#undef spd_list_voices +#undef spd_list_synthesis_voices +#undef free_spd_voices +#undef spd_execute_command_with_list_reply +#undef spd_execute_command +#undef spd_execute_command_with_reply +#undef spd_execute_command_wo_mutex +#undef spd_send_data +#undef spd_send_data_wo_mutex +#include <dlfcn.h> +#include <stdio.h> +void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*); +SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**); +SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode); +SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**); +void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...); +int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_pause_dylibloader_wrapper_speechd)( 
SPDConnection*); +int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t); +int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int); +SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode); +int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*); +int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( 
SPDConnection*, signed int, unsigned int); +int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int); +int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int); +int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int); +int 
(*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*); +int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**); +char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*); +void (*free_spd_modules_dylibloader_wrapper_speechd)( char**); +char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*); +char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*); +SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*); +void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**); +char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*); +int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*); +int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**); +int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*); +char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +int initialize_speechd(int verbose) { +  void *handle; +  char *error; +  handle = dlopen("libspeechd.so.2", RTLD_LAZY); +  if (!handle) { +    if (verbose) { +      fprintf(stderr, "%s\n", dlerror()); +    } +    return(1); +  } +  dlerror(); +// SPDConnectionAddress__free +  
*(void **) (&SPDConnectionAddress__free_dylibloader_wrapper_speechd) = dlsym(handle, "SPDConnectionAddress__free"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_default_address +  *(void **) (&spd_get_default_address_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_default_address"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_open +  *(void **) (&spd_open_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_open2 +  *(void **) (&spd_open2_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open2"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_close +  *(void **) (&spd_close_dylibloader_wrapper_speechd) = dlsym(handle, "spd_close"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_say +  *(void **) (&spd_say_dylibloader_wrapper_speechd) = dlsym(handle, "spd_say"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_sayf +  *(void **) (&spd_sayf_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sayf"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_stop +  *(void **) (&spd_stop_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_stop_all +  *(void **) (&spd_stop_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", 
error); +    } +  } +// spd_stop_uid +  *(void **) (&spd_stop_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_cancel +  *(void **) (&spd_cancel_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_cancel_all +  *(void **) (&spd_cancel_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_cancel_uid +  *(void **) (&spd_cancel_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_pause +  *(void **) (&spd_pause_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_pause_all +  *(void **) (&spd_pause_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_pause_uid +  *(void **) (&spd_pause_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_resume +  *(void **) (&spd_resume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_resume_all +  *(void **) (&spd_resume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_all"); +  if (verbose) { +    error = dlerror(); +    if 
(error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_resume_uid +  *(void **) (&spd_resume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_key +  *(void **) (&spd_key_dylibloader_wrapper_speechd) = dlsym(handle, "spd_key"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_char +  *(void **) (&spd_char_dylibloader_wrapper_speechd) = dlsym(handle, "spd_char"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_wchar +  *(void **) (&spd_wchar_dylibloader_wrapper_speechd) = dlsym(handle, "spd_wchar"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_sound_icon +  *(void **) (&spd_sound_icon_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sound_icon"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_type +  *(void **) (&spd_set_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_type_all +  *(void **) (&spd_set_voice_type_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_type_uid +  *(void **) (&spd_set_voice_type_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_voice_type +  *(void **) 
(&spd_get_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_type"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_synthesis_voice +  *(void **) (&spd_set_synthesis_voice_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_synthesis_voice_all +  *(void **) (&spd_set_synthesis_voice_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_synthesis_voice_uid +  *(void **) (&spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_data_mode +  *(void **) (&spd_set_data_mode_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_data_mode"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_notification_on +  *(void **) (&spd_set_notification_on_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_on"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_notification_off +  *(void **) (&spd_set_notification_off_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_off"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_notification +  *(void **) (&spd_set_notification_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, 
"%s\n", error); +    } +  } +// spd_set_voice_rate +  *(void **) (&spd_set_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_rate_all +  *(void **) (&spd_set_voice_rate_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_rate_uid +  *(void **) (&spd_set_voice_rate_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_voice_rate +  *(void **) (&spd_get_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_rate"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_pitch +  *(void **) (&spd_set_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_pitch_all +  *(void **) (&spd_set_voice_pitch_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_pitch_uid +  *(void **) (&spd_set_voice_pitch_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_voice_pitch +  *(void **) (&spd_get_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_pitch"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +  
    fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_pitch_range +  *(void **) (&spd_set_voice_pitch_range_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_pitch_range_all +  *(void **) (&spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_voice_pitch_range_uid +  *(void **) (&spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_volume +  *(void **) (&spd_set_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_volume_all +  *(void **) (&spd_set_volume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_volume_uid +  *(void **) (&spd_set_volume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_volume +  *(void **) (&spd_get_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_volume"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_punctuation +  *(void **) (&spd_set_punctuation_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation"); +  if (verbose) { +    error = dlerror(); +  
  if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_punctuation_all +  *(void **) (&spd_set_punctuation_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_punctuation_uid +  *(void **) (&spd_set_punctuation_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_capital_letters +  *(void **) (&spd_set_capital_letters_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_capital_letters_all +  *(void **) (&spd_set_capital_letters_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_capital_letters_uid +  *(void **) (&spd_set_capital_letters_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_spelling +  *(void **) (&spd_set_spelling_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_spelling_all +  *(void **) (&spd_set_spelling_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_spelling_uid +  *(void **) (&spd_set_spelling_uid_dylibloader_wrapper_speechd) = dlsym(handle, 
"spd_set_spelling_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_language +  *(void **) (&spd_set_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_language_all +  *(void **) (&spd_set_language_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_language_uid +  *(void **) (&spd_set_language_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_language +  *(void **) (&spd_get_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_language"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_output_module +  *(void **) (&spd_set_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_output_module_all +  *(void **) (&spd_set_output_module_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_all"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_set_output_module_uid +  *(void **) (&spd_set_output_module_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_uid"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_message_list_fd +  *(void **) 
(&spd_get_message_list_fd_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_message_list_fd"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_list_modules +  *(void **) (&spd_list_modules_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_modules"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// free_spd_modules +  *(void **) (&free_spd_modules_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_modules"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_get_output_module +  *(void **) (&spd_get_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_output_module"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_list_voices +  *(void **) (&spd_list_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_voices"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_list_synthesis_voices +  *(void **) (&spd_list_synthesis_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_synthesis_voices"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// free_spd_voices +  *(void **) (&free_spd_voices_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_voices"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_execute_command_with_list_reply +  *(void **) (&spd_execute_command_with_list_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_list_reply"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// 
spd_execute_command +  *(void **) (&spd_execute_command_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_execute_command_with_reply +  *(void **) (&spd_execute_command_with_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_reply"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_execute_command_wo_mutex +  *(void **) (&spd_execute_command_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_wo_mutex"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_send_data +  *(void **) (&spd_send_data_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +// spd_send_data_wo_mutex +  *(void **) (&spd_send_data_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data_wo_mutex"); +  if (verbose) { +    error = dlerror(); +    if (error != NULL) { +      fprintf(stderr, "%s\n", error); +    } +  } +return 0; +} diff --git a/platform/linuxbsd/speechd-so_wrap.h b/platform/linuxbsd/speechd-so_wrap.h new file mode 100644 index 0000000000..8e1c053348 --- /dev/null +++ b/platform/linuxbsd/speechd-so_wrap.h @@ -0,0 +1,330 @@ +#ifndef DYLIBLOAD_WRAPPER_SPEECHD +#define DYLIBLOAD_WRAPPER_SPEECHD +// This file is generated. Do not edit! 
+// see https://github.com/hpvb/dynload-wrapper for details +// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21 +// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c +// +#include <stdint.h> + +#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd +#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd +#define spd_open spd_open_dylibloader_orig_speechd +#define spd_open2 spd_open2_dylibloader_orig_speechd +#define spd_close spd_close_dylibloader_orig_speechd +#define spd_say spd_say_dylibloader_orig_speechd +#define spd_sayf spd_sayf_dylibloader_orig_speechd +#define spd_stop spd_stop_dylibloader_orig_speechd +#define spd_stop_all spd_stop_all_dylibloader_orig_speechd +#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd +#define spd_cancel spd_cancel_dylibloader_orig_speechd +#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd +#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd +#define spd_pause spd_pause_dylibloader_orig_speechd +#define spd_pause_all spd_pause_all_dylibloader_orig_speechd +#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd +#define spd_resume spd_resume_dylibloader_orig_speechd +#define spd_resume_all spd_resume_all_dylibloader_orig_speechd +#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd +#define spd_key spd_key_dylibloader_orig_speechd +#define spd_char spd_char_dylibloader_orig_speechd +#define spd_wchar spd_wchar_dylibloader_orig_speechd +#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd +#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd +#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd +#define 
spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd +#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd +#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd +#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd +#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd +#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd +#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd +#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd +#define spd_set_notification spd_set_notification_dylibloader_orig_speechd +#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd +#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd +#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd +#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd +#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd +#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd +#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd +#define spd_set_volume spd_set_volume_dylibloader_orig_speechd +#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd +#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd +#define spd_get_volume spd_get_volume_dylibloader_orig_speechd +#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd +#define spd_set_punctuation_all 
spd_set_punctuation_all_dylibloader_orig_speechd +#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd +#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd +#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd +#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd +#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd +#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd +#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd +#define spd_set_language spd_set_language_dylibloader_orig_speechd +#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd +#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd +#define spd_get_language spd_get_language_dylibloader_orig_speechd +#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd +#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd +#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd +#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd +#define spd_list_modules spd_list_modules_dylibloader_orig_speechd +#define free_spd_modules free_spd_modules_dylibloader_orig_speechd +#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd +#define spd_list_voices spd_list_voices_dylibloader_orig_speechd +#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd +#define free_spd_voices free_spd_voices_dylibloader_orig_speechd +#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd +#define spd_execute_command spd_execute_command_dylibloader_orig_speechd +#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd +#define spd_execute_command_wo_mutex 
spd_execute_command_wo_mutex_dylibloader_orig_speechd +#define spd_send_data spd_send_data_dylibloader_orig_speechd +#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd +#include <libspeechd.h> +#undef SPDConnectionAddress__free +#undef spd_get_default_address +#undef spd_open +#undef spd_open2 +#undef spd_close +#undef spd_say +#undef spd_sayf +#undef spd_stop +#undef spd_stop_all +#undef spd_stop_uid +#undef spd_cancel +#undef spd_cancel_all +#undef spd_cancel_uid +#undef spd_pause +#undef spd_pause_all +#undef spd_pause_uid +#undef spd_resume +#undef spd_resume_all +#undef spd_resume_uid +#undef spd_key +#undef spd_char +#undef spd_wchar +#undef spd_sound_icon +#undef spd_set_voice_type +#undef spd_set_voice_type_all +#undef spd_set_voice_type_uid +#undef spd_get_voice_type +#undef spd_set_synthesis_voice +#undef spd_set_synthesis_voice_all +#undef spd_set_synthesis_voice_uid +#undef spd_set_data_mode +#undef spd_set_notification_on +#undef spd_set_notification_off +#undef spd_set_notification +#undef spd_set_voice_rate +#undef spd_set_voice_rate_all +#undef spd_set_voice_rate_uid +#undef spd_get_voice_rate +#undef spd_set_voice_pitch +#undef spd_set_voice_pitch_all +#undef spd_set_voice_pitch_uid +#undef spd_get_voice_pitch +#undef spd_set_voice_pitch_range +#undef spd_set_voice_pitch_range_all +#undef spd_set_voice_pitch_range_uid +#undef spd_set_volume +#undef spd_set_volume_all +#undef spd_set_volume_uid +#undef spd_get_volume +#undef spd_set_punctuation +#undef spd_set_punctuation_all +#undef spd_set_punctuation_uid +#undef spd_set_capital_letters +#undef spd_set_capital_letters_all +#undef spd_set_capital_letters_uid +#undef spd_set_spelling +#undef spd_set_spelling_all +#undef spd_set_spelling_uid +#undef spd_set_language +#undef spd_set_language_all +#undef spd_set_language_uid +#undef spd_get_language +#undef spd_set_output_module +#undef spd_set_output_module_all +#undef spd_set_output_module_uid +#undef 
spd_get_message_list_fd +#undef spd_list_modules +#undef free_spd_modules +#undef spd_get_output_module +#undef spd_list_voices +#undef spd_list_synthesis_voices +#undef free_spd_voices +#undef spd_execute_command_with_list_reply +#undef spd_execute_command +#undef spd_execute_command_with_reply +#undef spd_execute_command_wo_mutex +#undef spd_send_data +#undef spd_send_data_wo_mutex +#ifdef __cplusplus +extern "C" { +#endif +#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_wrapper_speechd +#define spd_get_default_address spd_get_default_address_dylibloader_wrapper_speechd +#define spd_open spd_open_dylibloader_wrapper_speechd +#define spd_open2 spd_open2_dylibloader_wrapper_speechd +#define spd_close spd_close_dylibloader_wrapper_speechd +#define spd_say spd_say_dylibloader_wrapper_speechd +#define spd_sayf spd_sayf_dylibloader_wrapper_speechd +#define spd_stop spd_stop_dylibloader_wrapper_speechd +#define spd_stop_all spd_stop_all_dylibloader_wrapper_speechd +#define spd_stop_uid spd_stop_uid_dylibloader_wrapper_speechd +#define spd_cancel spd_cancel_dylibloader_wrapper_speechd +#define spd_cancel_all spd_cancel_all_dylibloader_wrapper_speechd +#define spd_cancel_uid spd_cancel_uid_dylibloader_wrapper_speechd +#define spd_pause spd_pause_dylibloader_wrapper_speechd +#define spd_pause_all spd_pause_all_dylibloader_wrapper_speechd +#define spd_pause_uid spd_pause_uid_dylibloader_wrapper_speechd +#define spd_resume spd_resume_dylibloader_wrapper_speechd +#define spd_resume_all spd_resume_all_dylibloader_wrapper_speechd +#define spd_resume_uid spd_resume_uid_dylibloader_wrapper_speechd +#define spd_key spd_key_dylibloader_wrapper_speechd +#define spd_char spd_char_dylibloader_wrapper_speechd +#define spd_wchar spd_wchar_dylibloader_wrapper_speechd +#define spd_sound_icon spd_sound_icon_dylibloader_wrapper_speechd +#define spd_set_voice_type spd_set_voice_type_dylibloader_wrapper_speechd +#define spd_set_voice_type_all 
spd_set_voice_type_all_dylibloader_wrapper_speechd +#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_wrapper_speechd +#define spd_get_voice_type spd_get_voice_type_dylibloader_wrapper_speechd +#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_wrapper_speechd +#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_wrapper_speechd +#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd +#define spd_set_data_mode spd_set_data_mode_dylibloader_wrapper_speechd +#define spd_set_notification_on spd_set_notification_on_dylibloader_wrapper_speechd +#define spd_set_notification_off spd_set_notification_off_dylibloader_wrapper_speechd +#define spd_set_notification spd_set_notification_dylibloader_wrapper_speechd +#define spd_set_voice_rate spd_set_voice_rate_dylibloader_wrapper_speechd +#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_wrapper_speechd +#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_wrapper_speechd +#define spd_get_voice_rate spd_get_voice_rate_dylibloader_wrapper_speechd +#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_wrapper_speechd +#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd +#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd +#define spd_set_volume spd_set_volume_dylibloader_wrapper_speechd +#define spd_set_volume_all spd_set_volume_all_dylibloader_wrapper_speechd +#define spd_set_volume_uid spd_set_volume_uid_dylibloader_wrapper_speechd +#define spd_get_volume 
spd_get_volume_dylibloader_wrapper_speechd +#define spd_set_punctuation spd_set_punctuation_dylibloader_wrapper_speechd +#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_wrapper_speechd +#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_wrapper_speechd +#define spd_set_capital_letters spd_set_capital_letters_dylibloader_wrapper_speechd +#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_wrapper_speechd +#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_wrapper_speechd +#define spd_set_spelling spd_set_spelling_dylibloader_wrapper_speechd +#define spd_set_spelling_all spd_set_spelling_all_dylibloader_wrapper_speechd +#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_wrapper_speechd +#define spd_set_language spd_set_language_dylibloader_wrapper_speechd +#define spd_set_language_all spd_set_language_all_dylibloader_wrapper_speechd +#define spd_set_language_uid spd_set_language_uid_dylibloader_wrapper_speechd +#define spd_get_language spd_get_language_dylibloader_wrapper_speechd +#define spd_set_output_module spd_set_output_module_dylibloader_wrapper_speechd +#define spd_set_output_module_all spd_set_output_module_all_dylibloader_wrapper_speechd +#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_wrapper_speechd +#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_wrapper_speechd +#define spd_list_modules spd_list_modules_dylibloader_wrapper_speechd +#define free_spd_modules free_spd_modules_dylibloader_wrapper_speechd +#define spd_get_output_module spd_get_output_module_dylibloader_wrapper_speechd +#define spd_list_voices spd_list_voices_dylibloader_wrapper_speechd +#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_wrapper_speechd +#define free_spd_voices free_spd_voices_dylibloader_wrapper_speechd +#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_wrapper_speechd 
+#define spd_execute_command spd_execute_command_dylibloader_wrapper_speechd +#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_wrapper_speechd +#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_wrapper_speechd +#define spd_send_data spd_send_data_dylibloader_wrapper_speechd +#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_wrapper_speechd +extern void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*); +extern SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**); +extern SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode); +extern SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**); +extern void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...); +extern int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*); +extern int 
(*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int); +extern int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t); +extern int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*); +extern int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +extern int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType); +extern int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int); +extern SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +extern int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode); +extern int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +extern int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification); +extern int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*); +extern int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*); +extern int 
(*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int); +extern int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int); +extern int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +extern int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation); +extern int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int); +extern int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +extern int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters); +extern int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int); +extern int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +extern int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling); +extern int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int); 
+extern int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +extern char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*); +extern int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*); +extern int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int); +extern int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**); +extern char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*); +extern void (*free_spd_modules_dylibloader_wrapper_speechd)( char**); +extern char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*); +extern char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*); +extern SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*); +extern void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**); +extern char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*); +extern int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*); +extern int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**); +extern int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*); +extern char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +extern char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int); +int initialize_speechd(int verbose); +#ifdef __cplusplus +} +#endif +#endif diff --git a/platform/linuxbsd/tts_linux.cpp 
b/platform/linuxbsd/tts_linux.cpp new file mode 100644 index 0000000000..aea1183d3d --- /dev/null +++ b/platform/linuxbsd/tts_linux.cpp @@ -0,0 +1,261 @@ +/*************************************************************************/ +/*  tts_linux.cpp                                                        */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       
*/ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */ +/*************************************************************************/ + +#include "tts_linux.h" + +#include "core/config/project_settings.h" +#include "servers/text_server.h" + +TTS_Linux *TTS_Linux::singleton = nullptr; + +// Thread entry point started by the TTS_Linux constructor: loads the Speech Dispatcher library, opens the connection and registers the event callbacks. +void TTS_Linux::speech_init_thread_func(void *p_userdata) { +	TTS_Linux *tts = (TTS_Linux *)p_userdata; +	if (tts) { +		MutexLock thread_safe_method(tts->_thread_safe_); +#ifdef DEBUG_ENABLED +		int dylibloader_verbose = 1; +#else +		int dylibloader_verbose = 0; +#endif +		if (initialize_speechd(dylibloader_verbose) == 0) { +			CharString class_str; +			String config_name = GLOBAL_GET("application/config/name"); +			if (config_name.length() == 0) { +				class_str = "Godot_Engine"; +			} else { +				class_str = config_name.utf8(); +			} +			tts->synth = spd_open(class_str, "Godot_Engine_Speech_API", "Godot_Engine", SPD_MODE_THREADED); +			if (tts->synth) { +				tts->synth->callback_end = &speech_event_callback; +				tts->synth->callback_cancel = &speech_event_callback; +				tts->synth->callback_im = &speech_event_index_mark; +				spd_set_notification_on(tts->synth, SPD_END); +				spd_set_notification_on(tts->synth, SPD_CANCEL); +				// Index mark notifications have to be switched on as well, otherwise `callback_im` is not invoked. +				spd_set_notification_on(tts->synth, SPD_INDEX_MARKS); + +				print_verbose("Text-to-Speech: Speech Dispatcher initialized."); +			} else { +				print_verbose("Text-to-Speech: Cannot initialize Speech Dispatcher synthesizer!"); +			} +		} else { +			
print_verbose("Text-to-Speech: Cannot load Speech Dispatcher library!"); +		} +	} +} + +// Speech Dispatcher index mark callback: forwards the word offset stored in the mark name as a TTS_UTTERANCE_BOUNDARY event. +void TTS_Linux::speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark) { +	TTS_Linux *tts = TTS_Linux::get_singleton(); +	if (tts) { +		// Lock before looking at `ids`: the map is modified by speech_event_callback() and stop() under this same mutex. +		MutexLock thread_safe_method(tts->_thread_safe_); +		if (tts->ids.has(p_msg_id)) { +			// Get word offset from the index mark injected to the text stream. +			String mark = String::utf8(p_index_mark); +			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, tts->ids[p_msg_id], mark.to_int()); +		} +	} +} + +// Handles end/cancel notifications for a known message id, then starts the next queued utterance if nothing is being spoken. +void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type) { +	TTS_Linux *tts = TTS_Linux::get_singleton(); +	if (tts) { +		MutexLock thread_safe_method(tts->_thread_safe_); +		List<DisplayServer::TTSUtterance> &queue = tts->queue; +		if (!tts->paused && tts->ids.has(p_msg_id)) { +			if (p_type == SPD_EVENT_END) { +				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, tts->ids[p_msg_id]); +				tts->ids.erase(p_msg_id); +				tts->last_msg_id = -1; +				tts->speaking = false; +			} else if (p_type == SPD_EVENT_CANCEL) { +				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, tts->ids[p_msg_id]); +				tts->ids.erase(p_msg_id); +				tts->last_msg_id = -1; +				tts->speaking = false; +			} +		} +		// Loop instead of a single attempt, so that a message rejected by spd_say() does not stall the rest of the queue. +		while (!tts->speaking && queue.size() > 0) { +			DisplayServer::TTSUtterance &message = queue.front()->get(); + +			// Inject index mark after each word. 
+			String text; +			String language; +			SPDVoice **voices = spd_list_synthesis_voices(tts->synth); +			if (voices != nullptr) { +				SPDVoice **voices_ptr = voices; +				while (*voices_ptr != nullptr) { +					if (String::utf8((*voices_ptr)->name) == message.voice) { +						language = String::utf8((*voices_ptr)->language); +						break; +					} +					voices_ptr++; +				} +				free_spd_voices(voices); +			} +			PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language); +			int prev = 0; +			for (int i = 0; i < breaks.size(); i++) { +				// The message is sent in SSML data mode, so the plain text is XML-escaped; mark names keep the offsets into the original text. +				text += message.text.substr(prev, breaks[i] - prev).xml_escape(); +				text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>"; +				prev = breaks[i]; +			} +			text += message.text.substr(prev, -1).xml_escape(); + +			spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data()); +			// speak() clamps volume to 0..100 and pitch to 0..2; both are rescaled to -100..100 here. +			spd_set_volume(tts->synth, message.volume * 2 - 100); +			spd_set_voice_pitch(tts->synth, (message.pitch - 1) * 100); +			float rate = 0; +			if (message.rate > 1.f) { +				rate = log10(MIN(message.rate, 2.5f)) / log10(2.5f) * 100; +			} else if (message.rate < 1.f) { +				rate = log10(MAX(message.rate, 0.5f)) / log10(0.5f) * -100; +			} +			spd_set_voice_rate(tts->synth, rate); +			spd_set_data_mode(tts->synth, SPD_DATA_SSML); +			tts->last_msg_id = spd_say(tts->synth, SPD_TEXT, text.utf8().get_data()); +			if (tts->last_msg_id < 0) { +				// spd_say() failed: nothing was queued, so no end/cancel notification will arrive for this utterance. Report it as canceled and try the next one. +				tts->last_msg_id = -1; +				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); +				queue.pop_front(); +				continue; +			} +			tts->ids[tts->last_msg_id] = message.id; +			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id); + +			queue.pop_front(); +			tts->speaking = true; +		} +	} +} + +bool TTS_Linux::is_speaking() const { +	return speaking; +} + +bool TTS_Linux::is_paused() const { +	return paused; +} + +// Returns one dictionary ("name", "id", "language") per voice reported by spd_list_synthesis_voices(). +Array TTS_Linux::get_voices() const { +	_THREAD_SAFE_METHOD_ + +	ERR_FAIL_COND_V(!synth, Array()); +	Array list; +	SPDVoice **voices = spd_list_synthesis_voices(synth); +	if (voices != nullptr) { +		SPDVoice **voices_ptr = voices; +		while (*voices_ptr != nullptr) { +			Dictionary voice_d; +			
voice_d["name"] = String::utf8((*voices_ptr)->name); +			voice_d["id"] = String::utf8((*voices_ptr)->name); +			voice_d["language"] = String::utf8((*voices_ptr)->language) + "_" + String::utf8((*voices_ptr)->variant); +			list.push_back(voice_d); + +			voices_ptr++; +		} +		free_spd_voices(voices); +	} +	return list; +} + +// Queues an utterance (after stop() when p_interrupt is set) and starts it right away if nothing is being spoken. Empty text is reported as canceled. +void TTS_Linux::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	_THREAD_SAFE_METHOD_ + +	ERR_FAIL_COND(!synth); +	if (p_interrupt) { +		stop(); +	} + +	if (p_text.is_empty()) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); +		return; +	} + +	DisplayServer::TTSUtterance message; +	message.text = p_text; +	message.voice = p_voice; +	message.volume = CLAMP(p_volume, 0, 100); +	message.pitch = CLAMP(p_pitch, 0.f, 2.f); +	message.rate = CLAMP(p_rate, 0.1f, 10.f); +	message.id = p_utterance_id; +	queue.push_back(message); + +	if (is_paused()) { +		resume(); +	} else { +		// Message id 0 matches no entry in `ids`; the call only serves to start the next queued utterance. +		speech_event_callback(0, 0, SPD_EVENT_BEGIN); +	} +} + +void TTS_Linux::pause() { +	_THREAD_SAFE_METHOD_ + +	ERR_FAIL_COND(!synth); +	if (spd_pause(synth) == 0) { +		paused = true; +	} +} + +void TTS_Linux::resume() { +	_THREAD_SAFE_METHOD_ + +	ERR_FAIL_COND(!synth); +	spd_resume(synth); +	paused = false; +} + +// Posts TTS_UTTERANCE_CANCELED for every queued utterance and for the current one, then cancels speech and resets the state. +void TTS_Linux::stop() { +	_THREAD_SAFE_METHOD_ + +	ERR_FAIL_COND(!synth); +	for (DisplayServer::TTSUtterance &message : queue) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); +	} +	if ((last_msg_id != -1) && ids.has(last_msg_id)) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[last_msg_id]); +	} +	queue.clear(); +	ids.clear(); +	last_msg_id = -1; +	spd_cancel(synth); +	spd_resume(synth); +	speaking = false; +	paused = false; +} + +TTS_Linux *TTS_Linux::get_singleton() { +	return singleton; +} + 
+TTS_Linux::TTS_Linux() { +	singleton = this; +	// Speech Dispatcher init can be slow: it might wait for a helper process to start in the background, so run it in a separate thread. +	init_thread.start(speech_init_thread_func, this); +} + +TTS_Linux::~TTS_Linux() { +	// Make sure the init thread is done before the connection it may have opened is closed. +	init_thread.wait_to_finish(); +	if (synth) { +		spd_close(synth); +	} + +	singleton = nullptr; +} diff --git a/platform/linuxbsd/tts_linux.h b/platform/linuxbsd/tts_linux.h new file mode 100644 index 0000000000..4d39af8970 --- /dev/null +++ b/platform/linuxbsd/tts_linux.h @@ -0,0 +1,78 @@ +/*************************************************************************/ +/*  tts_linux.h                                                          */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   
*/ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/ + +#ifndef TTS_LINUX_H +#define TTS_LINUX_H + +#include "core/os/thread.h" +#include "core/os/thread_safe.h" +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include "speechd-so_wrap.h" + +class TTS_Linux { +	_THREAD_SAFE_CLASS_ + +	List<DisplayServer::TTSUtterance> queue; +	SPDConnection *synth = nullptr; +	bool speaking = false; +	bool paused = false; +	int last_msg_id = -1; +	Map<int, int> ids; + +	Thread init_thread; + +	static void speech_init_thread_func(void *p_userdata); +	static void speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type); +	static void speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark); + +	static TTS_Linux *singleton; + +public: +	static TTS_Linux *get_singleton(); + +	bool is_speaking() const; +	bool is_paused() const; +	Array get_voices() const; + +	void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); +	void pause(); +	void resume(); +	void stop(); + +	TTS_Linux(); +	~TTS_Linux(); +}; + +#endif // TTS_LINUX_H diff --git a/platform/osx/SCsub b/platform/osx/SCsub index d72a75af04..3a4c95613d 100644 --- a/platform/osx/SCsub +++ b/platform/osx/SCsub @@ -18,6 +18,7 @@ files = [      "key_mapping_osx.mm",      "godot_main_osx.mm",      "dir_access_osx.mm", +    "tts_osx.mm",      "joypad_osx.cpp",      "vulkan_context_osx.mm",      "gl_manager_osx_legacy.mm", diff --git a/platform/osx/display_server_osx.h b/platform/osx/display_server_osx.h index fa3091ff81..fcb3a62bec 100644 --- a/platform/osx/display_server_osx.h +++ b/platform/osx/display_server_osx.h @@ -137,6 +137,8 @@ private:  	Vector<KeyEvent> key_event_buffer;  	int key_event_pos = 
0; +	id tts = nullptr; +  	Point2i im_selection;  	String im_text; @@ -264,6 +266,15 @@ public:  	virtual void global_menu_remove_item(const String &p_menu_root, int p_idx) override;  	virtual void global_menu_clear(const String &p_menu_root) override; +	virtual bool tts_is_speaking() const override; +	virtual bool tts_is_paused() const override; +	virtual Array tts_get_voices() const override; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; +	virtual void tts_pause() override; +	virtual void tts_resume() override; +	virtual void tts_stop() override; +  	virtual Error dialog_show(String p_title, String p_description, Vector<String> p_buttons, const Callable &p_callback) override;  	virtual Error dialog_input_text(String p_title, String p_description, String p_partial, const Callable &p_callback) override; diff --git a/platform/osx/display_server_osx.mm b/platform/osx/display_server_osx.mm index d209c90d87..17a44a3fbd 100644 --- a/platform/osx/display_server_osx.mm +++ b/platform/osx/display_server_osx.mm @@ -37,6 +37,8 @@  #include "key_mapping_osx.h"  #include "os_osx.h" +#include "tts_osx.h" +  #include "core/io/marshalls.h"  #include "core/math/geometry_2d.h"  #include "core/os/keyboard.h" @@ -702,6 +704,7 @@ bool DisplayServerOSX::has_feature(Feature p_feature) const {  		case FEATURE_NATIVE_ICON:  		//case FEATURE_KEEP_SCREEN_ON:  		case FEATURE_SWAP_BUFFERS: +		case FEATURE_TEXT_TO_SPEECH:  			return true;  		default: {  		} @@ -1458,6 +1461,41 @@ void DisplayServerOSX::global_menu_clear(const String &p_menu_root) {  	}  } +bool DisplayServerOSX::tts_is_speaking() const { +	ERR_FAIL_COND_V(!tts, false); +	return [tts isSpeaking]; +} + +bool DisplayServerOSX::tts_is_paused() const { +	ERR_FAIL_COND_V(!tts, false); +	return [tts isPaused]; +} + +Array DisplayServerOSX::tts_get_voices() const { +	ERR_FAIL_COND_V(!tts, 
Array()); +	return [tts getVoices]; +} + +void DisplayServerOSX::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	ERR_FAIL_COND(!tts); +	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt]; +} + +void DisplayServerOSX::tts_pause() { +	ERR_FAIL_COND(!tts); +	[tts pauseSpeaking]; +} + +void DisplayServerOSX::tts_resume() { +	ERR_FAIL_COND(!tts); +	[tts resumeSpeaking]; +} + +void DisplayServerOSX::tts_stop() { +	ERR_FAIL_COND(!tts); +	[tts stopSpeaking]; +} +  Error DisplayServerOSX::dialog_show(String p_title, String p_description, Vector<String> p_buttons, const Callable &p_callback) {  	_THREAD_SAFE_METHOD_ @@ -3121,6 +3159,9 @@ DisplayServerOSX::DisplayServerOSX(const String &p_rendering_driver, WindowMode  	// Register to be notified on displays arrangement changes.  	CGDisplayRegisterReconfigurationCallback(_displays_arrangement_changed, nullptr); +	// Init TTS +	tts = [[TTS_OSX alloc] init]; +  	NSMenuItem *menu_item;  	NSString *title; diff --git a/platform/osx/tts_osx.h b/platform/osx/tts_osx.h new file mode 100644 index 0000000000..2cf6d21c18 --- /dev/null +++ b/platform/osx/tts_osx.h @@ -0,0 +1,66 @@ +/*************************************************************************/ +/*  tts_osx.h                                                            */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   
*/ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/ + +#ifndef TTS_OSX_H +#define TTS_OSX_H + +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include <AVFAudio/AVSpeechSynthesis.h> +#include <AppKit/AppKit.h> + +@interface TTS_OSX : NSObject <AVSpeechSynthesizerDelegate> { +	// AVSpeechSynthesizer +	bool speaking; +	Map<id, int> ids; + +	// NSSpeechSynthesizer +	bool paused; +	bool have_utterance; +	int last_utterance; + +	id synth; // NSSpeechSynthesizer or AVSpeechSynthesizer +	List<DisplayServer::TTSUtterance> queue; +} + +- (void)pauseSpeaking; +- (void)resumeSpeaking; +- (void)stopSpeaking; +- (bool)isSpeaking; +- (bool)isPaused; +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt; +- (Array)getVoices; +@end + +#endif // TTS_OSX_H diff --git a/platform/osx/tts_osx.mm b/platform/osx/tts_osx.mm new file mode 100644 index 0000000000..e6a5236cd9 --- /dev/null +++ b/platform/osx/tts_osx.mm @@ -0,0 +1,266 @@ +/*************************************************************************/ +/*  tts_osx.mm                                                           */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   
*/ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                
*/ +/*************************************************************************/ + +#include "tts_osx.h" + +@implementation TTS_OSX + +// Creates an AVSpeechSynthesizer on macOS 10.14+ and an NSSpeechSynthesizer otherwise; this object is set as the delegate of either one. +- (id)init { +	self = [super init]; +	self->speaking = false; +	self->have_utterance = false; +	self->last_utterance = -1; +	self->paused = false; +	if (@available(macOS 10.14, *)) { +		self->synth = [[AVSpeechSynthesizer alloc] init]; +		[self->synth setDelegate:self]; +		print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized."); +	} else { +		self->synth = [[NSSpeechSynthesizer alloc] init]; +		[self->synth setDelegate:self]; +		print_verbose("Text-to-Speech: NSSpeechSynthesizer initialized."); +	} +	return self; +} + +// AVSpeechSynthesizer callback (macOS 10.14+) + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) { +	NSString *string = [utterance speechString]; + +	// Convert from UTF-16 to UTF-32 position. +	int pos = 0; +	for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) { +		unichar c = [string characterAtIndex:i]; +		// High surrogate: the pair counts as one code point, so skip the low surrogate that follows. +		if ((c & 0xfffffc00) == 0xd800) { +			i++; +		} +		pos++; +	} + +	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos); +} + +// AVSpeechSynthesizer callback (macOS 10.14+) + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) { +	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[utterance]); +	ids.erase(utterance); +	speaking = false; +	[self update]; +} + +// AVSpeechSynthesizer callback (macOS 10.14+) + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) { +	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, 
ids[utterance]); +	ids.erase(utterance); +	speaking = false; +	[self update]; +} + +// NSSpeechSynthesizer callback (macOS 10.4+) + +- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth willSpeakWord:(NSRange)characterRange ofString:(NSString *)string { +	if (!paused && have_utterance) { +		// Convert from UTF-16 to UTF-32 position. +		int pos = 0; +		for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) { +			unichar c = [string characterAtIndex:i]; +			// High surrogate: the pair counts as one code point, so skip the low surrogate that follows. +			if ((c & 0xfffffc00) == 0xd800) { +				i++; +			} +			pos++; +		} + +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, last_utterance, pos); +	} +} + +- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth didFinishSpeaking:(BOOL)success { +	if (!paused && have_utterance) { +		if (success) { +			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, last_utterance); +		} else { +			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, last_utterance); +		} +		have_utterance = false; +	} +	speaking = false; +	[self update]; +} + +// Starts the utterance at the front of the queue, unless one is already being spoken, and posts TTS_UTTERANCE_STARTED for it. +- (void)update { +	if (!speaking && queue.size() > 0) { +		DisplayServer::TTSUtterance &message = queue.front()->get(); +		// Keep a copy of the id: `message` refers to the queue element, which pop_front() below destroys. +		const int utterance_id = message.id; + +		if (@available(macOS 10.14, *)) { +			AVSpeechSynthesizer *av_synth = synth; +			AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]]; +			[new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]]; +			if (message.rate > 1.f) { +				[new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)]; +			} else if (message.rate < 1.f) { +				[new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)]; +			
} +			[new_utterance setPitchMultiplier:message.pitch]; +			[new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))]; + +			ids[new_utterance] = message.id; +			[av_synth speakUtterance:new_utterance]; +		} else { +			NSSpeechSynthesizer *ns_synth = synth; +			[ns_synth setObject:nil forProperty:NSSpeechResetProperty error:nil]; +			[ns_synth setVoice:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]; +			int base_pitch = [[ns_synth objectForProperty:NSSpeechPitchBaseProperty error:nil] intValue]; +			[ns_synth setObject:[NSNumber numberWithInt:(base_pitch * (message.pitch / 2.f + 0.5f))] forProperty:NSSpeechPitchBaseProperty error:nullptr]; +			[ns_synth setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))]; +			[ns_synth setRate:(message.rate * 200)]; + +			last_utterance = message.id; +			have_utterance = true; +			[ns_synth startSpeakingString:[NSString stringWithUTF8String:message.text.utf8().get_data()]]; +		} +		queue.pop_front(); + +		// `message` is dangling from here on, use the saved id. +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, utterance_id); +		speaking = true; +	} +} + +- (void)pauseSpeaking { +	if (@available(macOS 10.14, *)) { +		AVSpeechSynthesizer *av_synth = synth; +		[av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate]; +	} else { +		NSSpeechSynthesizer *ns_synth = synth; +		[ns_synth pauseSpeakingAtBoundary:NSSpeechImmediateBoundary]; +	} +	paused = true; +} + +- (void)resumeSpeaking { +	if (@available(macOS 10.14, *)) { +		AVSpeechSynthesizer *av_synth = synth; +		[av_synth continueSpeaking]; +	} else { +		NSSpeechSynthesizer *ns_synth = synth; +		[ns_synth continueSpeaking]; +	} +	paused = false; +} + +// Posts TTS_UTTERANCE_CANCELED for every queued utterance, clears the queue and stops the synthesizer. +- (void)stopSpeaking { +	for (DisplayServer::TTSUtterance &message : queue) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); +	} +	queue.clear(); +	if (@available(macOS 10.14, *)) { +		AVSpeechSynthesizer *av_synth = synth; 
+		[av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate]; +	} else { +		NSSpeechSynthesizer *ns_synth = synth; +		if (have_utterance) { +			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, last_utterance); +		} +		[ns_synth stopSpeaking]; +	} +	have_utterance = false; +	speaking = false; +	paused = false; +} + +- (bool)isSpeaking { +	return speaking || (queue.size() > 0); +} + +- (bool)isPaused { +	if (@available(macOS 10.14, *)) { +		AVSpeechSynthesizer *av_synth = synth; +		return [av_synth isPaused]; +	} else { +		return paused; +	} +} + +// Queues an utterance (after stopSpeaking when `interrupt` is set) and starts it if the synthesizer is idle. Empty text is reported as canceled. +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt { +	if (interrupt) { +		[self stopSpeaking]; +	} + +	if (text.is_empty()) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id); +		return; +	} + +	DisplayServer::TTSUtterance message; +	message.text = text; +	message.voice = voice; +	message.volume = CLAMP(volume, 0, 100); +	message.pitch = CLAMP(pitch, 0.f, 2.f); +	message.rate = CLAMP(rate, 0.1f, 10.f); +	message.id = utterance_id; +	queue.push_back(message); + +	if ([self isPaused]) { +		[self resumeSpeaking]; +	} else { +		[self update]; +	} +} + +// Returns one dictionary ("name", "id", "language") per voice available to the synthesizer API in use. +- (Array)getVoices { +	Array list; +	if (@available(macOS 10.14, *)) { +		for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) { +			NSString *voiceIdentifierString = [voice identifier]; +			NSString *voiceLocaleIdentifier = [voice language]; +			NSString *voiceName = [voice name]; +			Dictionary voice_d; +			voice_d["name"] = String::utf8([voiceName UTF8String]); +			voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]); +			voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]); +			list.push_back(voice_d); +		} +	} else { +		for (NSString *voiceIdentifierString in [NSSpeechSynthesizer 
availableVoices]) { +			NSString *voiceLocaleIdentifier = [[NSSpeechSynthesizer attributesForVoice:voiceIdentifierString] objectForKey:NSVoiceLocaleIdentifier]; +			NSString *voiceName = [[NSSpeechSynthesizer attributesForVoice:voiceIdentifierString] objectForKey:NSVoiceName]; +			Dictionary voice_d; +			// -UTF8String yields UTF-8 data; decode it the same way as the AVSpeechSynthesisVoice branch so non-ASCII names are kept intact. +			voice_d["name"] = String::utf8([voiceName UTF8String]); +			voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]); +			voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]); +			list.push_back(voice_d); +		} +	} +	return list; +} + +@end diff --git a/platform/windows/SCsub b/platform/windows/SCsub index 76234c3065..7e412b140f 100644 --- a/platform/windows/SCsub +++ b/platform/windows/SCsub @@ -13,6 +13,7 @@ common_win = [      "display_server_windows.cpp",      "key_mapping_windows.cpp",      "joypad_windows.cpp", +    "tts_windows.cpp",      "windows_terminal_logger.cpp",      "vulkan_context_win.cpp",      "gl_manager_windows.cpp", diff --git a/platform/windows/detect.py b/platform/windows/detect.py index 249a0d2e79..0b18fb74fb 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -252,6 +252,7 @@ def configure_msvc(env, manual_msvc_config):          "kernel32",          "ole32",          "oleaut32", +        "sapi",          "user32",          "gdi32",          "IPHLPAPI", @@ -426,6 +427,7 @@ def configure_mingw(env):              "ws2_32",              "kernel32",              "oleaut32", +            "sapi",              "dinput8",              "dxguid",              "ksuser", diff --git a/platform/windows/display_server_windows.cpp b/platform/windows/display_server_windows.cpp index 31bad0f053..0412eb2d9c 100644 --- a/platform/windows/display_server_windows.cpp +++ b/platform/windows/display_server_windows.cpp @@ -84,6 +84,7 @@ bool DisplayServerWindows::has_feature(Feature p_feature) const {  		case FEATURE_NATIVE_ICON:  		case FEATURE_SWAP_BUFFERS:  		case FEATURE_KEEP_SCREEN_ON: +		case FEATURE_TEXT_TO_SPEECH:  			return 
true;  		default:  			return false; @@ -133,6 +134,41 @@ void DisplayServerWindows::_set_mouse_mode_impl(MouseMode p_mode) {  	}  } +bool DisplayServerWindows::tts_is_speaking() const { +	ERR_FAIL_COND_V(!tts, false); +	return tts->is_speaking(); +} + +bool DisplayServerWindows::tts_is_paused() const { +	ERR_FAIL_COND_V(!tts, false); +	return tts->is_paused(); +} + +Array DisplayServerWindows::tts_get_voices() const { +	ERR_FAIL_COND_V(!tts, Array()); +	return tts->get_voices(); +} + +void DisplayServerWindows::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	ERR_FAIL_COND(!tts); +	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); +} + +void DisplayServerWindows::tts_pause() { +	ERR_FAIL_COND(!tts); +	tts->pause(); +} + +void DisplayServerWindows::tts_resume() { +	ERR_FAIL_COND(!tts); +	tts->resume(); +} + +void DisplayServerWindows::tts_stop() { +	ERR_FAIL_COND(!tts); +	tts->stop(); +} +  void DisplayServerWindows::mouse_set_mode(MouseMode p_mode) {  	_THREAD_SAFE_METHOD_ @@ -3497,6 +3533,9 @@ DisplayServerWindows::DisplayServerWindows(const String &p_rendering_driver, Win  	rendering_driver = p_rendering_driver; +	// Init TTS +	tts = memnew(TTS_Windows); +  	// Note: Wacom WinTab driver API for pen input, for devices incompatible with Windows Ink.  	
HMODULE wintab_lib = LoadLibraryW(L"wintab32.dll");  	if (wintab_lib) { @@ -3739,4 +3778,8 @@ DisplayServerWindows::~DisplayServerWindows() {  		gl_manager = nullptr;  	}  #endif +	if (tts) { +		memdelete(tts); +	} +	CoUninitialize();  } diff --git a/platform/windows/display_server_windows.h b/platform/windows/display_server_windows.h index fcf4b5a728..80faf71bd4 100644 --- a/platform/windows/display_server_windows.h +++ b/platform/windows/display_server_windows.h @@ -46,6 +46,7 @@  #include "servers/rendering/renderer_compositor.h"  #include "servers/rendering/renderer_rd/renderer_compositor_rd.h"  #include "servers/rendering_server.h" +#include "tts_windows.h"  #ifdef XAUDIO2_ENABLED  #include "drivers/xaudio2/audio_driver_xaudio2.h" @@ -320,6 +321,8 @@ class DisplayServerWindows : public DisplayServer {  	String rendering_driver;  	bool app_focused = false; +	TTS_Windows *tts = nullptr; +  	struct WindowData {  		HWND hWnd;  		//layered window @@ -454,6 +457,15 @@ public:  	virtual bool has_feature(Feature p_feature) const override;  	virtual String get_name() const override; +	virtual bool tts_is_speaking() const override; +	virtual bool tts_is_paused() const override; +	virtual Array tts_get_voices() const override; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; +	virtual void tts_pause() override; +	virtual void tts_resume() override; +	virtual void tts_stop() override; +  	virtual void mouse_set_mode(MouseMode p_mode) override;  	virtual MouseMode mouse_get_mode() const override; diff --git a/platform/windows/tts_windows.cpp b/platform/windows/tts_windows.cpp new file mode 100644 index 0000000000..05249934ba --- /dev/null +++ b/platform/windows/tts_windows.cpp @@ -0,0 +1,269 @@ +/*************************************************************************/ +/*  tts_windows.cpp                                            
          */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       */ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.    
            */ +/*************************************************************************/ + +#include "tts_windows.h" + +TTS_Windows *TTS_Windows::singleton = nullptr; + +void __stdcall TTS_Windows::speech_event_callback(WPARAM wParam, LPARAM lParam) { +	TTS_Windows *tts = TTS_Windows::get_singleton(); +	SPEVENT event; +	while (tts->synth->GetEvents(1, &event, NULL) == S_OK) { +		if (tts->ids.has(event.ulStreamNum)) { +			if (event.eEventId == SPEI_START_INPUT_STREAM) { +				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, tts->ids[event.ulStreamNum].id); +			} else if (event.eEventId == SPEI_END_INPUT_STREAM) { +				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, tts->ids[event.ulStreamNum].id); +				tts->ids.erase(event.ulStreamNum); +				tts->_update_tts(); +			} else if (event.eEventId == SPEI_WORD_BOUNDARY) { +				const Char16String &string = tts->ids[event.ulStreamNum].string; +				int pos = 0; +				for (int i = 0; i < MIN(event.lParam, string.length()); i++) { +					char16_t c = string[i]; +					if ((c & 0xfffffc00) == 0xd800) { +						i++; +					} +					pos++; +				} +				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, tts->ids[event.ulStreamNum].id, pos - tts->ids[event.ulStreamNum].offset); +			} +		} +	} +} + +void TTS_Windows::_update_tts() { +	if (!is_speaking() && !paused && queue.size() > 0) { +		DisplayServer::TTSUtterance &message = queue.front()->get(); + +		String text; +		DWORD flags = SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML; +		String pitch_tag = String("<pitch absmiddle=\"") + String::num_int64(message.pitch * 10 - 10, 10) + String("\">"); +		text = pitch_tag + message.text + String("</pitch>"); + +		IEnumSpObjectTokens *cpEnum; +		ISpObjectToken *cpVoiceToken; +		ULONG ulCount = 0; +		ULONG stream_number = 0; +		ISpObjectTokenCategory *cpCategory; +		HRESULT hr = 
CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); +		if (SUCCEEDED(hr)) { +			hr = cpCategory->SetId(SPCAT_VOICES, false); +			if (SUCCEEDED(hr)) { +				hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); +				if (SUCCEEDED(hr)) { +					hr = cpEnum->GetCount(&ulCount); +					// Walk the installed voices and select the one whose token ID matches the requested voice. +					while (SUCCEEDED(hr) && ulCount--) { +						wchar_t *w_id = nullptr; +						hr = cpEnum->Next(1, &cpVoiceToken, nullptr); +						if (hr != S_OK) { +							break; // S_FALSE or failure: no token was fetched, so cpVoiceToken must not be used. +						} +						bool voice_found = false; +						if (SUCCEEDED(cpVoiceToken->GetId(&w_id)) && w_id) { +							voice_found = (String::utf16((const char16_t *)w_id) == message.voice); +							CoTaskMemFree(w_id); // The ID string is allocated by SAPI and must be released by the caller. +						} +						if (voice_found) { +							synth->SetVoice(cpVoiceToken); +						} +						cpVoiceToken->Release(); +						if (voice_found) { +							break; +						} +					} +					cpEnum->Release(); +				} +			} +			cpCategory->Release(); +		} + +		UTData ut; +		ut.string = text.utf16(); +		ut.offset = pitch_tag.length(); // Subtract injected <pitch> tag offset. +		ut.id = message.id; + +		synth->SetVolume(message.volume); +		synth->SetRate(10.f * log10(message.rate) / log10(3.f)); +		synth->Speak((LPCWSTR)ut.string.get_data(), flags, &stream_number); + +		ids[stream_number] = ut; + +		queue.pop_front(); +	} +} + +bool TTS_Windows::is_speaking() const { +	ERR_FAIL_COND_V(!synth, false); + +	SPVOICESTATUS status; +	synth->GetStatus(&status, nullptr); +	return (status.dwRunningState == SPRS_IS_SPEAKING); +} + +bool TTS_Windows::is_paused() const { +	ERR_FAIL_COND_V(!synth, false); +	return paused; +} + +Array TTS_Windows::get_voices() const { +	Array list; +	IEnumSpObjectTokens *cpEnum; +	ISpObjectToken *cpVoiceToken; +	ISpDataKey *cpDataKeyAttribs; +	ULONG ulCount = 0; +	ISpObjectTokenCategory *cpCategory; +	HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory); +	if (SUCCEEDED(hr)) { +		hr = cpCategory->SetId(SPCAT_VOICES, false); +		if (SUCCEEDED(hr)) { +			hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum); +			if (SUCCEEDED(hr)) { +				hr = 
cpEnum->GetCount(&ulCount); +				while (SUCCEEDED(hr) && ulCount--) { +					hr = cpEnum->Next(1, &cpVoiceToken, nullptr); +					if (hr != S_OK) { +						break; // S_FALSE or failure: no token was fetched, so cpVoiceToken must not be used. +					} +					HRESULT hr_attr = cpVoiceToken->OpenKey(SPTOKENKEY_ATTRIBUTES, &cpDataKeyAttribs); +					if (SUCCEEDED(hr_attr)) { +						wchar_t *w_id = nullptr; +						wchar_t *w_lang = nullptr; +						wchar_t *w_name = nullptr; +						cpVoiceToken->GetId(&w_id); +						cpDataKeyAttribs->GetStringValue(L"Language", &w_lang); +						cpDataKeyAttribs->GetStringValue(nullptr, &w_name); + +						// Build the "lang_REGION" code; it stays empty when the voice has no "Language" attribute or the LCID cannot be resolved. +						String language; +						if (w_lang) { +							LCID locale = wcstol(w_lang, nullptr, 16); +							int locale_chars = GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, nullptr, 0); +							int region_chars = GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, nullptr, 0); +							if (locale_chars > 0 && region_chars > 0) { +								wchar_t *w_lang_code = new wchar_t[locale_chars]; +								wchar_t *w_reg_code = new wchar_t[region_chars]; +								if (GetLocaleInfoW(locale, LOCALE_SISO639LANGNAME, w_lang_code, locale_chars) > 0 && GetLocaleInfoW(locale, LOCALE_SISO3166CTRYNAME, w_reg_code, region_chars) > 0) { +									language = String::utf16((const char16_t *)w_lang_code) + "_" + String::utf16((const char16_t *)w_reg_code); +								} +								delete[] w_lang_code; +								delete[] w_reg_code; +							} +						} + +						Dictionary voice_d; +						voice_d["id"] = String::utf16((const char16_t *)w_id); +						if (w_name) { +							voice_d["name"] = String::utf16((const char16_t *)w_name); +						} else { +							voice_d["name"] = voice_d["id"].operator String().replace("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\", ""); +						} +						voice_d["language"] = language; +						list.push_back(voice_d); + +						// Strings returned by GetId() / GetStringValue() are owned by the caller; CoTaskMemFree() accepts null. +						CoTaskMemFree(w_id); +						CoTaskMemFree(w_lang); +						CoTaskMemFree(w_name); + +						cpDataKeyAttribs->Release(); +					} +					cpVoiceToken->Release(); +				} +				cpEnum->Release(); +			} +		} +		cpCategory->Release(); +	} +	return list; +} + +void TTS_Windows::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	ERR_FAIL_COND(!synth); +	if (p_interrupt) { +		stop(); +	} + +	if (p_text.is_empty()) { +		
DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id); +		return; +	} + +	DisplayServer::TTSUtterance message; +	message.text = p_text; +	message.voice = p_voice; +	message.volume = CLAMP(p_volume, 0, 100); +	message.pitch = CLAMP(p_pitch, 0.f, 2.f); +	message.rate = CLAMP(p_rate, 0.1f, 10.f); +	message.id = p_utterance_id; +	queue.push_back(message); + +	if (is_paused()) { +		resume(); +	} else { +		_update_tts(); +	} +} + +void TTS_Windows::pause() { +	ERR_FAIL_COND(!synth); +	if (!paused) { +		if (synth->Pause() == S_OK) { +			paused = true; +		} +	} +} + +void TTS_Windows::resume() { +	ERR_FAIL_COND(!synth); +	synth->Resume(); +	paused = false; +} + +void TTS_Windows::stop() { +	ERR_FAIL_COND(!synth); + +	SPVOICESTATUS status; +	synth->GetStatus(&status, nullptr); +	if (ids.has(status.ulCurrentStream)) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[status.ulCurrentStream].id); +		ids.erase(status.ulCurrentStream); +	} +	for (DisplayServer::TTSUtterance &message : queue) { +		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); +	} +	queue.clear(); +	synth->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr); +	synth->Resume(); +	paused = false; +} + +TTS_Windows *TTS_Windows::get_singleton() { +	return singleton; +} + +TTS_Windows::TTS_Windows() { +	singleton = this; +	CoInitialize(nullptr); + +	if (SUCCEEDED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&synth))) { +		ULONGLONG event_mask = SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_WORD_BOUNDARY); +		synth->SetInterest(event_mask, event_mask); +		synth->SetNotifyCallbackFunction(&speech_event_callback, (WPARAM)(this), 0); +		print_verbose("Text-to-Speech: SAPI initialized."); +	} else { +		print_verbose("Text-to-Speech: Cannot initialize ISpVoice!"); +	} +} + 
+TTS_Windows::~TTS_Windows() { +	if (synth) { +		synth->Release(); +	} +	singleton = nullptr; +} diff --git a/platform/windows/tts_windows.h b/platform/windows/tts_windows.h new file mode 100644 index 0000000000..5da404baf9 --- /dev/null +++ b/platform/windows/tts_windows.h @@ -0,0 +1,80 @@ +/*************************************************************************/ +/*  tts_windows.h                                                        */ +/*************************************************************************/ +/*                       This file is part of:                           */ +/*                           GODOT ENGINE                                */ +/*                      https://godotengine.org                          */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */ +/*                                                                       */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the       */ +/* "Software"), to deal in the Software without restriction, including   */ +/* without limitation the rights to use, copy, modify, merge, publish,   */ +/* distribute, sublicense, and/or sell copies of the Software, and to    */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions:                                             */ +/*                                                                       */ +/* The above copyright notice and this permission notice shall be        */ +/* included in all copies or substantial portions of the Software.       
*/ +/*                                                                       */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */ +/*************************************************************************/ + +#ifndef TTS_WINDOWS_H +#define TTS_WINDOWS_H + +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +#include <objbase.h> +#include <sapi.h> +#include <wchar.h> +#include <winnls.h> + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +class TTS_Windows { +	List<DisplayServer::TTSUtterance> queue; +	ISpVoice *synth = nullptr; +	bool paused = false; +	struct UTData { +		Char16String string; +		int offset; +		int id; +	}; +	Map<ULONG, UTData> ids; + +	static void __stdcall speech_event_callback(WPARAM wParam, LPARAM lParam); +	void _update_tts(); + +	static TTS_Windows *singleton; + +public: +	static TTS_Windows *get_singleton(); + +	bool is_speaking() const; +	bool is_paused() const; +	Array get_voices() const; + +	void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); +	void pause(); +	void resume(); +	void stop(); + +	TTS_Windows(); +	~TTS_Windows(); +}; + +#endif // TTS_WINDOWS_H diff --git a/servers/display_server.cpp b/servers/display_server.cpp index 67bfc75426..8d97cd2543 100644 --- a/servers/display_server.cpp +++ b/servers/display_server.cpp 
@@ -220,6 +220,81 @@ void DisplayServer::global_menu_clear(const String &p_menu_root) {  	WARN_PRINT("Global menus not supported by this display server.");  } +bool DisplayServer::tts_is_speaking() const { +	WARN_PRINT("TTS is not supported by this display server."); +	return false; +} + +bool DisplayServer::tts_is_paused() const { +	WARN_PRINT("TTS is not supported by this display server."); +	return false; +} + +void DisplayServer::tts_pause() { +	WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_resume() { +	WARN_PRINT("TTS is not supported by this display server."); +} + +Array DisplayServer::tts_get_voices() const { +	WARN_PRINT("TTS is not supported by this display server."); +	return Array(); +} + +PackedStringArray DisplayServer::tts_get_voices_for_language(const String &p_language) const { +	PackedStringArray ret; +	Array voices = tts_get_voices(); +	for (int i = 0; i < voices.size(); i++) { +		const Dictionary &voice = voices[i]; +		if (voice.has("id") && voice.has("language") && voice["language"].operator String().begins_with(p_language)) { +			ret.push_back(voice["id"]); +		} +	} +	return ret; +} + +void DisplayServer::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { +	WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_stop() { +	WARN_PRINT("TTS is not supported by this display server."); +} + +void DisplayServer::tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable) { +	ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX); +	utterance_callback[p_event] = p_callable; +} + +void DisplayServer::tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos) { +	ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX); +	switch (p_event) { +		case DisplayServer::TTS_UTTERANCE_STARTED: +		case DisplayServer::TTS_UTTERANCE_ENDED: +		case 
DisplayServer::TTS_UTTERANCE_CANCELED: { +			if (utterance_callback[p_event].is_valid()) { +				Variant args[1]; +				args[0] = p_id; +				const Variant *argp[] = { &args[0] }; +				utterance_callback[p_event].call_deferred(argp, 1); // Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession. +			} +		} break; +		case DisplayServer::TTS_UTTERANCE_BOUNDARY: { +			if (utterance_callback[p_event].is_valid()) { +				Variant args[2]; +				args[0] = p_pos; +				args[1] = p_id; +				const Variant *argp[] = { &args[0], &args[1] }; +				utterance_callback[p_event].call_deferred(argp, 2); // Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession. +			} +		} break; +		default: +			break; +	} +} +  void DisplayServer::mouse_set_mode(MouseMode p_mode) {  	WARN_PRINT("Mouse is not supported by this display server.");  } @@ -478,6 +553,19 @@ void DisplayServer::_bind_methods() {  	ClassDB::bind_method(D_METHOD("global_menu_remove_item", "menu_root", "idx"), &DisplayServer::global_menu_remove_item);  	ClassDB::bind_method(D_METHOD("global_menu_clear", "menu_root"), &DisplayServer::global_menu_clear); +	ClassDB::bind_method(D_METHOD("tts_is_speaking"), &DisplayServer::tts_is_speaking); +	ClassDB::bind_method(D_METHOD("tts_is_paused"), &DisplayServer::tts_is_paused); +	ClassDB::bind_method(D_METHOD("tts_get_voices"), &DisplayServer::tts_get_voices); +	ClassDB::bind_method(D_METHOD("tts_get_voices_for_language", "language"), &DisplayServer::tts_get_voices_for_language); + +	ClassDB::bind_method(D_METHOD("tts_speak", "text", "voice", "volume", "pitch", "rate", "utterance_id", "interrupt"), &DisplayServer::tts_speak, DEFVAL(50), DEFVAL(1.f), DEFVAL(1.f), DEFVAL(0), DEFVAL(false)); +	ClassDB::bind_method(D_METHOD("tts_pause"), &DisplayServer::tts_pause); +	ClassDB::bind_method(D_METHOD("tts_resume"), &DisplayServer::tts_resume); +	
ClassDB::bind_method(D_METHOD("tts_stop"), &DisplayServer::tts_stop); + +	ClassDB::bind_method(D_METHOD("tts_set_utterance_callback", "event", "callable"), &DisplayServer::tts_set_utterance_callback); +	ClassDB::bind_method(D_METHOD("_tts_post_utterance_event", "event", "id", "char_pos"), &DisplayServer::tts_post_utterance_event); +  	ClassDB::bind_method(D_METHOD("mouse_set_mode", "mouse_mode"), &DisplayServer::mouse_set_mode);  	ClassDB::bind_method(D_METHOD("mouse_get_mode"), &DisplayServer::mouse_get_mode); @@ -621,6 +709,7 @@ void DisplayServer::_bind_methods() {  	BIND_ENUM_CONSTANT(FEATURE_ORIENTATION);  	BIND_ENUM_CONSTANT(FEATURE_SWAP_BUFFERS);  	BIND_ENUM_CONSTANT(FEATURE_CLIPBOARD_PRIMARY); +	BIND_ENUM_CONSTANT(FEATURE_TEXT_TO_SPEECH);  	BIND_ENUM_CONSTANT(MOUSE_MODE_VISIBLE);  	BIND_ENUM_CONSTANT(MOUSE_MODE_HIDDEN); @@ -689,6 +778,11 @@ void DisplayServer::_bind_methods() {  	BIND_ENUM_CONSTANT(DISPLAY_HANDLE);  	BIND_ENUM_CONSTANT(WINDOW_HANDLE);  	BIND_ENUM_CONSTANT(WINDOW_VIEW); + +	BIND_ENUM_CONSTANT(TTS_UTTERANCE_STARTED); +	BIND_ENUM_CONSTANT(TTS_UTTERANCE_ENDED); +	BIND_ENUM_CONSTANT(TTS_UTTERANCE_CANCELED); +	BIND_ENUM_CONSTANT(TTS_UTTERANCE_BOUNDARY);  }  void DisplayServer::register_create_function(const char *p_name, CreateFunction p_function, GetRenderingDriversFunction p_get_drivers) { diff --git a/servers/display_server.h b/servers/display_server.h index 4961b07ba3..19efcbd3dd 100644 --- a/servers/display_server.h +++ b/servers/display_server.h @@ -121,6 +121,7 @@ public:  		FEATURE_SWAP_BUFFERS,  		FEATURE_KEEP_SCREEN_ON,  		FEATURE_CLIPBOARD_PRIMARY, +		FEATURE_TEXT_TO_SPEECH,  	};  	virtual bool has_feature(Feature p_feature) const = 0; @@ -172,6 +173,40 @@ public:  	virtual void global_menu_remove_item(const String &p_menu_root, int p_idx);  	virtual void global_menu_clear(const String &p_menu_root); +	struct TTSUtterance { +		String text; +		String voice; +		int volume = 50; +		float pitch = 1.f; +		float rate = 1.f; +		int id = 0; +	
}; + +	enum TTSUtteranceEvent { +		TTS_UTTERANCE_STARTED, +		TTS_UTTERANCE_ENDED, +		TTS_UTTERANCE_CANCELED, +		TTS_UTTERANCE_BOUNDARY, +		TTS_UTTERANCE_MAX, +	}; + +private: +	Callable utterance_callback[TTS_UTTERANCE_MAX]; + +public: +	virtual bool tts_is_speaking() const; +	virtual bool tts_is_paused() const; +	virtual Array tts_get_voices() const; +	virtual PackedStringArray tts_get_voices_for_language(const String &p_language) const; + +	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false); +	virtual void tts_pause(); +	virtual void tts_resume(); +	virtual void tts_stop(); + +	virtual void tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable); +	virtual void tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos = 0); +  	enum MouseMode {  		MOUSE_MODE_VISIBLE,  		MOUSE_MODE_HIDDEN, @@ -431,5 +466,6 @@ VARIANT_ENUM_CAST(DisplayServer::WindowFlags)  VARIANT_ENUM_CAST(DisplayServer::HandleType)  VARIANT_ENUM_CAST(DisplayServer::CursorShape)  VARIANT_ENUM_CAST(DisplayServer::VSyncMode) +VARIANT_ENUM_CAST(DisplayServer::TTSUtteranceEvent)  #endif // DISPLAY_SERVER_H diff --git a/servers/text/text_server_extension.cpp b/servers/text/text_server_extension.cpp index 001706bb6f..005cb68302 100644 --- a/servers/text/text_server_extension.cpp +++ b/servers/text/text_server_extension.cpp @@ -293,6 +293,8 @@ void TextServerExtension::_bind_methods() {  	GDVIRTUAL_BIND(strip_diacritics, "string"); +	GDVIRTUAL_BIND(string_get_word_breaks, "string", "language"); +  	GDVIRTUAL_BIND(string_to_upper, "string", "language");  	GDVIRTUAL_BIND(string_to_lower, "string", "language"); @@ -1503,6 +1505,14 @@ Array TextServerExtension::parse_structured_text(StructuredTextParser p_parser_t  	return Array();  } +PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) 
const { +	PackedInt32Array ret; +	if (GDVIRTUAL_CALL(string_get_word_breaks, p_string, p_language, ret)) { +		return ret; +	} +	return PackedInt32Array(); +} +  TextServerExtension::TextServerExtension() {  	//NOP  } diff --git a/servers/text/text_server_extension.h b/servers/text/text_server_extension.h index ce781097f3..7b7fc61ed7 100644 --- a/servers/text/text_server_extension.h +++ b/servers/text/text_server_extension.h @@ -485,6 +485,9 @@ public:  	virtual String strip_diacritics(const String &p_string) const override;  	GDVIRTUAL1RC(String, strip_diacritics, const String &); +	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; +	GDVIRTUAL2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); +  	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;  	virtual String string_to_lower(const String &p_string, const String &p_language = "") const override;  	GDVIRTUAL2RC(String, string_to_upper, const String &, const String &); diff --git a/servers/text_server.cpp b/servers/text_server.cpp index d66e769e3c..7d9945f5d7 100644 --- a/servers/text_server.cpp +++ b/servers/text_server.cpp @@ -439,6 +439,8 @@ void TextServer::_bind_methods() {  	ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL(""));  	ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL("")); +	ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL("")); +  	ClassDB::bind_method(D_METHOD("strip_diacritics", "string"), &TextServer::strip_diacritics);  	ClassDB::bind_method(D_METHOD("string_to_upper", "string", "language"), &TextServer::string_to_upper, DEFVAL("")); diff --git a/servers/text_server.h b/servers/text_server.h index 7e7f26b32d..b08aa26917 100644 --- a/servers/text_server.h +++ 
b/servers/text_server.h @@ -431,6 +431,9 @@ public:  	virtual String parse_number(const String &p_string, const String &p_language = "") const = 0;  	virtual String percent_sign(const String &p_language = "") const = 0; +	// String functions. +	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0; +  	virtual String strip_diacritics(const String &p_string) const;  	// Other string operations. diff --git a/tests/servers/test_text_server.h b/tests/servers/test_text_server.h index d7de94516f..066c280fd5 100644 --- a/tests/servers/test_text_server.h +++ b/tests/servers/test_text_server.h @@ -514,6 +514,49 @@ TEST_SUITE("[[TextServer]") {  				CHECK(ts->strip_diacritics(U"ṽṿ ẁẃẅẇẉ ẋẍ ẏ ẑẓẕ ẖ ẗẘẙẛ") == U"vv wwwww xx y zzz h twys");  			}  		} + +		SUBCASE("[TextServer] Word break") { +			for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) { +				Ref<TextServer> ts = TextServerManager::get_singleton()->get_interface(i); +				TEST_FAIL_COND(ts.is_null(), "Invalid TS interface."); // Validate the reference before the first dereference below. + +				if (!ts->has_feature(TextServer::FEATURE_SIMPLE_LAYOUT)) { +					continue; +				} + +				{ +					String text1 = U"linguistically similar and effectively form"; +					//                           14^     22^ 26^         38^ +					PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en"); +					CHECK(breaks.size() == 4); +					if (breaks.size() == 4) { +						CHECK(breaks[0] == 14); +						CHECK(breaks[1] == 22); +						CHECK(breaks[2] == 26); +						CHECK(breaks[3] == 38); +					} +				} + +				if (ts->has_feature(TextServer::FEATURE_BREAK_ITERATORS)) { +					String text2 = U"เป็นภาษาราชการและภาษาประจำชาติของประเทศไทย"; +					//				 เป็น ภาษา ราชการ และ ภาษา ประจำ ชาติ ของ ประเทศไทย +					//                 3^   7^    13^ 16^  20^   25^ 29^ 32^ + +					PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th"); +					CHECK(breaks.size() == 8); +					if (breaks.size() == 8) { +						
CHECK(breaks[0] == 3); +						CHECK(breaks[1] == 7); +						CHECK(breaks[2] == 13); +						CHECK(breaks[3] == 16); +						CHECK(breaks[4] == 20); +						CHECK(breaks[5] == 25); +						CHECK(breaks[6] == 29); +						CHECK(breaks[7] == 32); +					} +				} +			} +		}  	}  }  }; // namespace TestTextServer  |