diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-11-04 14:33:37 +0200 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2022-04-28 14:35:41 +0300 |
commit | 6ab672d1ef7ece5c3019d46aeb98df3686f37e26 (patch) | |
tree | be10d088e90c6a9e60efef823f54f9aa0d70aa07 /platform/iphone | |
parent | 3e1b824c050b765095285c67b3e4c8092e1f88c6 (diff) |
Implement text-to-speech support on Android, iOS, HTML5, Linux, macOS and Windows.
Implement TextServer word break method.
Diffstat (limited to 'platform/iphone')
-rw-r--r-- | platform/iphone/SCsub | 1 | ||||
-rw-r--r-- | platform/iphone/display_server_iphone.h | 11 | ||||
-rw-r--r-- | platform/iphone/display_server_iphone.mm | 40 | ||||
-rw-r--r-- | platform/iphone/tts_ios.h | 59 | ||||
-rw-r--r-- | platform/iphone/tts_ios.mm | 164 |
5 files changed, 275 insertions, 0 deletions
diff --git a/platform/iphone/SCsub b/platform/iphone/SCsub index 58b574a72f..5e10bf5646 100644 --- a/platform/iphone/SCsub +++ b/platform/iphone/SCsub @@ -13,6 +13,7 @@ iphone_lib = [ "display_server_iphone.mm", "joypad_iphone.mm", "godot_view.mm", + "tts_ios.mm", "display_layer.mm", "godot_app_delegate.m", "godot_view_renderer.mm", diff --git a/platform/iphone/display_server_iphone.h b/platform/iphone/display_server_iphone.h index 7441550f67..6ae190b81a 100644 --- a/platform/iphone/display_server_iphone.h +++ b/platform/iphone/display_server_iphone.h @@ -58,6 +58,8 @@ class DisplayServerIPhone : public DisplayServer { RenderingDeviceVulkan *rendering_device_vulkan = nullptr; #endif + id tts = nullptr; + DisplayServer::ScreenOrientation screen_orientation; ObjectID window_attached_instance_id; @@ -123,6 +125,15 @@ public: virtual bool has_feature(Feature p_feature) const override; virtual String get_name() const override; + virtual bool tts_is_speaking() const override; + virtual bool tts_is_paused() const override; + virtual Array tts_get_voices() const override; + + virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override; + virtual void tts_pause() override; + virtual void tts_resume() override; + virtual void tts_stop() override; + virtual int get_screen_count() const override; virtual Point2i screen_get_position(int p_screen = SCREEN_OF_MAIN_WINDOW) const override; virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override; diff --git a/platform/iphone/display_server_iphone.mm b/platform/iphone/display_server_iphone.mm index a0f8daf5a0..ec58ab195a 100644 --- a/platform/iphone/display_server_iphone.mm +++ b/platform/iphone/display_server_iphone.mm @@ -38,6 +38,7 @@ #include "ios.h" #import "keyboard_input_view.h" #include "os_iphone.h" +#include "tts_ios.h" #import "view_controller.h" #import <Foundation/Foundation.h> @@ -52,6 +53,9 @@ DisplayServerIPhone *DisplayServerIPhone::get_singleton() { DisplayServerIPhone::DisplayServerIPhone(const String &p_rendering_driver, WindowMode p_mode, DisplayServer::VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error) { rendering_driver = p_rendering_driver; + // Init TTS + tts = [[TTS_IOS alloc] init]; + #if defined(GLES3_ENABLED) // FIXME: Add support for both OpenGL and Vulkan when OpenGL is implemented // again, @@ -310,6 +314,7 @@ bool DisplayServerIPhone::has_feature(Feature p_feature) const { case FEATURE_ORIENTATION: case FEATURE_TOUCHSCREEN: case FEATURE_VIRTUAL_KEYBOARD: + case FEATURE_TEXT_TO_SPEECH: return true; default: return false; @@ -320,6 +325,41 @@ String DisplayServerIPhone::get_name() const { return "iPhone"; } +bool DisplayServerIPhone::tts_is_speaking() const { + ERR_FAIL_COND_V(!tts, false); + return [tts isSpeaking]; +} + +bool DisplayServerIPhone::tts_is_paused() const { + ERR_FAIL_COND_V(!tts, false); + return [tts isPaused]; +} + +Array DisplayServerIPhone::tts_get_voices() const { + ERR_FAIL_COND_V(!tts, Array()); + return [tts getVoices]; +} + +void DisplayServerIPhone::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { + ERR_FAIL_COND(!tts); + [tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt]; +} + +void DisplayServerIPhone::tts_pause() { + ERR_FAIL_COND(!tts); + [tts pauseSpeaking]; +} + +void DisplayServerIPhone::tts_resume() { + ERR_FAIL_COND(!tts); + [tts resumeSpeaking]; +} + +void DisplayServerIPhone::tts_stop() { + ERR_FAIL_COND(!tts); + [tts stopSpeaking]; +} + int DisplayServerIPhone::get_screen_count() const { return 1; } diff --git a/platform/iphone/tts_ios.h b/platform/iphone/tts_ios.h new file mode 100644 index 0000000000..c7defeb98f --- /dev/null +++ b/platform/iphone/tts_ios.h @@ -0,0 +1,59 @@ +/*************************************************************************/ +/* tts_ios.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef TTS_IOS_H +#define TTS_IOS_H + +#include <AVFAudio/AVSpeechSynthesis.h> + +#include "core/string/ustring.h" +#include "core/templates/list.h" +#include "core/templates/map.h" +#include "core/variant/array.h" +#include "servers/display_server.h" + +@interface TTS_IOS : NSObject <AVSpeechSynthesizerDelegate> { + bool speaking; + Map<id, int> ids; + + AVSpeechSynthesizer *av_synth; + List<DisplayServer::TTSUtterance> queue; +} + +- (void)pauseSpeaking; +- (void)resumeSpeaking; +- (void)stopSpeaking; +- (bool)isSpeaking; +- (bool)isPaused; +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt; +- (Array)getVoices; +@end + +#endif // TTS_IOS_H diff --git a/platform/iphone/tts_ios.mm b/platform/iphone/tts_ios.mm new file mode 100644 index 0000000000..a079d02add --- /dev/null +++ b/platform/iphone/tts_ios.mm @@ -0,0 +1,164 @@ +/*************************************************************************/ +/* tts_ios.mm */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "tts_ios.h" + +@implementation TTS_IOS + +- (id)init { + self = [super init]; + self->speaking = false; + self->av_synth = [[AVSpeechSynthesizer alloc] init]; + [self->av_synth setDelegate:self]; + print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized."); + return self; +} + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance { + NSString *string = [utterance speechString]; + + // Convert from UTF-16 to UTF-32 position. + int pos = 0; + for (NSUInteger i = 0; i < MIN(characterRange.location, string.length); i++) { + unichar c = [string characterAtIndex:i]; + if ((c & 0xfffffc00) == 0xd800) { + i++; + } + pos++; + } + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos); +} + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[utterance]); + ids.erase(utterance); + speaking = false; + [self update]; +} + +- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, ids[utterance]); + ids.erase(utterance); + speaking = false; + [self update]; +} + +- (void)update { + if (!speaking && queue.size() > 0) { + DisplayServer::TTSUtterance &message = queue.front()->get(); + + AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]]; + [new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]]; + if (message.rate > 1.f) { + [new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)]; + } else if (message.rate < 1.f) { + [new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)]; + } + [new_utterance setPitchMultiplier:message.pitch]; + [new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))]; + + ids[new_utterance] = message.id; + [av_synth speakUtterance:new_utterance]; + + queue.pop_front(); + + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id); + speaking = true; + } +} + +- (void)pauseSpeaking { + [av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate]; +} + +- (void)resumeSpeaking { + [av_synth continueSpeaking]; +} + +- (void)stopSpeaking { + for (DisplayServer::TTSUtterance &message : queue) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id); + } + queue.clear(); + [av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate]; + speaking = false; +} + +- (bool)isSpeaking { + return speaking || (queue.size() > 0); +} + +- (bool)isPaused { + return [av_synth isPaused]; +} + +- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt { + if (interrupt) { + [self stopSpeaking]; + } + + if (text.is_empty()) { + DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id); + return; + } + + DisplayServer::TTSUtterance message; + message.text = text; + message.voice = voice; + message.volume = CLAMP(volume, 0, 100); + message.pitch = CLAMP(pitch, 0.f, 2.f); + message.rate = CLAMP(rate, 0.1f, 10.f); + message.id = utterance_id; + queue.push_back(message); + + if ([self isPaused]) { + [self resumeSpeaking]; + } else { + [self update]; + } +} + +- (Array)getVoices { + Array list; + for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) { + NSString *voiceIdentifierString = [voice identifier]; + NSString *voiceLocaleIdentifier = [voice language]; + NSString *voiceName = [voice name]; + Dictionary voice_d; + voice_d["name"] = String::utf8([voiceName UTF8String]); + voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]); + voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]); + list.push_back(voice_d); + } + return list; +} + +@end |