From 63f3051154a7e672956cffe41f90ed8d56a9ec23 Mon Sep 17 00:00:00 2001 From: bruvzg <7645683+bruvzg@users.noreply.github.com> Date: Tue, 12 Oct 2021 21:36:08 +0300 Subject: Implement TextServer `strip_diacritics` function. --- modules/text_server_adv/text_server_adv.cpp | 33 +++++++++++++++++++++++++++++ modules/text_server_adv/text_server_adv.h | 3 +++ 2 files changed, 36 insertions(+) (limited to 'modules/text_server_adv') diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index c459141265..776134b598 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -4924,6 +4924,39 @@ String TextServerAdvanced::percent_sign(const String &p_language) const { return "%"; } +String TextServerAdvanced::strip_diacritics(const String &p_string) const { + UErrorCode err = U_ZERO_ERROR; + + // Get NFKD normalizer singleton. + const UNormalizer2 *unorm = unorm2_getNFKDInstance(&err); + ERR_FAIL_COND_V_MSG(U_FAILURE(err), TextServer::strip_diacritics(p_string), u_errorName(err)); + + // Convert to UTF-16. + Char16String utf16 = p_string.utf16(); + + // Normalize. + Char16String normalized; + err = U_ZERO_ERROR; + int32_t len = unorm2_normalize(unorm, utf16.ptr(), -1, nullptr, 0, &err); + ERR_FAIL_COND_V_MSG(err != U_BUFFER_OVERFLOW_ERROR, TextServer::strip_diacritics(p_string), u_errorName(err)); + normalized.resize(len); + err = U_ZERO_ERROR; + unorm2_normalize(unorm, utf16.ptr(), -1, normalized.ptrw(), len, &err); + ERR_FAIL_COND_V_MSG(U_FAILURE(err), TextServer::strip_diacritics(p_string), u_errorName(err)); + + // Convert back to UTF-32. + String normalized_string = String::utf16(normalized.ptr(), len); + + // Strip combining characters. + String result; + for (int i = 0; i < normalized_string.length(); i++) { + if (u_getCombiningClass(normalized_string[i]) == 0) { + result += normalized_string[i]; + } + } + return result; +} + TextServerAdvanced::TextServerAdvanced() { _insert_num_systems_lang(); _insert_feature_sets(); diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index 333b68e074..15f3a7f1a9 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -501,6 +502,8 @@ public: virtual String parse_number(const String &p_string, const String &p_language = "") const override; virtual String percent_sign(const String &p_language = "") const override; + virtual String strip_diacritics(const String &p_string) const override; + TextServerAdvanced(); ~TextServerAdvanced(); }; -- cgit v1.2.3