diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-10-12 21:36:08 +0300 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-10-28 10:12:57 +0300 |
commit | 63f3051154a7e672956cffe41f90ed8d56a9ec23 (patch) | |
tree | 482166808a72700a51e9e29b73e35abc7f279da0 /modules/text_server_adv | |
parent | 0ec77631979997b3e6bcd9146ea8f1c3e4166b81 (diff) |
Implement TextServer `strip_diacritics` function.
Diffstat (limited to 'modules/text_server_adv')
-rw-r--r-- | modules/text_server_adv/text_server_adv.cpp | 33 | ||||
-rw-r--r-- | modules/text_server_adv/text_server_adv.h | 3 |
2 files changed, 36 insertions, 0 deletions
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index c459141265..776134b598 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -4924,6 +4924,50 @@ String TextServerAdvanced::percent_sign(const String &p_language) const {
 	return "%";
 }
 
+// Returns a copy of `p_string` with combining marks removed: the text is
+// decomposed with ICU's NFKD normalizer and every code point whose canonical
+// combining class is non-zero is dropped. If ICU reports an error, the result
+// of TextServer::strip_diacritics() is returned instead.
+String TextServerAdvanced::strip_diacritics(const String &p_string) const {
+	// Nothing to strip. An empty input would also make the preflight call below
+	// finish without U_BUFFER_OVERFLOW_ERROR and log a spurious error.
+	if (p_string.length() == 0) {
+		return String();
+	}
+
+	UErrorCode err = U_ZERO_ERROR;
+
+	// Get NFKD normalizer singleton.
+	const UNormalizer2 *unorm = unorm2_getNFKDInstance(&err);
+	ERR_FAIL_COND_V_MSG(U_FAILURE(err), TextServer::strip_diacritics(p_string), u_errorName(err));
+
+	// Convert to UTF-16.
+	Char16String utf16 = p_string.utf16();
+
+	// Normalize. The first call only measures: with no destination buffer the
+	// required length is returned and U_BUFFER_OVERFLOW_ERROR is expected.
+	Char16String normalized;
+	err = U_ZERO_ERROR;
+	int32_t len = unorm2_normalize(unorm, utf16.ptr(), -1, nullptr, 0, &err);
+	ERR_FAIL_COND_V_MSG(err != U_BUFFER_OVERFLOW_ERROR, TextServer::strip_diacritics(p_string), u_errorName(err));
+	normalized.resize(len);
+	err = U_ZERO_ERROR;
+	unorm2_normalize(unorm, utf16.ptr(), -1, normalized.ptrw(), len, &err);
+	ERR_FAIL_COND_V_MSG(U_FAILURE(err), TextServer::strip_diacritics(p_string), u_errorName(err));
+
+	// Convert back to UTF-32.
+	String normalized_string = String::utf16(normalized.ptr(), len);
+
+	// Strip combining characters (non-zero canonical combining class).
+	String result;
+	for (int i = 0; i < normalized_string.length(); i++) {
+		if (u_getCombiningClass(normalized_string[i]) == 0) {
+			result += normalized_string[i];
+		}
+	}
+	return result;
+}
+
 TextServerAdvanced::TextServerAdvanced() {
 	_insert_num_systems_lang();
 	_insert_feature_sets();
diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h
index 333b68e074..15f3a7f1a9 100644
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -50,6 +50,7 @@
 #include <unicode/udata.h>
 #include <unicode/uiter.h>
 #include <unicode/uloc.h>
+#include <unicode/unorm2.h>
 #include <unicode/uscript.h>
 #include <unicode/ustring.h>
 #include <unicode/utypes.h>
@@ -501,6 +502,8 @@ public:
 	virtual String parse_number(const String &p_string, const String &p_language = "") const override;
 	virtual String percent_sign(const String &p_language = "") const override;
 
+	virtual String strip_diacritics(const String &p_string) const override;
+
 	TextServerAdvanced();
 	~TextServerAdvanced();
 };