summary | refs | log | tree | commit | diff
path: root/modules/text_server_adv
diff options
context:
space:
mode:
author bruvzg <7645683+bruvzg@users.noreply.github.com> 2021-10-12 21:36:08 +0300
committer bruvzg <7645683+bruvzg@users.noreply.github.com> 2021-10-28 10:12:57 +0300
commit 63f3051154a7e672956cffe41f90ed8d56a9ec23 (patch)
tree 482166808a72700a51e9e29b73e35abc7f279da0 /modules/text_server_adv
parent 0ec77631979997b3e6bcd9146ea8f1c3e4166b81 (diff)
Implement TextServer `strip_diacritics` function.
Diffstat (limited to 'modules/text_server_adv')
-rw-r--r-- modules/text_server_adv/text_server_adv.cpp 33
-rw-r--r-- modules/text_server_adv/text_server_adv.h 3
2 files changed, 36 insertions, 0 deletions
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index c459141265..776134b598 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -4924,6 +4924,39 @@ String TextServerAdvanced::percent_sign(const String &p_language) const {
return "%";
}
+String TextServerAdvanced::strip_diacritics(const String &p_string) const {
+ UErrorCode err = U_ZERO_ERROR;
+
+ // Get NFKD normalizer singleton.
+ const UNormalizer2 *unorm = unorm2_getNFKDInstance(&err);
+ ERR_FAIL_COND_V_MSG(U_FAILURE(err), TextServer::strip_diacritics(p_string), u_errorName(err));
+
+ // Convert to UTF-16.
+ Char16String utf16 = p_string.utf16();
+
+ // Normalize.
+ Char16String normalized;
+ err = U_ZERO_ERROR;
+ int32_t len = unorm2_normalize(unorm, utf16.ptr(), -1, nullptr, 0, &err);
+ ERR_FAIL_COND_V_MSG(err != U_BUFFER_OVERFLOW_ERROR, TextServer::strip_diacritics(p_string), u_errorName(err));
+ normalized.resize(len);
+ err = U_ZERO_ERROR;
+ unorm2_normalize(unorm, utf16.ptr(), -1, normalized.ptrw(), len, &err);
+ ERR_FAIL_COND_V_MSG(U_FAILURE(err), TextServer::strip_diacritics(p_string), u_errorName(err));
+
+ // Convert back to UTF-32.
+ String normalized_string = String::utf16(normalized.ptr(), len);
+
+ // Strip combining characters.
+ String result;
+ for (int i = 0; i < normalized_string.length(); i++) {
+ if (u_getCombiningClass(normalized_string[i]) == 0) {
+ result += normalized_string[i];
+ }
+ }
+ return result;
+}
+
TextServerAdvanced::TextServerAdvanced() {
_insert_num_systems_lang();
_insert_feature_sets();
diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h
index 333b68e074..15f3a7f1a9 100644
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -50,6 +50,7 @@
#include <unicode/udata.h>
#include <unicode/uiter.h>
#include <unicode/uloc.h>
+#include <unicode/unorm2.h>
#include <unicode/uscript.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
@@ -501,6 +502,8 @@ public:
virtual String parse_number(const String &p_string, const String &p_language = "") const override;
virtual String percent_sign(const String &p_language = "") const override;
+ virtual String strip_diacritics(const String &p_string) const override; // NFKD-decomposes p_string and drops code points with a non-zero combining class.
+
TextServerAdvanced();
~TextServerAdvanced();
};