diff options
author | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2021-11-12 10:12:37 +0200 |
---|---|---|
committer | bruvzg <7645683+bruvzg@users.noreply.github.com> | 2022-08-02 15:37:49 +0300 |
commit | 4373a0bb865265f07507d36e6c151a556f3d94e8 (patch) | |
tree | 7f6f260a19773a9b0c32d6474f15a6d87ed4320f /modules/text_server_adv | |
parent | 119b2874c39aeb0aff51603fbf00f75b414a1730 (diff) |
[TextServer] Add ICU Unicode security and spoofing detection.
Diffstat (limited to 'modules/text_server_adv')
-rw-r--r-- | modules/text_server_adv/SCsub | 11 | ||||
-rw-r--r-- | modules/text_server_adv/text_server_adv.cpp | 63 | ||||
-rw-r--r-- | modules/text_server_adv/text_server_adv.h | 4 |
3 files changed, 75 insertions, 3 deletions
diff --git a/modules/text_server_adv/SCsub b/modules/text_server_adv/SCsub index d212fe62b4..73e5c2bf74 100644 --- a/modules/text_server_adv/SCsub +++ b/modules/text_server_adv/SCsub @@ -121,7 +121,7 @@ if env["builtin_harfbuzz"]: env_harfbuzz.Append(CCFLAGS=["-DHAVE_ICU"]) if env["builtin_icu"]: - env_harfbuzz.Prepend(CPPPATH=["#thirdparty/icu4c/common/"]) + env_harfbuzz.Prepend(CPPPATH=["#thirdparty/icu4c/common/", "#thirdparty/icu4c/i18n/"]) env_harfbuzz.Append(CCFLAGS=["-DU_HAVE_LIB_SUFFIX=1", "-DU_LIB_SUFFIX_C_NAME=_godot", "-DHAVE_ICU_BUILTIN"]) if freetype_enabled: @@ -439,6 +439,10 @@ if env["builtin_icu"]: "common/uvectr32.cpp", "common/uvectr64.cpp", "common/wintz.cpp", + "i18n/scriptset.cpp", + "i18n/ucln_in.cpp", + "i18n/uspoof.cpp", + "i18n/uspoof_impl.cpp", ] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] @@ -451,7 +455,7 @@ if env["builtin_icu"]: else: thirdparty_sources += ["icu_data/icudata_stub.cpp"] - env_icu.Prepend(CPPPATH=["#thirdparty/icu4c/common/"]) + env_icu.Prepend(CPPPATH=["#thirdparty/icu4c/common/", "#thirdparty/icu4c/i18n/"]) env_icu.Append( CXXFLAGS=[ "-DU_STATIC_IMPLEMENTATION", @@ -463,6 +467,7 @@ if env["builtin_icu"]: "-DUCONFIG_NO_IDNA", "-DUCONFIG_NO_FILE_IO", "-DUCONFIG_NO_TRANSLITERATION", + "-DUCONFIG_NO_REGULAR_EXPRESSIONS", "-DPKGDATA_MODE=static", "-DU_ENABLE_DYLOAD=0", "-DU_HAVE_LIB_SUFFIX=1", @@ -480,7 +485,7 @@ if env["builtin_icu"]: if env_text_server_adv["tools"]: env_text_server_adv.Append(CXXFLAGS=["-DICU_STATIC_DATA"]) - env_text_server_adv.Prepend(CPPPATH=["#thirdparty/icu4c/common/"]) + env_text_server_adv.Prepend(CPPPATH=["#thirdparty/icu4c/common/", "#thirdparty/icu4c/i18n/"]) lib = env_icu.add_library("icu_builtin", thirdparty_sources) thirdparty_obj += lib diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index f8dbbc2e61..bb49fb5248 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -347,6 +347,7 @@ bool TextServerAdvanced::has_feature(Feature p_feature) const { case FEATURE_CONTEXT_SENSITIVE_CASE_CONVERSION: case FEATURE_USE_SUPPORT_DATA: case FEATURE_UNICODE_IDENTIFIERS: + case FEATURE_UNICODE_SECURITY: return true; default: { } @@ -5640,6 +5641,68 @@ String TextServerAdvanced::percent_sign(const String &p_language) const { return "%"; } +int TextServerAdvanced::is_confusable(const String &p_string, const PackedStringArray &p_dict) const { + UErrorCode status = U_ZERO_ERROR; + int match_index = -1; + + Char16String utf16 = p_string.utf16(); + Vector<UChar *> skeletons; + skeletons.resize(p_dict.size()); + + USpoofChecker *sc = uspoof_open(&status); + uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status); + for (int i = 0; i < p_dict.size(); i++) { + Char16String word = p_dict[i].utf16(); + int32_t len = uspoof_getSkeleton(sc, 0, word.get_data(), -1, NULL, 0, &status); + skeletons.write[i] = (UChar *)memalloc(++len * sizeof(UChar)); + status = U_ZERO_ERROR; + uspoof_getSkeleton(sc, 0, word.get_data(), -1, skeletons.write[i], len, &status); + } + + int32_t len = uspoof_getSkeleton(sc, 0, utf16.get_data(), -1, NULL, 0, &status); + UChar *skel = (UChar *)memalloc(++len * sizeof(UChar)); + status = U_ZERO_ERROR; + uspoof_getSkeleton(sc, 0, utf16.get_data(), -1, skel, len, &status); + for (int i = 0; i < skeletons.size(); i++) { + if (u_strcmp(skel, skeletons[i]) == 0) { + match_index = i; + break; + } + } + memfree(skel); + + for (int i = 0; i < skeletons.size(); i++) { + memfree(skeletons.write[i]); + } + uspoof_close(sc); + + ERR_FAIL_COND_V_MSG(U_FAILURE(status), -1, u_errorName(status)); + + return match_index; +} + +bool TextServerAdvanced::spoof_check(const String &p_string) const { + UErrorCode status = U_ZERO_ERROR; + Char16String utf16 = p_string.utf16(); + + USet *allowed = uset_openEmpty(); + uset_addAll(allowed, uspoof_getRecommendedSet(&status)); + uset_addAll(allowed, uspoof_getInclusionSet(&status)); + + USpoofChecker *sc = uspoof_open(&status); + uspoof_setAllowedChars(sc, allowed, &status); + uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE); + + int32_t bitmask = uspoof_check(sc, utf16.get_data(), -1, NULL, &status); + + uspoof_close(sc); + uset_close(allowed); + + ERR_FAIL_COND_V_MSG(U_FAILURE(status), false, u_errorName(status)); + + return (bitmask != 0); +} + String TextServerAdvanced::strip_diacritics(const String &p_string) const { UErrorCode err = U_ZERO_ERROR; diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index 170d92f8fc..b337abea7a 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -101,6 +101,7 @@ using namespace godot; #include <unicode/uloc.h> #include <unicode/unorm2.h> #include <unicode/uscript.h> +#include <unicode/uspoof.h> #include <unicode/ustring.h> #include <unicode/utypes.h> @@ -701,6 +702,9 @@ public: virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; + virtual int is_confusable(const String &p_string, const PackedStringArray &p_dict) const override; + virtual bool spoof_check(const String &p_string) const override; + virtual String strip_diacritics(const String &p_string) const override; virtual bool is_valid_identifier(const String &p_string) const override; |