summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- core/ustring.cpp 44
-rw-r--r-- core/ustring.h 2
-rw-r--r-- core/variant_call.cpp 4
3 files changed, 50 insertions, 0 deletions
diff --git a/core/ustring.cpp b/core/ustring.cpp
index 485f7f1b62..ea9a9d903e 100644
--- a/core/ustring.cpp
+++ b/core/ustring.cpp
@@ -2810,6 +2810,50 @@ bool String::_base_is_subsequence_of(const String& p_string, bool case_insensiti
return false;
}
+// Builds the list of substr(i, 2) slices for every start index i in
+// [0, length() - 1). Strings with fewer than two characters produce an
+// empty vector.
+Vector<String> String::bigrams() const {
+	Vector<String> pairs;
+	const int pair_count = length() - 1;
+	if (pair_count > 0) {
+		// Size the result once up front, then fill each slot in order.
+		pairs.resize(pair_count);
+		for (int start = 0; start < pair_count; ++start) {
+			pairs[start] = substr(start, 2);
+		}
+	}
+	return pairs;
+}
+
+// Similarity according to the Sorensen-Dice coefficient computed over bigrams:
+//   2 * |shared bigrams| / (|bigrams(this)| + |bigrams(p_string)|)
+//
+// Returns 1.0 when both strings compare equal, 0.0 when either string is
+// shorter than two characters (no bigram can be formed), and otherwise a
+// value in [0, 1].
+//
+// The shared-bigram count is a multiset intersection: each bigram of
+// p_string can be matched by at most one bigram of this string. Without
+// that rule, repeated bigrams would all match the same target bigram,
+// giving results above 1.0 (e.g. "aaaa" vs "aa") and making the measure
+// depend on argument order.
+float String::similarity(const String& p_string) const {
+	if (operator==(p_string)) {
+		// Equal strings are totally similar
+		return 1.0f;
+	}
+	if (length() < 2 || p_string.length() < 2) {
+		// No way to calculate similarity without a single bigram
+		return 0.0f;
+	}
+
+	Vector<String> src_bigrams = bigrams();
+	Vector<String> tgt_bigrams = p_string.bigrams();
+
+	const int src_size = src_bigrams.size();
+	const int tgt_size = tgt_bigrams.size();
+
+	// Tracks which target bigrams have already been paired with a source
+	// bigram. Initialised explicitly instead of relying on resize().
+	Vector<bool> tgt_used;
+	tgt_used.resize(tgt_size);
+	for (int j = 0; j < tgt_size; j++) {
+		tgt_used[j] = false;
+	}
+
+	// Both sizes are >= 1 here because of the length check above, so the
+	// denominator is never zero.
+	const float sum = src_size + tgt_size;
+	float inter = 0;
+	for (int i = 0; i < src_size; i++) {
+		for (int j = 0; j < tgt_size; j++) {
+			if (!tgt_used[j] && src_bigrams[i] == tgt_bigrams[j]) {
+				// Consume this target bigram so it cannot be counted twice.
+				tgt_used[j] = true;
+				inter++;
+				break;
+			}
+		}
+	}
+
+	return (2.0f * inter) / sum;
+}
+
static bool _wildcard_match(const CharType* p_pattern, const CharType* p_string,bool p_case_sensitive) {
switch (*p_pattern) {
case '\0':
diff --git a/core/ustring.h b/core/ustring.h
index 8aceb0748c..692cb4e37d 100644
--- a/core/ustring.h
+++ b/core/ustring.h
@@ -123,6 +123,8 @@ public:
bool ends_with(const String& p_string) const;
bool is_subsequence_of(const String& p_string) const;
bool is_subsequence_ofi(const String& p_string) const;
+ Vector<String> bigrams() const;
+ float similarity(const String& p_string) const;
String replace_first(String p_key,String p_with) const;
String replace(String p_key,String p_with) const;
String replacen(String p_key,String p_with) const;
diff --git a/core/variant_call.cpp b/core/variant_call.cpp
index 0055138582..683b1611d8 100644
--- a/core/variant_call.cpp
+++ b/core/variant_call.cpp
@@ -249,6 +249,8 @@ static void _call_##m_type##_##m_method(Variant& r_ret,Variant& p_self,const Var
VCALL_LOCALMEM1R(String,ends_with);
VCALL_LOCALMEM1R(String,is_subsequence_of);
VCALL_LOCALMEM1R(String,is_subsequence_ofi);
+ VCALL_LOCALMEM0R(String,bigrams);
+ VCALL_LOCALMEM1R(String,similarity);
VCALL_LOCALMEM2R(String,replace);
VCALL_LOCALMEM2R(String,replacen);
VCALL_LOCALMEM2R(String,insert);
@@ -1274,6 +1276,8 @@ _VariantCall::addfunc(Variant::m_vtype,Variant::m_ret,_SCS(#m_method),VCALL(m_cl
ADDFUNC1(STRING,BOOL,String,ends_with,STRING,"text",varray());
ADDFUNC1(STRING,BOOL,String,is_subsequence_of,STRING,"text",varray());
ADDFUNC1(STRING,BOOL,String,is_subsequence_ofi,STRING,"text",varray());
+ ADDFUNC0(STRING,STRING_ARRAY,String,bigrams,varray());
+ ADDFUNC1(STRING,REAL,String,similarity,STRING,"text",varray());
ADDFUNC2(STRING,STRING,String,replace,STRING,"what",STRING,"forwhat",varray());
ADDFUNC2(STRING,STRING,String,replacen,STRING,"what",STRING,"forwhat",varray());