diff options
Diffstat (limited to 'modules/regex/regex.cpp')
-rw-r--r-- | modules/regex/regex.cpp | 322 |
1 files changed, 92 insertions, 230 deletions
diff --git a/modules/regex/regex.cpp b/modules/regex/regex.cpp index 53d1a1dd65..c10a276eae 100644 --- a/modules/regex/regex.cpp +++ b/modules/regex/regex.cpp @@ -36,51 +36,46 @@ extern "C" { } static void *_regex_malloc(PCRE2_SIZE size, void *user) { - return memalloc(size); } static void _regex_free(void *ptr, void *user) { - memfree(ptr); } int RegExMatch::_find(const Variant &p_name) const { - if (p_name.is_num()) { - int i = (int)p_name; - if (i >= data.size()) + if (i >= data.size()) { return -1; + } return i; } else if (p_name.get_type() == Variant::STRING) { - const Map<String, int>::Element *found = names.find((String)p_name); - if (found) + if (found) { return found->value(); + } } return -1; } String RegExMatch::get_subject() const { - return subject; } int RegExMatch::get_group_count() const { - - if (data.size() == 0) + if (data.size() == 0) { return 0; + } return data.size() - 1; } Dictionary RegExMatch::get_names() const { - Dictionary result; - for (const Map<String, int>::Element *i = names.front(); i != NULL; i = i->next()) { + for (const Map<String, int>::Element *i = names.front(); i != nullptr; i = i->next()) { result[i->key()] = i->value(); } @@ -88,13 +83,11 @@ Dictionary RegExMatch::get_names() const { } Array RegExMatch::get_strings() const { - Array result; int size = data.size(); for (int i = 0; i < size; i++) { - int start = data[i].start; if (start == -1) { @@ -111,16 +104,17 @@ Array RegExMatch::get_strings() const { } String RegExMatch::get_string(const Variant &p_name) const { - int id = _find(p_name); - if (id < 0) + if (id < 0) { return String(); + } int start = data[id].start; - if (start == -1) + if (start == -1) { return String(); + } int length = data[id].end - start; @@ -128,27 +122,26 @@ String RegExMatch::get_string(const Variant &p_name) const { } int RegExMatch::get_start(const Variant &p_name) const { - int id = _find(p_name); - if (id < 0) + if (id < 0) { return -1; + } return data[id].start; } int RegExMatch::get_end(const Variant &p_name) const { - int id = _find(p_name); - if (id < 0) + if (id < 0) { return -1; + } return data[id].end; } void RegExMatch::_bind_methods() { - ClassDB::bind_method(D_METHOD("get_subject"), &RegExMatch::get_subject); ClassDB::bind_method(D_METHOD("get_group_count"), &RegExMatch::get_group_count); ClassDB::bind_method(D_METHOD("get_names"), &RegExMatch::get_names); @@ -163,37 +156,17 @@ void RegExMatch::_bind_methods() { } void RegEx::_pattern_info(uint32_t what, void *where) const { - - if (sizeof(CharType) == 2) { - - pcre2_pattern_info_16((pcre2_code_16 *)code, what, where); - - } else { - - pcre2_pattern_info_32((pcre2_code_32 *)code, what, where); - } + pcre2_pattern_info_32((pcre2_code_32 *)code, what, where); } void RegEx::clear() { - - if (sizeof(CharType) == 2) { - - if (code) { - pcre2_code_free_16((pcre2_code_16 *)code); - code = NULL; - } - - } else { - - if (code) { - pcre2_code_free_32((pcre2_code_32 *)code); - code = NULL; - } + if (code) { + pcre2_code_free_32((pcre2_code_32 *)code); + code = nullptr; } } Error RegEx::compile(const String &p_pattern) { - pattern = p_pattern; clear(); @@ -201,122 +174,67 @@ Error RegEx::compile(const String &p_pattern) { PCRE2_SIZE offset; uint32_t flags = PCRE2_DUPNAMES; - if (sizeof(CharType) == 2) { - - pcre2_general_context_16 *gctx = (pcre2_general_context_16 *)general_ctx; - pcre2_compile_context_16 *cctx = pcre2_compile_context_create_16(gctx); - PCRE2_SPTR16 p = (PCRE2_SPTR16)pattern.c_str(); + pcre2_general_context_32 *gctx = (pcre2_general_context_32 *)general_ctx; + pcre2_compile_context_32 *cctx = pcre2_compile_context_create_32(gctx); + PCRE2_SPTR32 p = (PCRE2_SPTR32)pattern.get_data(); - code = pcre2_compile_16(p, pattern.length(), flags, &err, &offset, cctx); + code = pcre2_compile_32(p, pattern.length(), flags, &err, &offset, cctx); - pcre2_compile_context_free_16(cctx); + pcre2_compile_context_free_32(cctx); - if (!code) { - PCRE2_UCHAR16 buf[256]; - pcre2_get_error_message_16(err, buf, 256); - String message = String::num(offset) + ": " + String((const CharType *)buf); - ERR_PRINT(message.utf8()); - return FAILED; - } - - } else { - - pcre2_general_context_32 *gctx = (pcre2_general_context_32 *)general_ctx; - pcre2_compile_context_32 *cctx = pcre2_compile_context_create_32(gctx); - PCRE2_SPTR32 p = (PCRE2_SPTR32)pattern.c_str(); - - code = pcre2_compile_32(p, pattern.length(), flags, &err, &offset, cctx); - - pcre2_compile_context_free_32(cctx); - - if (!code) { - PCRE2_UCHAR32 buf[256]; - pcre2_get_error_message_32(err, buf, 256); - String message = String::num(offset) + ": " + String((const CharType *)buf); - ERR_PRINT(message.utf8()); - return FAILED; - } + if (!code) { + PCRE2_UCHAR32 buf[256]; + pcre2_get_error_message_32(err, buf, 256); + String message = String::num(offset) + ": " + String((const char32_t *)buf); + ERR_PRINT(message.utf8()); + return FAILED; } return OK; } Ref<RegExMatch> RegEx::search(const String &p_subject, int p_offset, int p_end) const { - - ERR_FAIL_COND_V(!is_valid(), NULL); + ERR_FAIL_COND_V(!is_valid(), nullptr); Ref<RegExMatch> result = memnew(RegExMatch); int length = p_subject.length(); - if (p_end >= 0 && p_end < length) + if (p_end >= 0 && p_end < length) { length = p_end; + } - if (sizeof(CharType) == 2) { - - pcre2_code_16 *c = (pcre2_code_16 *)code; - pcre2_general_context_16 *gctx = (pcre2_general_context_16 *)general_ctx; - pcre2_match_context_16 *mctx = pcre2_match_context_create_16(gctx); - PCRE2_SPTR16 s = (PCRE2_SPTR16)p_subject.c_str(); - - pcre2_match_data_16 *match = pcre2_match_data_create_from_pattern_16(c, gctx); - - int res = pcre2_match_16(c, s, length, p_offset, 0, match, mctx); - - if (res < 0) { - pcre2_match_data_free_16(match); - return NULL; - } - - uint32_t size = pcre2_get_ovector_count_16(match); - PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(match); - - result->data.resize(size); - - for (uint32_t i = 0; i < size; i++) { - - result->data.write[i].start = ovector[i * 2]; - result->data.write[i].end = ovector[i * 2 + 1]; - } - - pcre2_match_data_free_16(match); - pcre2_match_context_free_16(mctx); - - } else { - - pcre2_code_32 *c = (pcre2_code_32 *)code; - pcre2_general_context_32 *gctx = (pcre2_general_context_32 *)general_ctx; - pcre2_match_context_32 *mctx = pcre2_match_context_create_32(gctx); - PCRE2_SPTR32 s = (PCRE2_SPTR32)p_subject.c_str(); - - pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx); + pcre2_code_32 *c = (pcre2_code_32 *)code; + pcre2_general_context_32 *gctx = (pcre2_general_context_32 *)general_ctx; + pcre2_match_context_32 *mctx = pcre2_match_context_create_32(gctx); + PCRE2_SPTR32 s = (PCRE2_SPTR32)p_subject.get_data(); - int res = pcre2_match_32(c, s, length, p_offset, 0, match, mctx); + pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx); - if (res < 0) { - pcre2_match_data_free_32(match); - pcre2_match_context_free_32(mctx); + int res = pcre2_match_32(c, s, length, p_offset, 0, match, mctx); - return NULL; - } - - uint32_t size = pcre2_get_ovector_count_32(match); - PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(match); + if (res < 0) { + pcre2_match_data_free_32(match); + pcre2_match_context_free_32(mctx); - result->data.resize(size); + return nullptr; + } - for (uint32_t i = 0; i < size; i++) { + uint32_t size = pcre2_get_ovector_count_32(match); + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(match); - result->data.write[i].start = ovector[i * 2]; - result->data.write[i].end = ovector[i * 2 + 1]; - } + result->data.resize(size); - pcre2_match_data_free_32(match); - pcre2_match_context_free_32(mctx); + for (uint32_t i = 0; i < size; i++) { + result->data.write[i].start = ovector[i * 2]; + result->data.write[i].end = ovector[i * 2 + 1]; } + pcre2_match_data_free_32(match); + pcre2_match_context_free_32(mctx); + result->subject = p_subject; uint32_t count; - const CharType *table; + const char32_t *table; uint32_t entry_size; _pattern_info(PCRE2_INFO_NAMECOUNT, &count); @@ -324,13 +242,14 @@ Ref<RegExMatch> RegEx::search(const String &p_subject, int p_offset, int p_end) _pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &entry_size); for (uint32_t i = 0; i < count; i++) { - - CharType id = table[i * entry_size]; - if (result->data[id].start == -1) + char32_t id = table[i * entry_size]; + if (result->data[id].start == -1) { continue; + } String name = &table[i * entry_size + 1]; - if (result->names.has(name)) + if (result->names.has(name)) { continue; + } result->names.insert(name, id); } @@ -339,13 +258,13 @@ Ref<RegExMatch> RegEx::search(const String &p_subject, int p_offset, int p_end) } Array RegEx::search_all(const String &p_subject, int p_offset, int p_end) const { - int last_end = -1; Array result; Ref<RegExMatch> match = search(p_subject, p_offset, p_end); while (match.is_valid()) { - if (last_end == match->get_end(0)) + if (last_end == match->get_end(0)) { break; + } result.push_back(match); last_end = match->get_end(0); match = search(p_subject, match->get_end(0), p_end); @@ -354,7 +273,6 @@ Array RegEx::search_all(const String &p_subject, int p_offset, int p_end) const } String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_all, int p_offset, int p_end) const { - ERR_FAIL_COND_V(!is_valid(), String()); // safety_zone is the number of chars we allocate in addition to the number of chars expected in order to @@ -364,83 +282,55 @@ String RegEx::sub(const String &p_subject, const String &p_replacement, bool p_a const int safety_zone = 1; PCRE2_SIZE olength = p_subject.length() + 1; // space for output string and one terminating \0 character - Vector<CharType> output; + Vector<char32_t> output; output.resize(olength + safety_zone); uint32_t flags = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; - if (p_all) + if (p_all) { flags |= PCRE2_SUBSTITUTE_GLOBAL; + } PCRE2_SIZE length = p_subject.length(); - if (p_end >= 0 && (uint32_t)p_end < length) + if (p_end >= 0 && (uint32_t)p_end < length) { length = p_end; + } - if (sizeof(CharType) == 2) { - - pcre2_code_16 *c = (pcre2_code_16 *)code; - pcre2_general_context_16 *gctx = (pcre2_general_context_16 *)general_ctx; - pcre2_match_context_16 *mctx = pcre2_match_context_create_16(gctx); - PCRE2_SPTR16 s = (PCRE2_SPTR16)p_subject.c_str(); - PCRE2_SPTR16 r = (PCRE2_SPTR16)p_replacement.c_str(); - PCRE2_UCHAR16 *o = (PCRE2_UCHAR16 *)output.ptrw(); - - pcre2_match_data_16 *match = pcre2_match_data_create_from_pattern_16(c, gctx); - - int res = pcre2_substitute_16(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength); - - if (res == PCRE2_ERROR_NOMEMORY) { - output.resize(olength + safety_zone); - o = (PCRE2_UCHAR16 *)output.ptrw(); - res = pcre2_substitute_16(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength); - } - - pcre2_match_data_free_16(match); - pcre2_match_context_free_16(mctx); - - if (res < 0) - return String(); - - } else { + pcre2_code_32 *c = (pcre2_code_32 *)code; + pcre2_general_context_32 *gctx = (pcre2_general_context_32 *)general_ctx; + pcre2_match_context_32 *mctx = pcre2_match_context_create_32(gctx); + PCRE2_SPTR32 s = (PCRE2_SPTR32)p_subject.get_data(); + PCRE2_SPTR32 r = (PCRE2_SPTR32)p_replacement.get_data(); + PCRE2_UCHAR32 *o = (PCRE2_UCHAR32 *)output.ptrw(); - pcre2_code_32 *c = (pcre2_code_32 *)code; - pcre2_general_context_32 *gctx = (pcre2_general_context_32 *)general_ctx; - pcre2_match_context_32 *mctx = pcre2_match_context_create_32(gctx); - PCRE2_SPTR32 s = (PCRE2_SPTR32)p_subject.c_str(); - PCRE2_SPTR32 r = (PCRE2_SPTR32)p_replacement.c_str(); - PCRE2_UCHAR32 *o = (PCRE2_UCHAR32 *)output.ptrw(); + pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx); - pcre2_match_data_32 *match = pcre2_match_data_create_from_pattern_32(c, gctx); + int res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength); - int res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength); + if (res == PCRE2_ERROR_NOMEMORY) { + output.resize(olength + safety_zone); + o = (PCRE2_UCHAR32 *)output.ptrw(); + res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength); + } - if (res == PCRE2_ERROR_NOMEMORY) { - output.resize(olength + safety_zone); - o = (PCRE2_UCHAR32 *)output.ptrw(); - res = pcre2_substitute_32(c, s, length, p_offset, flags, match, mctx, r, p_replacement.length(), o, &olength); - } + pcre2_match_data_free_32(match); + pcre2_match_context_free_32(mctx); - pcre2_match_data_free_32(match); - pcre2_match_context_free_32(mctx); - - if (res < 0) - return String(); + if (res < 0) { + return String(); } return String(output.ptr(), olength); } bool RegEx::is_valid() const { - - return (code != NULL); + return (code != nullptr); } String RegEx::get_pattern() const { - return pattern; } int RegEx::get_group_count() const { - ERR_FAIL_COND_V(!is_valid(), 0); uint32_t count; @@ -451,13 +341,12 @@ int RegEx::get_group_count() const { } Array RegEx::get_names() const { - Array result; ERR_FAIL_COND_V(!is_valid(), result); uint32_t count; - const CharType *table; + const char32_t *table; uint32_t entry_size; _pattern_info(PCRE2_INFO_NAMECOUNT, &count); @@ -465,7 +354,6 @@ Array RegEx::get_names() const { _pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &entry_size); for (uint32_t i = 0; i < count; i++) { - String name = &table[i * entry_size + 1]; if (result.find(name) < 0) { result.append(name); @@ -476,50 +364,24 @@ Array RegEx::get_names() const { } RegEx::RegEx() { - - if (sizeof(CharType) == 2) { - - general_ctx = pcre2_general_context_create_16(&_regex_malloc, &_regex_free, NULL); - - } else { - - general_ctx = pcre2_general_context_create_32(&_regex_malloc, &_regex_free, NULL); - } - code = NULL; + general_ctx = pcre2_general_context_create_32(&_regex_malloc, &_regex_free, nullptr); + code = nullptr; } RegEx::RegEx(const String &p_pattern) { - - if (sizeof(CharType) == 2) { - - general_ctx = pcre2_general_context_create_16(&_regex_malloc, &_regex_free, NULL); - - } else { - - general_ctx = pcre2_general_context_create_32(&_regex_malloc, &_regex_free, NULL); - } - code = NULL; + general_ctx = pcre2_general_context_create_32(&_regex_malloc, &_regex_free, nullptr); + code = nullptr; compile(p_pattern); } RegEx::~RegEx() { - - if (sizeof(CharType) == 2) { - - if (code) - pcre2_code_free_16((pcre2_code_16 *)code); - pcre2_general_context_free_16((pcre2_general_context_16 *)general_ctx); - - } else { - - if (code) - pcre2_code_free_32((pcre2_code_32 *)code); - pcre2_general_context_free_32((pcre2_general_context_32 *)general_ctx); + if (code) { + pcre2_code_free_32((pcre2_code_32 *)code); } + pcre2_general_context_free_32((pcre2_general_context_32 *)general_ctx); } void RegEx::_bind_methods() { - ClassDB::bind_method(D_METHOD("clear"), &RegEx::clear); ClassDB::bind_method(D_METHOD("compile", "pattern"), &RegEx::compile); ClassDB::bind_method(D_METHOD("search", "subject", "offset", "end"), &RegEx::search, DEFVAL(0), DEFVAL(-1)); |