diff options
author | Ovnuniarchos <pujalte.jorge@gmail.com> | 2015-07-26 20:16:38 +0200 |
---|---|---|
committer | Ovnuniarchos <pujalte.jorge@gmail.com> | 2015-07-26 20:16:38 +0200 |
commit | af132adf94da56ff231ce6f69f8098666cf7d9b0 (patch) | |
tree | d40e6ad16b9907fb9ad134f3189035a9f85113b8 /drivers/nrex | |
parent | d1a260c9660e98da2f8a0ba4af2dd2cde6aeb311 (diff) | |
parent | 07c99e11f5939699fefa10fab0b898ddc9246f85 (diff) |
Merge remote-tracking branch 'refs/remotes/origin/master'
Diffstat (limited to 'drivers/nrex')
-rw-r--r-- | drivers/nrex/README.md | 64 | ||||
-rw-r--r-- | drivers/nrex/SCsub | 9 | ||||
-rw-r--r-- | drivers/nrex/nrex.cpp | 910 | ||||
-rw-r--r-- | drivers/nrex/nrex.hpp | 144 | ||||
-rw-r--r-- | drivers/nrex/nrex_config.h | 12 | ||||
-rw-r--r-- | drivers/nrex/regex.cpp | 114 | ||||
-rw-r--r-- | drivers/nrex/regex.h | 47 |
7 files changed, 1300 insertions, 0 deletions
diff --git a/drivers/nrex/README.md b/drivers/nrex/README.md new file mode 100644 index 0000000000..f150a5d76f --- /dev/null +++ b/drivers/nrex/README.md @@ -0,0 +1,64 @@ +# NREX: Node RegEx + +Small node-based regular expression library. It only does text pattern +matchhing, not replacement. To use add the files `nrex.hpp`, `nrex.cpp` +and `nrex_config.h` to your project and follow the example: + + nrex regex; + regex.compile("^(fo+)bar$"); + + nrex_result captures[regex.capture_size()]; + if (regex.match("foobar", captures)) + { + std::cout << captures[0].start << std::endl; + std::cout << captures[0].length << std::endl; + } + +More details about its use is documented in `nrex.hpp` + +Currently supported features: + * Capturing `()` and non-capturing `(?:)` groups + * Any character `.` + * Shorthand caracter classes `\w\W\s\S\d\D` + * User-defined character classes such as `[A-Za-z]` + * Simple quantifiers `?`, `*` and `+` + * Range quantifiers `{0,1}` + * Lazy (non-greedy) quantifiers `*?` + * Begining `^` and end `$` anchors + * Alternation `|` + * Backreferences `\1` to `\99` + +To do list: + * Unicode `\uFFFF` code points + +## License + +Copyright (c) 2015, Zher Huei Lee +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/drivers/nrex/SCsub b/drivers/nrex/SCsub new file mode 100644 index 0000000000..2441d3061b --- /dev/null +++ b/drivers/nrex/SCsub @@ -0,0 +1,9 @@ + +Import('env') + +sources = [ + 'nrex.cpp', + 'regex.cpp', +] +env.add_source_files(env.drivers_sources, sources) + diff --git a/drivers/nrex/nrex.cpp b/drivers/nrex/nrex.cpp new file mode 100644 index 0000000000..696d46240e --- /dev/null +++ b/drivers/nrex/nrex.cpp @@ -0,0 +1,910 @@ +// NREX: Node RegEx +// +// Copyright (c) 2015, Zher Huei Lee +// All rights reserved. +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would +// be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not +// be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#include "nrex.hpp" + +#ifdef NREX_UNICODE +#include <wctype.h> +#include <wchar.h> +#define NREX_ISALPHANUM iswalnum +#define NREX_STRLEN wcslen +#else +#include <ctype.h> +#include <string.h> +#define NREX_ISALPHANUM isalnum +#define NREX_STRLEN strlen +#endif + +#ifdef NREX_THROW_ERROR +#define NREX_COMPILE_ERROR(M) throw nrex_compile_error(M) +#else +#define NREX_COMPILE_ERROR(M) reset(); return false +#endif + +#ifndef NREX_NEW +#define NREX_NEW(X) new X +#define NREX_NEW_ARRAY(X, N) new X[N] +#define NREX_DELETE(X) delete X +#define NREX_DELETE_ARRAY(X) delete[] X +#endif + +template<typename T> +class nrex_array +{ + private: + T* _data; + unsigned int _reserved; + unsigned int _size; + public: + nrex_array() + : _data(NREX_NEW_ARRAY(T, 2)) + , _reserved(2) + , _size(0) + { + } + + ~nrex_array() + { + NREX_DELETE_ARRAY(_data); + } + + unsigned int size() const + { + return _size; + } + + void reserve(unsigned int size) + { + T* old = _data; + _data = NREX_NEW_ARRAY(T, size); + _reserved = size; + for (unsigned int i = 0; i < _size; ++i) + { + _data[i] = old[i]; + } + NREX_DELETE_ARRAY(old); + } + + void push(T item) + { + if (_size == _reserved) + { + reserve(_reserved * 2); + } + _data[_size] = item; + _size++; + } + + T& top() + { + return _data[_size - 1]; + } + + const T& operator[] (unsigned int i) const + { + return _data[i]; + } + + void pop() + { + if (_size > 0) + { + --_size; + } + } +}; + +static nrex_char nrex_unescape(nrex_char repr) +{ + switch (repr) + { + case '^': return '^'; + case '$': return '$'; + case '(': return '('; + case ')': return ')'; + case '\\': return '\\'; + case '.': return '.'; + case '+': return '+'; + case '*': return '*'; + case '?': return '?'; + case '-': return '-'; + case 'a': return '\a'; + case 'e': return '\e'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + } + return 0; +} + +struct nrex_search +{ + public: + const nrex_char* str; + nrex_result* captures; + int end; + bool complete; + + nrex_char at(int pos) + { + return str[pos]; + } + + nrex_search(const nrex_char* str, nrex_result* captures) + : str(str) + , captures(captures) + , end(0) + { + } +}; + +struct nrex_node +{ + nrex_node* next; + nrex_node* previous; + nrex_node* parent; + bool quantifiable; + + nrex_node(bool quantify = false) + : next(NULL) + , previous(NULL) + , parent(NULL) + , quantifiable(quantify) + { + } + + virtual ~nrex_node() + { + if (next) + { + NREX_DELETE(next); + } + } + + virtual int test(nrex_search* s, int pos) const + { + return next ? next->test(s, pos) : -1; + } + + virtual int test_parent(nrex_search* s, int pos) const + { + if (next) + { + pos = next->test(s, pos); + } + if (parent && pos >= 0) + { + pos = parent->test_parent(s, pos); + } + if (pos >= 0) + { + s->complete = true; + } + return pos; + } +}; + +struct nrex_node_group : public nrex_node +{ + int capturing; + bool negate; + nrex_array<nrex_node*> childset; + nrex_node* back; + + nrex_node_group(int capturing) + : nrex_node(true) + , capturing(capturing) + , negate(false) + , back(NULL) + { + } + + virtual ~nrex_node_group() + { + for (unsigned int i = 0; i < childset.size(); ++i) + { + NREX_DELETE(childset[i]); + } + + } + + int test(nrex_search* s, int pos) const + { + if (capturing >= 0) + { + s->captures[capturing].start = pos; + } + for (unsigned int i = 0; i < childset.size(); ++i) + { + s->complete = false; + int res = childset[i]->test(s, pos); + if (s->complete) + { + return res; + } + if (negate) + { + if (res < 0) + { + res = pos + 1; + } + else + { + return -1; + } + } + if (res >= 0) + { + if (capturing >= 0) + { + s->captures[capturing].length = res - pos; + } + return next ? next->test(s, res) : res; + } + } + return -1; + } + + virtual int test_parent(nrex_search* s, int pos) const + { + if (capturing >= 0) + { + s->captures[capturing].length = pos - s->captures[capturing].start; + } + return nrex_node::test_parent(s, pos); + } + + void add_childset() + { + back = NULL; + } + + void add_child(nrex_node* node) + { + node->parent = this; + node->previous = back; + if (back) + { + back->next = node; + } + else + { + childset.push(node); + } + back = node; + } + + nrex_node* swap_back(nrex_node* node) + { + if (!back) + { + add_child(node); + return NULL; + } + nrex_node* old = back; + if (!old->previous) + { + childset.pop(); + } + back = old->previous; + add_child(node); + return old; + } +}; + +struct nrex_node_char : public nrex_node +{ + nrex_char ch; + + nrex_node_char(nrex_char c) + : nrex_node(true) + , ch(c) + { + } + + int test(nrex_search* s, int pos) const + { + if (s->end == pos || s->at(pos) != ch) + { + return -1; + } + return next ? next->test(s, pos + 1) : pos + 1; + } +}; + +struct nrex_node_range : public nrex_node +{ + nrex_char start; + nrex_char end; + + nrex_node_range(nrex_char s, nrex_char e) + : nrex_node(true) + , start(s) + , end(e) + { + } + + int test(nrex_search* s, int pos) const + { + if (s->end == pos) + { + return -1; + } + nrex_char c = s->at(pos); + if (c < start || end < c) + { + return -1; + } + return next ? next->test(s, pos + 1) : pos + 1; + } +}; + +static bool nrex_is_whitespace(nrex_char repr) +{ + switch (repr) + { + case ' ': + case '\t': + case '\r': + case '\n': + case '\f': + return true; + } + return false; +} + +static bool nrex_is_shorthand(nrex_char repr) +{ + switch (repr) + { + case 'W': + case 'w': + case 'D': + case 'd': + case 'S': + case 's': + return true; + } + return false; +} + +struct nrex_node_shorthand : public nrex_node +{ + nrex_char repr; + + nrex_node_shorthand(nrex_char c) + : nrex_node(true) + , repr(c) + { + } + + int test(nrex_search* s, int pos) const + { + if (s->end == pos) + { + return -1; + } + bool found = false; + bool invert = false; + nrex_char c = s->at(pos); + switch (repr) + { + case '.': + found = true; + break; + case 'W': + invert = true; + case 'w': + if (c == '_' || NREX_ISALPHANUM(c)) + { + found = true; + } + break; + case 'D': + invert = true; + case 'd': + if ('0' <= c && c <= '9') + { + found = true; + } + break; + case 'S': + invert = true; + case 's': + if (nrex_is_whitespace(c)) + { + found = true; + } + break; + } + if (found == invert) + { + return -1; + } + return next ? next->test(s, pos + 1) : pos + 1; + } +}; + +static bool nrex_is_quantifier(nrex_char repr) +{ + switch (repr) + { + case '?': + case '*': + case '+': + case '{': + return true; + } + return false; +} + +struct nrex_node_quantifier : public nrex_node +{ + int min; + int max; + bool greedy; + nrex_node* child; + + nrex_node_quantifier() + : nrex_node() + , min(0) + , max(0) + , greedy(true) + , child(NULL) + { + } + + virtual ~nrex_node_quantifier() + { + if (child) + { + NREX_DELETE(child); + } + } + + int test(nrex_search* s, int pos) const + { + nrex_array<int> backtrack; + backtrack.push(pos); + while (backtrack.top() <= s->end) + { + if (max >= 1 && backtrack.size() > (unsigned int)max) + { + break; + } + if (!greedy && (unsigned int)min < backtrack.size()) + { + int res = backtrack.top(); + if (next) + { + res = next->test(s, res); + } + if (s->complete) + { + return res; + } + if (res >= 0 && parent->test_parent(s, res) >= 0) + { + return res; + } + } + int res = child->test(s, backtrack.top()); + if (s->complete) + { + return res; + } + if (res < 0 || res == backtrack.top()) + { + break; + } + backtrack.push(res); + } + while (greedy && (unsigned int) min < backtrack.size()) + { + int res = backtrack.top(); + if (next) + { + res = next->test(s, res); + } + if (res >= 0 && parent->test_parent(s, res) >= 0) + { + return res; + } + if (s->complete) + { + return res; + } + backtrack.pop(); + } + return -1; + } +}; + +struct nrex_node_anchor : public nrex_node +{ + bool end; + + nrex_node_anchor(bool end) + : nrex_node() + , end(end) + { + } + + int test(nrex_search* s, int pos) const + { + if (!end && pos != 0) + { + return -1; + } + else if (end && pos != s->end) + { + return -1; + } + return next ? next->test(s, pos) : pos; + } +}; + +struct nrex_node_backreference : public nrex_node +{ + int ref; + + nrex_node_backreference(int ref) + : nrex_node(true) + , ref(ref) + { + } + + int test(nrex_search* s, int pos) const + { + nrex_result& r = s->captures[ref]; + for (int i = 0; i < r.length; ++i) + { + if (pos + i >= s->end) + { + return -1; + } + if (s->at(r.start + i) != s->at(pos + i)) + { + return -1; + } + } + return next ? next->test(s, pos + r.length) : pos + r.length; + } +}; + +nrex::nrex() + : _capturing(0) + , _root(NULL) +{ +} + +nrex::~nrex() +{ + if (_root) + { + NREX_DELETE(_root); + } +} + +bool nrex::valid() const +{ + return (_root != NULL); +} + +void nrex::reset() +{ + _capturing = 0; + if (_root) + { + NREX_DELETE(_root); + } + _root = NULL; +} + +int nrex::capture_size() const +{ + return _capturing + 1; +} + +bool nrex::compile(const nrex_char* pattern) +{ + reset(); + nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing)); + nrex_array<nrex_node_group*> stack; + stack.push(root); + _root = root; + + for (const nrex_char* c = pattern; c[0] != '\0'; ++c) + { + if (c[0] == '(') + { + if (c[1] == '?') + { + if (c[2] == ':') + { + c = &c[2]; + nrex_node_group* group = NREX_NEW(nrex_node_group(-1)); + stack.top()->add_child(group); + stack.push(group); + } + else + { + NREX_COMPILE_ERROR("unrecognised qualifier for parenthesis"); + } + } + else if (_capturing < 99) + { + nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing)); + stack.top()->add_child(group); + stack.push(group); + } + else + { + nrex_node_group* group = NREX_NEW(nrex_node_group(-1)); + stack.top()->add_child(group); + stack.push(group); + } + } + else if (c[0] == ')') + { + if (stack.size() > 1) + { + stack.pop(); + } + else + { + NREX_COMPILE_ERROR("unexpected ')'"); + } + } + else if (c[0] == '[') + { + nrex_node_group* group = NREX_NEW(nrex_node_group(-1)); + stack.top()->add_child(group); + if (c[1] == '^') + { + group->negate = true; + ++c; + } + while (true) + { + group->add_childset(); + ++c; + if (c[0] == '\0') + { + NREX_COMPILE_ERROR("unclosed character class '[]'"); + } + if (c[0] == ']') + { + break; + } + else if (c[0] == '\\') + { + nrex_char unescaped = nrex_unescape(c[1]); + if (unescaped) + { + group->add_child(NREX_NEW(nrex_node_char(unescaped))); + ++c; + } + else if (nrex_is_shorthand(c[1])) + { + group->add_child(NREX_NEW(nrex_node_shorthand(c[1]))); + ++c; + } + else + { + NREX_COMPILE_ERROR("escape token not recognised"); + } + } + else + { + if (c[1] == '-' && c[2] != '\0') + { + bool range = false; + if ('A' <= c[0] && c[0] <= 'Z' && 'A' <= c[2] && c[2] <= 'Z') + { + range = true; + } + if ('a' <= c[0] && c[0] <= 'z' && 'a' <= c[2] && c[2] <= 'z') + { + range = true; + } + if ('0' <= c[0] && c[0] <= '9' && '0' <= c[2] && c[2] <= '9') + { + range = true; + } + if (range) + { + group->add_child(NREX_NEW(nrex_node_range(c[0], c[2]))); + c = &c[2]; + continue; + } + } + group->add_child(NREX_NEW(nrex_node_char(c[0]))); + } + + } + } + else if (nrex_is_quantifier(c[0])) + { + nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier); + quant->child = stack.top()->swap_back(quant); + if (quant->child == NULL || !quant->child->quantifiable) + { + NREX_COMPILE_ERROR("element not quantifiable"); + } + quant->child->previous = NULL; + quant->child->next = NULL; + quant->child->parent = quant; + if (c[0] == '?') + { + quant->min = 0; + quant->max = 1; + } + else if (c[0] == '+') + { + quant->min = 1; + quant->max = -1; + } + else if (c[0] == '*') + { + quant->min = 0; + quant->max = -1; + } + else if (c[0] == '{') + { + bool max_set = false; + quant->min = 0; + quant->max = -1; + while (true) + { + ++c; + if (c[0] == '\0') + { + NREX_COMPILE_ERROR("unclosed range quantifier '{}'"); + } + else if (c[0] == '}') + { + break; + } + else if (c[0] == ',') + { + max_set = true; + continue; + } + else if (c[0] < '0' || '9' < c[0]) + { + NREX_COMPILE_ERROR("expected numeric digits, ',' or '}'"); + } + if (max_set) + { + if (quant->max < 0) + { + quant->max = int(c[0] - '0'); + } + else + { + quant->max = quant->max * 10 + int(c[0] - '0'); + } + } + else + { + quant->min = quant->min * 10 + int(c[0] - '0'); + } + } + if (!max_set) + { + quant->max = quant->min; + } + } + if (c[1] == '?') + { + quant->greedy = false; + ++c; + } + } + else if (c[0] == '|') + { + stack.top()->add_childset(); + } + else if (c[0] == '^' || c[0] == '$') + { + stack.top()->add_child(NREX_NEW(nrex_node_anchor((c[0] == '$')))); + } + else if (c[0] == '.') + { + stack.top()->add_child(NREX_NEW(nrex_node_shorthand('.'))); + } + else if (c[0] == '\\') + { + nrex_char unescaped = nrex_unescape(c[1]); + if (unescaped) + { + stack.top()->add_child(NREX_NEW(nrex_node_char(unescaped))); + ++c; + } + else if (nrex_is_shorthand(c[1])) + { + stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1]))); + ++c; + } + else if ('1' <= c[1] && c[1] <= '9') + { + int ref = 0; + if ('0' <= c[2] && c[2] <= '9') + { + ref = int(c[1] - '0') * 10 + int(c[2] - '0'); + c = &c[2]; + } + else + { + ref = int(c[1] - '0'); + ++c; + } + if (ref > _capturing) + { + NREX_COMPILE_ERROR("backreference to non-existent capture"); + } + stack.top()->add_child(NREX_NEW(nrex_node_backreference(ref))); + } + else + { + NREX_COMPILE_ERROR("escape token not recognised"); + } + } + else + { + stack.top()->add_child(NREX_NEW(nrex_node_char(c[0]))); + } + } + return true; +} + +bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const +{ + nrex_search s(str, captures); + if (end >= offset) + { + s.end = end; + } + else + { + s.end = NREX_STRLEN(str); + } + for (int i = offset; i < s.end; ++i) + { + for (int c = 0; c <= _capturing; ++c) + { + captures[c].start = 0; + captures[c].length = 0; + } + if (_root->test(&s, i) >= 0) + { + return true; + } + } + return false; +} diff --git a/drivers/nrex/nrex.hpp b/drivers/nrex/nrex.hpp new file mode 100644 index 0000000000..2a6aa08e1d --- /dev/null +++ b/drivers/nrex/nrex.hpp @@ -0,0 +1,144 @@ +// NREX: Node RegEx +// +// Copyright (c) 2015, Zher Huei Lee +// All rights reserved. +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would +// be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not +// be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef NREX_HPP +#define NREX_HPP + +#include "nrex_config.h" + +#ifdef NREX_UNICODE +typedef wchar_t nrex_char; +#else +typedef char nrex_char; +#endif + +/*! + * \brief Struct to contain the range of a capture result + * + * The range provided is relative to the begining of the searched string. + * + * \see nrex_node::match() + */ +struct nrex_result +{ + public: + int start; /*!< Start of text range */ + int length; /*!< Length of text range */ +}; + +class nrex_node; + +/*! + * \brief Holds the compiled regex pattern + */ +class nrex +{ + private: + int _capturing; + nrex_node* _root; + public: + nrex(); + ~nrex(); + + /*! + * \brief Removes the compiled regex and frees up the memory + */ + void reset(); + + /*! + * \brief Checks if there is a compiled regex being stored + * \return True if present, False if not present + */ + bool valid() const; + + /*! + * \brief Provides number of captures the compiled regex uses + * + * This is used to provide the array size of the captures needed for + * nrex::match() to work. The size is actually the number of capture + * groups + one for the matching of the entire pattern. The result is + * always capped at 100. + * + * \return The number of captures + */ + int capture_size() const; + + /*! + * \brief Compiles the provided regex pattern + * + * This automatically removes the existing compiled regex if already + * present. + * + * If the NREX_THROW_ERROR was defined it would automatically throw a + * runtime error nrex_compile_error if it encounters a problem when + * parsing the pattern. + * + * \param The regex pattern + * \return True if the pattern was succesfully compiled + */ + bool compile(const nrex_char* pattern); + + /*! + * \brief Uses the pattern to search through the provided string + * \param str The text to search through. It only needs to be + * null terminated if the end point is not provided. + * This also determines the starting anchor. + * \param captures The array of results to store the capture results. + * The size of that array needs to be the same as the + * size given in nrex::capture_size(). As it matches + * the function fills the array with the results. 0 is + * the result for the entire pattern, 1 and above + * corresponds to the regex capture group if present. + * \param offset The starting point of the search. This does not move + * the starting anchor. Defaults to 0. + * \param end The end point of the search. This also determines + * the ending anchor. If a number less than the offset + * is provided, the search would be done until null + * termination. Defaults to -1. + * \return True if a match was found. False otherwise. + */ + bool match(const nrex_char* str, nrex_result* captures, int offset = 0, int end = -1) const; +}; + +#ifdef NREX_THROW_ERROR + +#include <stdexcept> + +class nrex_compile_error : std::runtime_error +{ + public: + nrex_compile_error(const char* message) + : std::runtime_error(message) + { + } + + ~nrex_compile_error() throw() + { + } +}; + +#endif + +#endif // NREX_HPP diff --git a/drivers/nrex/nrex_config.h b/drivers/nrex/nrex_config.h new file mode 100644 index 0000000000..540f34f8b4 --- /dev/null +++ b/drivers/nrex/nrex_config.h @@ -0,0 +1,12 @@ +// Godot-specific configuration +// To use this, replace nrex_config.h + +#include "core/os/memory.h" + +#define NREX_UNICODE +//#define NREX_THROW_ERROR + +#define NREX_NEW(X) memnew(X) +#define NREX_NEW_ARRAY(X, N) memnew_arr(X, N) +#define NREX_DELETE(X) memdelete(X) +#define NREX_DELETE_ARRAY(X) memdelete_arr(X) diff --git a/drivers/nrex/regex.cpp b/drivers/nrex/regex.cpp new file mode 100644 index 0000000000..0a813c3490 --- /dev/null +++ b/drivers/nrex/regex.cpp @@ -0,0 +1,114 @@ +/*************************************************/ +/* regex.cpp */ +/*************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/*************************************************/ +/* Source code within this file is: */ +/* (c) 2007-2010 Juan Linietsky, Ariel Manzur */ +/* All Rights Reserved. */ +/*************************************************/ + +#include "regex.h" +#include "nrex.hpp" +#include "core/os/memory.h" + +void RegEx::_bind_methods() { + + ObjectTypeDB::bind_method(_MD("compile","pattern"),&RegEx::compile); + ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1)); + ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear); + ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid); + ObjectTypeDB::bind_method(_MD("get_capture_count"),&RegEx::get_capture_count); + ObjectTypeDB::bind_method(_MD("get_capture","capture"),&RegEx::get_capture); + ObjectTypeDB::bind_method(_MD("get_captures"),&RegEx::_bind_get_captures); + +}; + +StringArray RegEx::_bind_get_captures() const { + + StringArray ret; + int count = get_capture_count(); + for (int i=0; i<count; i++) { + + String c = get_capture(i); + ret.push_back(c); + }; + + return ret; + +}; + +void RegEx::clear() { + + text.clear(); + captures.clear(); + exp.reset(); + +}; + +bool RegEx::is_valid() const { + + return exp.valid(); + +}; + +int RegEx::get_capture_count() const { + + return exp.capture_size(); +} + +String RegEx::get_capture(int capture) const { + + ERR_FAIL_COND_V( get_capture_count() <= capture, String() ); + + return text.substr(captures[capture].start, captures[capture].length); + +} + +Error RegEx::compile(const String& p_pattern) { + + clear(); + + exp.compile(p_pattern.c_str()); + + ERR_FAIL_COND_V( !exp.valid(), FAILED ); + + captures.resize(exp.capture_size()); + + return OK; + +}; + +int RegEx::find(const String& p_text, int p_start, int p_end) const { + + ERR_FAIL_COND_V( !exp.valid(), -1 ); + ERR_FAIL_COND_V( p_text.length() < p_start, -1 ); + ERR_FAIL_COND_V( p_text.length() < p_end, -1 ); + + bool res = exp.match(p_text.c_str(), &captures[0], p_start, p_end); + + if (res) { + text = p_text; + return captures[0].start; + } + text.clear(); + return -1; + +}; + +RegEx::RegEx(const String& p_pattern) { + + compile(p_pattern); + +}; + +RegEx::RegEx() { + +}; + +RegEx::~RegEx() { + + clear(); + +}; diff --git a/drivers/nrex/regex.h b/drivers/nrex/regex.h new file mode 100644 index 0000000000..0626029705 --- /dev/null +++ b/drivers/nrex/regex.h @@ -0,0 +1,47 @@ +/*************************************************/ +/* regex.h */ +/*************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/*************************************************/ +/* Source code within this file is: */ +/* (c) 2007-2010 Juan Linietsky, Ariel Manzur */ +/* All Rights Reserved. */ +/*************************************************/ + +#ifndef REGEX_H +#define REGEX_H + +#include "ustring.h" +#include "vector.h" +#include "core/reference.h" +#include "nrex.hpp" + +class RegEx : public Reference { + + OBJ_TYPE(RegEx, Reference); + + mutable String text; + mutable Vector<nrex_result> captures; + nrex exp; + +protected: + + static void _bind_methods(); + StringArray _bind_get_captures() const; + +public: + + void clear(); + bool is_valid() const; + int get_capture_count() const; + String get_capture(int capture) const; + Error compile(const String& p_pattern); + int find(const String& p_text, int p_start = 0, int p_end = -1) const; + + RegEx(); + RegEx(const String& p_pattern); + ~RegEx(); +}; + +#endif // REGEX_H |