summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/nrex/README.md4
-rw-r--r--drivers/nrex/nrex.cpp101
-rw-r--r--drivers/nrex/nrex.hpp41
-rw-r--r--drivers/nrex/regex.cpp6
-rw-r--r--drivers/nrex/regex.h2
5 files changed, 108 insertions, 46 deletions
diff --git a/drivers/nrex/README.md b/drivers/nrex/README.md
index 951b301c1e..9ff67992dc 100644
--- a/drivers/nrex/README.md
+++ b/drivers/nrex/README.md
@@ -1,5 +1,7 @@
# NREX: Node RegEx
+Version 0.1
+
Small node-based regular expression library. It only does text pattern
matching, not replacement. To use add the files `nrex.hpp`, `nrex.cpp`
and `nrex_config.h` to your project and follow the example:
@@ -32,7 +34,7 @@ Currently supported features:
* Unicode `\uFFFF` code points
* Positive `(?=)` and negative `(?!)` lookahead
* Positive `(?<=)` and negative `(?<!)` lookbehind (fixed length and no alternations)
- * Backreferences `\1` to `\9` (with option to expand to `\99`)
+ * Backreferences `\1` and `\g{1}` (limited by default to 9 - can be unlimited)
## License
diff --git a/drivers/nrex/nrex.cpp b/drivers/nrex/nrex.cpp
index 104e07f887..1eb9ec38c8 100644
--- a/drivers/nrex/nrex.cpp
+++ b/drivers/nrex/nrex.cpp
@@ -1,4 +1,5 @@
// NREX: Node RegEx
+// Version 0.1
//
// Copyright (c) 2015, Zher Huei Lee
// All rights reserved.
@@ -299,6 +300,10 @@ struct nrex_node_group : public nrex_node
{
length = 1;
}
+ if (mode == LookAhead || mode == LookBehind)
+ {
+ quantifiable = false;
+ }
}
virtual ~nrex_node_group()
@@ -322,6 +327,10 @@ struct nrex_node_group : public nrex_node
int offset = 0;
if (mode == LookBehind)
{
+ if (pos < length)
+ {
+ return -1;
+ }
offset = length;
}
int res = childset[i]->test(s, pos - offset);
@@ -450,7 +459,7 @@ struct nrex_node_char : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos || s->at(pos) != ch)
+ if (s->end <= pos || 0 > pos || s->at(pos) != ch)
{
return -1;
}
@@ -473,7 +482,7 @@ struct nrex_node_range : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos)
+ if (s->end <= pos || 0 > pos)
{
return -1;
}
@@ -555,7 +564,7 @@ struct nrex_node_class : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos)
+ if (s->end <= pos || 0 > pos)
{
return -1;
}
@@ -727,7 +736,7 @@ struct nrex_node_shorthand : public nrex_node
int test(nrex_search* s, int pos) const
{
- if (s->end == pos)
+ if (s->end <= pos || 0 > pos)
{
return -1;
}
@@ -811,16 +820,12 @@ struct nrex_node_quantifier : public nrex_node
int test(nrex_search* s, int pos) const
{
- return test_step(s, pos, 1);
+ return test_step(s, pos, 0, pos);
}
- int test_step(nrex_search* s, int pos, int level) const
+ int test_step(nrex_search* s, int pos, int level, int start) const
{
- if (max == 0)
- {
- return pos;
- }
- if ((max >= 1 && level > max) || pos > s->end)
+ if (pos > s->end)
{
return -1;
}
@@ -840,14 +845,26 @@ struct nrex_node_quantifier : public nrex_node
return res;
}
}
- int res = child->test(s, pos);
- if (s->complete)
+ if (max >= 0 && level > max)
{
- return res;
+ return -1;
+ }
+ if (level > 1 && level > min + 1 && pos == start)
+ {
+ return -1;
+ }
+ int res = pos;
+ if (level >= 1)
+ {
+ res = child->test(s, pos);
+ if (s->complete)
+ {
+ return res;
+ }
}
if (res >= 0)
{
- int res_step = test_step(s, res, level + 1);
+ int res_step = test_step(s, res, level + 1, start);
if (res_step >= 0)
{
return res_step;
@@ -983,6 +1000,13 @@ nrex::nrex()
{
}
+nrex::nrex(const nrex_char* pattern, int captures)
+ : _capturing(0)
+ , _root(NULL)
+{
+ compile(pattern, captures);
+}
+
nrex::~nrex()
{
if (_root)
@@ -1008,10 +1032,14 @@ void nrex::reset()
int nrex::capture_size() const
{
- return _capturing + 1;
+ if (_root)
+ {
+ return _capturing + 1;
+ }
+ return 0;
}
-bool nrex::compile(const nrex_char* pattern, bool extended)
+bool nrex::compile(const nrex_char* pattern, int captures)
{
reset();
nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing));
@@ -1053,7 +1081,7 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
NREX_COMPILE_ERROR("unrecognised qualifier for group");
}
}
- else if ((!extended && _capturing < 9) || (extended && _capturing < 99))
+ else if (captures >= 0 && _capturing < captures)
{
nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing));
stack.top()->add_child(group);
@@ -1190,15 +1218,6 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
}
else if (nrex_is_quantifier(c[0]))
{
- if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
- {
- if (c[0] == '{')
- {
- stack.top()->add_child(NREX_NEW(nrex_node_char('{')));
- continue;
- }
- NREX_COMPILE_ERROR("element not quantifiable");
- }
int min = 0;
int max = -1;
bool valid_quantifier = true;
@@ -1270,6 +1289,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
}
if (valid_quantifier)
{
+ if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
+ {
+ NREX_COMPILE_ERROR("element not quantifiable");
+ }
nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
if (min == max)
{
@@ -1323,20 +1346,26 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
++c;
}
- else if ('1' <= c[1] && c[1] <= '9')
+ else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
{
int ref = 0;
- if (extended && '0' <= c[2] && c[2] <= '9')
+ bool unclosed = false;
+ if (c[1] == 'g')
{
- ref = int(c[1] - '0') * 10 + int(c[2] - '0');
+ unclosed = true;
c = &c[2];
}
- else
+ while ('0' <= c[1] && c[1] <= '9')
{
- ref = int(c[1] - '0');
+ ref = ref * 10 + int(c[1] - '0');
++c;
}
- if (ref > _capturing)
+ if (c[1] == '}')
+ {
+ unclosed = false;
+ ++c;
+ }
+ if (ref > _capturing || ref <= 0 || unclosed)
{
NREX_COMPILE_ERROR("backreference to non-existent capture");
}
@@ -1377,6 +1406,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
{
+ if (!_root)
+ {
+ return false;
+ }
nrex_search s(str, captures);
if (end >= offset)
{
@@ -1386,7 +1419,7 @@ bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int en
{
s.end = NREX_STRLEN(str);
}
- for (int i = offset; i < s.end; ++i)
+ for (int i = offset; i <= s.end; ++i)
{
for (int c = 0; c <= _capturing; ++c)
{
diff --git a/drivers/nrex/nrex.hpp b/drivers/nrex/nrex.hpp
index e26a61c39a..44e950c517 100644
--- a/drivers/nrex/nrex.hpp
+++ b/drivers/nrex/nrex.hpp
@@ -1,4 +1,5 @@
// NREX: Node RegEx
+// Version 0.1
//
// Copyright (c) 2015, Zher Huei Lee
// All rights reserved.
@@ -59,7 +60,32 @@ class nrex
int _capturing;
nrex_node* _root;
public:
+
+ /*!
+ * \brief Initialises an empty regex container
+ */
nrex();
+
+ /*!
+ * \brief Initialises and compiles the regex pattern
+ *
+ * This calls nrex::compile() with the same arguments. To check whether
+ * the compilation was successful, use nrex::valid().
+ *
+ * If the NREX_THROW_ERROR was defined it would automatically throw a
+ * runtime error nrex_compile_error if it encounters a problem when
+ * parsing the pattern.
+ *
+ * \param pattern The regex pattern
+ * \param captures The maximum number of capture groups to allow. Any
+ * extra would be converted to non-capturing groups.
+ * If negative, no limit would be imposed. Defaults
+ * to 9.
+ *
+ * \see nrex::compile()
+ */
+ nrex(const nrex_char* pattern, int captures = 9);
+
~nrex();
/*!
@@ -78,9 +104,9 @@ class nrex
*
* This is used to provide the array size of the captures needed for
* nrex::match() to work. The size is actually the number of capture
- * groups + one for the matching of the entire pattern. The result is
- * always capped at 10 or 100, depending on the extend option given in
- * nrex::compile() (default 10).
+ * groups + one for the matching of the entire pattern. This can be
+ * capped using the extra argument given in nrex::compile()
+ * (default 10).
*
* \return The number of captures
*/
@@ -97,12 +123,13 @@ class nrex
* parsing the pattern.
*
* \param pattern The regex pattern
- * \param extended If true, raises the limit on number of capture
- * groups and back-references to 99. Otherwise limited
- * to 9. Defaults to false.
+ * \param captures The maximum number of capture groups to allow. Any
+ * extra would be converted to non-capturing groups.
+ * If negative, no limit would be imposed. Defaults
+ * to 9.
* \return True if the pattern was successfully compiled
*/
- bool compile(const nrex_char* pattern, bool extended = false);
+ bool compile(const nrex_char* pattern, int captures = 9);
/*!
* \brief Uses the pattern to search through the provided string
diff --git a/drivers/nrex/regex.cpp b/drivers/nrex/regex.cpp
index 246384b10a..e8578221a9 100644
--- a/drivers/nrex/regex.cpp
+++ b/drivers/nrex/regex.cpp
@@ -15,7 +15,7 @@
void RegEx::_bind_methods() {
- ObjectTypeDB::bind_method(_MD("compile","pattern", "expanded"),&RegEx::compile, DEFVAL(true));
+ ObjectTypeDB::bind_method(_MD("compile","pattern", "capture"),&RegEx::compile, DEFVAL(9));
ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1));
ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear);
ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid);
@@ -68,11 +68,11 @@ String RegEx::get_capture(int capture) const {
}
-Error RegEx::compile(const String& p_pattern, bool expanded) {
+Error RegEx::compile(const String& p_pattern, int capture) {
clear();
- exp.compile(p_pattern.c_str(), expanded);
+ exp.compile(p_pattern.c_str(), capture);
ERR_FAIL_COND_V( !exp.valid(), FAILED );
diff --git a/drivers/nrex/regex.h b/drivers/nrex/regex.h
index be52da8149..76aab2aea6 100644
--- a/drivers/nrex/regex.h
+++ b/drivers/nrex/regex.h
@@ -36,7 +36,7 @@ public:
bool is_valid() const;
int get_capture_count() const;
String get_capture(int capture) const;
- Error compile(const String& p_pattern, bool expanded = false);
+ Error compile(const String& p_pattern, int capture = 9);
int find(const String& p_text, int p_start = 0, int p_end = -1) const;
RegEx();