summary refs log tree commit diff
path: root/thirdparty/pcre2/src/pcre2_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_compile.c')
-rw-r--r-- thirdparty/pcre2/src/pcre2_compile.c 336
1 files changed, 233 insertions, 103 deletions
diff --git a/thirdparty/pcre2/src/pcre2_compile.c b/thirdparty/pcre2/src/pcre2_compile.c
index 87530fb584..6bb1de3610 100644
--- a/thirdparty/pcre2/src/pcre2_compile.c
+++ b/thirdparty/pcre2/src/pcre2_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Other debugging code can be enabled by these defines. */
-// #define DEBUG_SHOW_CAPTURES
-// #define DEBUG_SHOW_PARSED
+/* #define DEBUG_SHOW_CAPTURES */
+/* #define DEBUG_SHOW_PARSED */
/* There are a few things that vary with different code unit sizes. Handle them
by defining macros in order to minimize #if usage. */
@@ -250,34 +250,35 @@ is present where expected in a conditional group. */
#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
/* These must be kept in this order, with consecutive values, and the _ARG
-versions of PRUNE, SKIP, and THEN immediately after their non-argument
+versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. */
#define META_MARK 0x80260000u /* (*MARK) */
#define META_ACCEPT 0x80270000u /* (*ACCEPT) */
-#define META_COMMIT 0x80280000u /* (*COMMIT) */
-#define META_FAIL 0x80290000u /* (*FAIL) */
-#define META_PRUNE 0x802a0000u /* These pairs must */
-#define META_PRUNE_ARG 0x802b0000u /* be */
-#define META_SKIP 0x802c0000u /* kept */
-#define META_SKIP_ARG 0x802d0000u /* in */
-#define META_THEN 0x802e0000u /* this */
-#define META_THEN_ARG 0x802f0000u /* order */
+#define META_FAIL 0x80280000u /* (*FAIL) */
+#define META_COMMIT 0x80290000u /* These */
+#define META_COMMIT_ARG 0x802a0000u /* pairs */
+#define META_PRUNE 0x802b0000u /* must */
+#define META_PRUNE_ARG 0x802c0000u /* be */
+#define META_SKIP 0x802d0000u /* kept */
+#define META_SKIP_ARG 0x802e0000u /* in */
+#define META_THEN 0x802f0000u /* this */
+#define META_THEN_ARG 0x80300000u /* order */
/* These must be kept in groups of adjacent 3 values, and all together. */
-#define META_ASTERISK 0x80300000u /* * */
-#define META_ASTERISK_PLUS 0x80310000u /* *+ */
-#define META_ASTERISK_QUERY 0x80320000u /* *? */
-#define META_PLUS 0x80330000u /* + */
-#define META_PLUS_PLUS 0x80340000u /* ++ */
-#define META_PLUS_QUERY 0x80350000u /* +? */
-#define META_QUERY 0x80360000u /* ? */
-#define META_QUERY_PLUS 0x80370000u /* ?+ */
-#define META_QUERY_QUERY 0x80380000u /* ?? */
-#define META_MINMAX 0x80390000u /* {n,m} repeat */
-#define META_MINMAX_PLUS 0x803a0000u /* {n,m}+ repeat */
-#define META_MINMAX_QUERY 0x803b0000u /* {n,m}? repeat */
+#define META_ASTERISK 0x80310000u /* * */
+#define META_ASTERISK_PLUS 0x80320000u /* *+ */
+#define META_ASTERISK_QUERY 0x80330000u /* *? */
+#define META_PLUS 0x80340000u /* + */
+#define META_PLUS_PLUS 0x80350000u /* ++ */
+#define META_PLUS_QUERY 0x80360000u /* +? */
+#define META_QUERY 0x80370000u /* ? */
+#define META_QUERY_PLUS 0x80380000u /* ?+ */
+#define META_QUERY_QUERY 0x80390000u /* ?? */
+#define META_MINMAX 0x803a0000u /* {n,m} repeat */
+#define META_MINMAX_PLUS 0x803b0000u /* {n,m}+ repeat */
+#define META_MINMAX_QUERY 0x803c0000u /* {n,m}? repeat */
#define META_FIRST_QUANTIFIER META_ASTERISK
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
@@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {
SIZEOFFSET, /* META_LOOKBEHINDNOT */
1, /* META_MARK - plus the string length */
0, /* META_ACCEPT */
- 0, /* META_COMMIT */
0, /* META_FAIL */
+ 0, /* META_COMMIT */
+ 1, /* META_COMMIT_ARG - plus the string length */
0, /* META_PRUNE */
1, /* META_PRUNE_ARG - plus the string length */
0, /* META_SKIP */
@@ -510,17 +512,17 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
- CHAR_GRAVE_ACCENT, ESC_a,
+ CHAR_GRAVE_ACCENT, CHAR_BEL,
-ESC_b, 0,
- -ESC_d, ESC_e,
- ESC_f, 0,
+ -ESC_d, CHAR_ESC,
+ CHAR_FF, 0,
-ESC_h, 0,
0, -ESC_k,
0, 0,
- ESC_n, 0,
+ CHAR_LF, 0,
-ESC_p, 0,
- ESC_r, -ESC_s,
- ESC_tee, 0,
+ CHAR_CR, -ESC_s,
+ CHAR_HT, 0,
-ESC_v, -ESC_w,
0, 0,
-ESC_z
@@ -544,22 +546,22 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif
static const short int escapes[] = {
-/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
-/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
-/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
-/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
-/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
-/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
-/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
-/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
-/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
-/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P,
-/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
-/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
-/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
-/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
-/* F8 */ 0, 0
+/* 80 */ CHAR_BEL, -ESC_b, 0, -ESC_d, CHAR_ESC, CHAR_FF, 0,
+/* 88 */ -ESC_h, 0, 0, '{', 0, 0, 0, 0,
+/* 90 */ 0, 0, -ESC_k, 0, 0, CHAR_LF, 0, -ESC_p,
+/* 98 */ 0, CHAR_CR, 0, '}', 0, 0, 0, 0,
+/* A0 */ 0, '~', -ESC_s, CHAR_HT, 0, -ESC_v, -ESC_w, 0,
+/* A8 */ 0, -ESC_z, 0, 0, 0, '[', 0, 0,
+/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
+/* C0 */ '{', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G,
+/* C8 */ -ESC_H, 0, 0, 0, 0, 0, 0, 0,
+/* D0 */ '}', 0, -ESC_K, 0, 0, -ESC_N, 0, -ESC_P,
+/* D8 */ -ESC_Q, -ESC_R, 0, 0, 0, 0, 0, 0,
+/* E0 */ '\\', 0, -ESC_S, 0, 0, -ESC_V, -ESC_W, -ESC_X,
+/* E8 */ 0, -ESC_Z, 0, 0, 0, 0, 0, 0,
+/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
+/* F8 */ 0, 0
};
/* We also need a table of characters that may follow \c in an EBCDIC
@@ -586,9 +588,9 @@ static const char verbnames[] =
"\0" /* Empty name is a shorthand for MARK */
STRING_MARK0
STRING_ACCEPT0
- STRING_COMMIT0
STRING_F0
STRING_FAIL0
+ STRING_COMMIT0
STRING_PRUNE0
STRING_SKIP0
STRING_THEN;
@@ -596,11 +598,11 @@ static const char verbnames[] =
static const verbitem verbs[] = {
{ 0, META_MARK, +1 }, /* > 0 => must have an argument */
{ 4, META_MARK, +1 },
- { 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */
- { 6, META_COMMIT, -1 },
+ { 6, META_ACCEPT, -1 }, /* < 0 => Optional argument, convert to pre-MARK */
{ 1, META_FAIL, -1 },
{ 4, META_FAIL, -1 },
- { 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */
+ { 6, META_COMMIT, 0 },
+ { 5, META_PRUNE, 0 }, /* Optional argument; bump META code if found */
{ 4, META_SKIP, 0 },
{ 4, META_THEN, 0 }
};
@@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
/* Verb opcodes, indexed by their META code offset from META_MARK. */
static const uint32_t verbops[] = {
- OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP,
- OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
+ OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
+ OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
@@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
- ERR91, ERR92};
+ ERR91, ERR92, ERR93, ERR94 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -976,8 +978,8 @@ for (;;)
case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
- case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
+ case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
case META_THEN: fprintf(stderr, "META (*THEN)"); break;
@@ -1067,6 +1069,10 @@ for (;;)
fprintf(stderr, "META (*MARK:");
goto SHOWARG;
+ case META_COMMIT_ARG:
+ fprintf(stderr, "META (*COMMIT:");
+ goto SHOWARG;
+
case META_PRUNE_ARG:
fprintf(stderr, "META (*PRUNE:");
goto SHOWARG;
@@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
escape = -i; /* Else return a special escape */
if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
+
+ /* Perl supports \N{name} for character names and \N{U+dddd} for numerical
+ Unicode code points, as well as plain \N for "not newline". PCRE does not
+ support \N{name}. However, it does support quantification such as \N{2,3},
+ so if \N{ is not followed by U+dddd we check for a quantifier. */
+
+ if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
+ {
+ PCRE2_SPTR p = ptr + 1;
+
+ /* \N{U+ can be handled by the \x{ code. However, this construction is
+ not valid in EBCDIC environments because it specifies a Unicode
+ character, not a codepoint in the local code. For example \N{U+0041}
+ must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
+ casing semantics for the entire pattern, so allow it only in UTF (i.e.
+ Unicode) mode. */
+
+ if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
+ {
+#ifdef EBCDIC
+ *errorcodeptr = ERR93;
+#else
+ if (utf)
+ {
+ ptr = p + 1;
+ escape = 0; /* Not a fancy escape after all */
+ goto COME_FROM_NU;
+ }
+ else *errorcodeptr = ERR93;
+#endif
+ }
+
+ /* Give an error if what follows is not a quantifier, but don't override
+ an error set by the quantifier reader (e.g. number overflow). */
+
+ else
+ {
+ if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
+ *errorcodeptr == 0)
+ *errorcodeptr = ERR37;
+ }
+ }
}
}
@@ -1462,6 +1510,7 @@ else
/* A number of Perl escapes are not handled by PCRE. We give an explicit
error. */
+ case CHAR_F:
case CHAR_l:
case CHAR_L:
*errorcodeptr = ERR37;
@@ -1719,6 +1768,9 @@ else
{
if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{
+#ifndef EBCDIC
+ COME_FROM_NU:
+#endif
if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
{
*errorcodeptr = ERR78;
@@ -1852,19 +1904,6 @@ else
}
}
-/* Perl supports \N{name} for character names, as well as plain \N for "not
-newline". PCRE does not support \N{name}. However, it does support
-quantification such as \N{2,3}. */
-
-if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
- ptrend - ptr > 2)
- {
- PCRE2_SPTR p = ptr + 1;
- if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
- *errorcodeptr == 0)
- *errorcodeptr = ERR37;
- }
-
/* Set the pointer to the next character before returning. */
*ptrptr = ptr;
@@ -2251,11 +2290,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
-/* Of the options that are changeable within the pattern, these are tracked
-during parsing. The rest are used from META_OPTIONS items when compiling. */
+/* Options that are changeable within the pattern must be tracked during
+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
+but all must be tracked so that META_OPTIONS items set the correct values for
+the main compiling phase. */
-#define PARSE_TRACKED_OPTIONS \
- (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+ PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t meta_quantifier = 0;
+uint32_t add_after_mark = 0;
uint16_t nest_depth = 0;
int after_manual_callout = 0;
int expect_cond_assert = 0;
@@ -2434,11 +2477,17 @@ while (ptr < ptrend)
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
- /* OR: character > 255 */
- c > 255 ||
- /* OR: not a # comment or white space */
- (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
- ))
+#ifdef SUPPORT_UNICODE
+ /* OR: character > 255 AND not Unicode Pattern White Space */
+ (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
+#endif
+ /* OR: not a # comment or isspace() white space */
+ (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
+#ifdef SUPPORT_UNICODE
+ /* and not CHAR_NEL when Unicode is supported */
+ && c != CHAR_NEL
+#endif
+ )))
{
PCRE2_SIZE verbnamelength;
@@ -2461,6 +2510,16 @@ while (ptr < ptrend)
goto FAILED;
}
*verblengthptr = (uint32_t)verbnamelength;
+
+ /* If this name was on a verb such as (*ACCEPT) which does not continue,
+ a (*MARK) was generated for the name. We now add the original verb as the
+ next item. */
+
+ if (add_after_mark != 0)
+ {
+ *parsed_pattern++ = add_after_mark;
+ add_after_mark = 0;
+ }
break;
case CHAR_BACKSLASH:
@@ -2510,11 +2569,18 @@ while (ptr < ptrend)
/* Skip over whitespace and # comments in extended mode. Note that c is a
character, not a code unit, so we must not use MAX_255 to test its size
- because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
+ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
+ whitespace characters are those designated as "Pattern White Space" by
+ Unicode, which are the isspace() characters plus CHAR_NEL (next line), which is
+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
+ subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
+#ifdef SUPPORT_UNICODE
+ if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
+#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@@ -3206,7 +3272,6 @@ while (ptr < ptrend)
tempptr = ptr;
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
options, TRUE, cb);
-
if (errorcode != 0)
{
CLASS_ESCAPE_FAILED:
@@ -3454,13 +3519,25 @@ while (ptr < ptrend)
if (*ptr++ == CHAR_COLON) /* Skip past : or ) */
{
- if (verbs[i].has_arg < 0) /* Argument is forbidden */
+ /* Some optional arguments can be treated as a preceding (*MARK) */
+
+ if (verbs[i].has_arg < 0)
{
- errorcode = ERR59;
- goto FAILED;
+ add_after_mark = verbs[i].meta;
+ *parsed_pattern++ = META_MARK;
}
- *parsed_pattern++ = verbs[i].meta +
- ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+
+ /* The remaining verbs with arguments (except *MARK) need a different
+ opcode. */
+
+ else
+ {
+ *parsed_pattern++ = verbs[i].meta +
+ ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+ }
+
+ /* Set up for reading the name in the main loop. */
+
verblengthptr = parsed_pattern++;
verbnamestart = ptr;
inverbname = TRUE;
@@ -3521,17 +3598,39 @@ while (ptr < ptrend)
else
{
+ BOOL hyphenok = TRUE;
+ uint32_t oldoptions = options;
+
top_nest->reset_group = 0;
top_nest->max_group = 0;
set = unset = 0;
optset = &set;
+ /* ^ at the start unsets imnsx and disables the subsequent use of - */
+
+ if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
+ {
+ options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
+ PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
+ hyphenok = FALSE;
+ ptr++;
+ }
+
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
*ptr != CHAR_COLON)
{
switch (*ptr++)
{
- case CHAR_MINUS: optset = &unset; break;
+ case CHAR_MINUS:
+ if (!hyphenok)
+ {
+ errorcode = ERR94;
+ ptr--; /* Correct the offset */
+ goto FAILED;
+ }
+ optset = &unset;
+ hyphenok = FALSE;
+ break;
case CHAR_J: /* Record that it changed in the external options */
*optset |= PCRE2_DUPNAMES;
@@ -3591,7 +3690,7 @@ while (ptr < ptrend)
/* If nothing changed, no need to record. */
- if (set != 0 || unset != 0)
+ if (options != oldoptions)
{
*parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options;
@@ -3896,9 +3995,8 @@ while (ptr < ptrend)
if (*ptr == CHAR_DOT)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
- if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
- goto FAILED;
- if (minor < 10) minor *= 10;
+ minor = (*ptr++ - CHAR_0) * 10; /* ptr may now be at ptrend */
+ if (ptr < ptrend && IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;
}
@@ -4261,11 +4359,11 @@ goto FAILED;
/*************************************************
-* Find first significant op code *
+* Find first significant opcode *
*************************************************/
/* This is called by several functions that scan a compiled expression looking
-for a fixed first character, or an anchoring op code etc. It skips over things
+for a fixed first character, or an anchoring opcode etc. It skips over things
that do not influence this. For some calls, it makes sense to skip negative
forward and all backward assertions, and also the \b assertion; for others it
does not.
@@ -5472,7 +5570,7 @@ for (;; pptr++)
set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
of the extra data and reset the pointer. This is so that very large
classes that contain a zillion wide characters or Unicode property tests
- do not overwrite the work space (which is on the stack). */
+ do not overwrite the workspace (which is on the stack). */
if (class_uchardata > class_uchardata_base)
{
@@ -5563,7 +5661,7 @@ for (;; pptr++)
if (class_has_8bitchar > 0)
{
*code++ |= XCL_MAP;
- memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
+ (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop)
for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
@@ -5655,6 +5753,7 @@ for (;; pptr++)
cb->had_pruneorskip = TRUE;
/* Fall through */
case META_MARK:
+ case META_COMMIT_ARG:
VERB_ARG:
*code++ = verbops[(meta - META_MARK) >> 16];
/* The length is in characters. */
@@ -6509,7 +6608,7 @@ for (;; pptr++)
/* Wrap the recursion call in OP_BRA brackets. */
- memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
+ (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
op_previous = *previous = OP_BRA;
PUT(previous, 1, 2 + 2*LINK_SIZE);
previous[2 + 2*LINK_SIZE] = OP_KET;
@@ -6589,7 +6688,7 @@ for (;; pptr++)
if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
{
- memmove(previous + 1, previous, CU2BYTES(len));
+ (void)memmove(previous + 1, previous, CU2BYTES(len));
code++;
if (repeat_max == 0)
{
@@ -6610,7 +6709,7 @@ for (;; pptr++)
else
{
int linkoffset;
- memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
+ (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA;
@@ -6811,7 +6910,7 @@ for (;; pptr++)
if (*bracode == OP_COND || *bracode == OP_SCOND)
{
int nlen = (int)(code - bracode);
- memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
+ (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
*bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
@@ -7082,7 +7181,7 @@ for (;; pptr++)
else
{
- memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
+ (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
tempcode[0] = OP_ONCE;
@@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the
beginning. We accumulate in a local variable to save frequent testing of
lengthptr for NULL. We cannot do this by looking at the value of 'code' at the
start and end of each alternative, because compiled items are discarded during
-the pre-compile phase so that the work space is not exceeded. */
+the pre-compile phase so that the workspace is not exceeded. */
length = 2 + 2*LINK_SIZE + skipunits;
@@ -7622,7 +7721,7 @@ for (;;)
{
if (cb->open_caps->flag)
{
- memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
+ (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE;
code += 1 + LINK_SIZE;
@@ -7765,10 +7864,11 @@ do {
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
}
- /* Condition */
+ /* Condition. If there is no second branch, it can't be anchored. */
- else if (op == OP_COND)
+ else if (op == OP_COND || op == OP_SCOND)
{
+ if (scode[GET(scode,1)] != OP_ALT) return FALSE;
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE;
}
@@ -8003,6 +8103,7 @@ for (;;)
break;
case OP_MARK:
+ case OP_COMMIT_ARG:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
@@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)
if (crc < 0)
{
- memmove(slot + cb->name_entry_size, slot,
+ (void)memmove(slot + cb->name_entry_size, slot,
CU2BYTES((tablecount - i) * cb->name_entry_size));
break;
}
@@ -8311,6 +8412,7 @@ for (;; pptr++)
break;
case META_MARK: /* Add the length of the name. */
+ case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
@@ -8501,6 +8603,7 @@ for (;; pptr++)
goto EXIT;
case META_MARK:
+ case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG:
@@ -8572,6 +8675,32 @@ for (;; pptr++)
case META_LOOKAHEADNOT:
pptr = parsed_skip(pptr + 1, PSKIP_KET);
if (pptr == NULL) goto PARSED_SKIP_FAILED;
+
+ /* Also ignore any quantifiers that follow a lookahead assertion. */
+
+ switch (pptr[1])
+ {
+ case META_ASTERISK:
+ case META_ASTERISK_PLUS:
+ case META_ASTERISK_QUERY:
+ case META_PLUS:
+ case META_PLUS_PLUS:
+ case META_PLUS_QUERY:
+ case META_QUERY:
+ case META_QUERY_PLUS:
+ case META_QUERY_QUERY:
+ pptr++;
+ break;
+
+ case META_MINMAX:
+ case META_MINMAX_PLUS:
+ case META_MINMAX_QUERY:
+ pptr += 3;
+ break;
+
+ default:
+ break;
+ }
break;
/* Lookbehinds can be ignored, but must themselves be checked. */
@@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
break;
case META_MARK:
+ case META_COMMIT_ARG:
case META_PRUNE_ARG:
case META_SKIP_ARG:
case META_THEN_ARG: