summaryrefslogtreecommitdiff
path: root/thirdparty/pcre2/src/pcre2_match.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_match.c')
-rw-r--r--thirdparty/pcre2/src/pcre2_match.c77
1 files changed, 49 insertions, 28 deletions
diff --git a/thirdparty/pcre2/src/pcre2_match.c b/thirdparty/pcre2/src/pcre2_match.c
index e3f78c2ca3..f28cdbb47a 100644
--- a/thirdparty/pcre2/src/pcre2_match.c
+++ b/thirdparty/pcre2/src/pcre2_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2015-2020 University of Cambridge
+ New API code Copyright (c) 2015-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -818,10 +818,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* N is now the frame of the recursion; the previous frame is at the
OP_RECURSE position. Go back there, copying the current subject position
- and mark, and move on past the OP_RECURSE. */
+ and mark, and the start_match position (\K might have changed it), and
+ then move on past the OP_RECURSE. */
P->eptr = Feptr;
P->mark = Fmark;
+ P->start_match = Fstart_match;
F = P;
Fecode += 1 + LINK_SIZE;
continue;
@@ -6115,8 +6117,8 @@ BOOL has_req_cu = FALSE;
BOOL startline;
#if PCRE2_CODE_UNIT_WIDTH == 8
-BOOL memchr_not_found_first_cu;
-BOOL memchr_not_found_first_cu2;
+PCRE2_SPTR memchr_found_first_cu;
+PCRE2_SPTR memchr_found_first_cu2;
#endif
PCRE2_UCHAR first_cu = 0;
@@ -6710,8 +6712,8 @@ start_partial = match_partial = NULL;
mb->hitend = FALSE;
#if PCRE2_CODE_UNIT_WIDTH == 8
-memchr_not_found_first_cu = FALSE;
-memchr_not_found_first_cu2 = FALSE;
+memchr_found_first_cu = NULL;
+memchr_found_first_cu2 = NULL;
#endif
for(;;)
@@ -6780,13 +6782,7 @@ for(;;)
}
}
- /* Not anchored. Advance to a unique first code unit if there is one. In
- 8-bit mode, the use of memchr() gives a big speed up, even though we have
- to call it twice in caseless mode, in order to find the earliest occurrence
- of the character in either of its cases. If a call to memchr() that
- searches the rest of the subject fails to find one case, remember that in
- order not to keep on repeating the search. This can make a huge difference
- when the strings are very long and only one case is present. */
+ /* Not anchored. Advance to a unique first code unit if there is one. */
else
{
@@ -6794,43 +6790,68 @@ for(;;)
{
if (first_cu != first_cu2) /* Caseless */
{
+ /* In 16-bit and 32_bit modes we have to do our own search, so can
+ look for both cases at once. */
+
#if PCRE2_CODE_UNIT_WIDTH != 8
PCRE2_UCHAR smc;
while (start_match < end_subject &&
(smc = UCHAR21TEST(start_match)) != first_cu &&
- smc != first_cu2)
+ smc != first_cu2)
start_match++;
+#else
+ /* In 8-bit mode, the use of memchr() gives a big speed up, even
+ though we have to call it twice in order to find the earliest
+ occurrence of the code unit in either of its cases. Caching is used
+ to remember the positions of previously found code units. This can
+ make a huge difference when the strings are very long and only one
+ case is actually present. */
-#else /* 8-bit code units */
PCRE2_SPTR pp1 = NULL;
PCRE2_SPTR pp2 = NULL;
- PCRE2_SIZE cu2size = end_subject - start_match;
+ PCRE2_SIZE searchlength = end_subject - start_match;
- if (!memchr_not_found_first_cu)
+ /* If we haven't got a previously found position for first_cu, or if
+ the current starting position is later, we need to do a search. If
+ the code unit is not found, set it to the end. */
+
+ if (memchr_found_first_cu == NULL ||
+ start_match > memchr_found_first_cu)
{
- pp1 = memchr(start_match, first_cu, end_subject - start_match);
- if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
- else cu2size = pp1 - start_match;
+ pp1 = memchr(start_match, first_cu, searchlength);
+ memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
}
- /* If pp1 is not NULL, we have arranged to search only as far as pp1,
- to see if the other case is earlier, so we can set "not found" only
- when both searches have returned NULL. */
+ /* If the start is before a previously found position, use the
+ previous position, or NULL if a previous search failed. */
+
+ else pp1 = (memchr_found_first_cu == end_subject)? NULL :
+ memchr_found_first_cu;
- if (!memchr_not_found_first_cu2)
+ /* Do the same thing for the other case. */
+
+ if (memchr_found_first_cu2 == NULL ||
+ start_match > memchr_found_first_cu2)
{
- pp2 = memchr(start_match, first_cu2, cu2size);
- memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
+ pp2 = memchr(start_match, first_cu2, searchlength);
+ memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
}
+ else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
+ memchr_found_first_cu2;
+
+ /* Set the start to the end of the subject if neither case was found.
+ Otherwise, use the earlier found point. */
+
if (pp1 == NULL)
start_match = (pp2 == NULL)? end_subject : pp2;
else
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
-#endif
+
+#endif /* 8-bit handling */
}
- /* The caseful case */
+ /* The caseful case is much simpler. */
else
{