summaryrefslogtreecommitdiff
path: root/thirdparty/pcre2/src/pcre2_substitute.c
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_substitute.c')
-rw-r--r--thirdparty/pcre2/src/pcre2_substitute.c67
1 files changed, 54 insertions, 13 deletions
diff --git a/thirdparty/pcre2/src/pcre2_substitute.c b/thirdparty/pcre2/src/pcre2_substitute.c
index ab8d10908a..ec3dd66df9 100644
--- a/thirdparty/pcre2/src/pcre2_substitute.c
+++ b/thirdparty/pcre2/src/pcre2_substitute.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2018 University of Cambridge
+ New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -129,7 +129,7 @@ for (; ptr < ptrend; ptr++)
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
- code->overall_options, FALSE, NULL);
+ code->overall_options, code->extra_options, FALSE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
@@ -239,13 +239,17 @@ PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[3];
+pcre2_substitute_callout_block scb;
+
+/* General initialization */
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
-/* Partial matching is not valid. */
+/* Partial matching is not valid. This must come after setting *blength to
+PCRE2_UNSET, so as not to imply an offset in the replacement. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
@@ -264,6 +268,13 @@ if (match_data == NULL)
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);
+/* Fixed things in the callout block */
+
+scb.version = 0;
+scb.input = subject;
+scb.output = (PCRE2_SPTR)buffer;
+scb.ovector = ovector;
+
/* Find lengths of zero-terminated strings and the end of the replacement. */
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
@@ -390,7 +401,7 @@ do
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
goto EXIT;
}
-
+
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@@ -401,11 +412,14 @@ do
}
subs++;
- /* Copy the text leading up to the match. */
+ /* Copy the text leading up to the match, and remember where the insert
+ begins and how many ovector pairs are set. */
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
+ scb.output_offsets[0] = buff_offset;
+ scb.oveccount = rc;
/* Process the replacement string. Literal mode is set by \Q, but only in
extended mode when backslashes are being interpreted. In extended mode we
@@ -421,7 +435,7 @@ do
if (ptr >= repend)
{
- if (ptrstackptr <= 0) break; /* End of replacement string */
+ if (ptrstackptr == 0) break; /* End of replacement string */
repend = ptrstack[--ptrstackptr];
ptr = ptrstack[--ptrstackptr];
continue;
@@ -702,7 +716,7 @@ do
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
- )[ch/8] & (1 << (ch%8))) == 0)
+ )[ch/8] & (1u << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
@@ -760,7 +774,7 @@ do
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
- code->overall_options, FALSE, NULL);
+ code->overall_options, code->extra_options, FALSE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
@@ -804,7 +818,7 @@ do
{
if (((code->tables + cbits_offset +
((forcecase > 0)? cbit_upper:cbit_lower)
- )[ch/8] & (1 << (ch%8))) == 0)
+ )[ch/8] & (1u << (ch%8))) == 0)
ch = (code->tables + fcc_offset)[ch];
}
forcecase = forcecasereset;
@@ -821,10 +835,37 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
- /* The replacement has been copied to the output. Save the details of this
- match. See above for how this data is used. If we matched an empty string, do
- the magic for global matches. Finally, update the start offset to point to
- the rest of the subject string. */
+ /* The replacement has been copied to the output, or its size has been
+ remembered. Do the callout if there is one and we have done an actual
+ replacement. */
+
+ if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
+ {
+ scb.subscount = subs;
+ scb.output_offsets[1] = buff_offset;
+ rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
+
+ /* A non-zero return means cancel this substitution. Instead, copy the
+ matched string fragment. */
+
+ if (rc != 0)
+ {
+ PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
+ PCRE2_SIZE oldlength = ovector[1] - ovector[0];
+
+ buff_offset -= newlength;
+ lengthleft += newlength;
+ CHECKMEMCPY(subject + ovector[0], oldlength);
+
+ /* A negative return means do not do any more. */
+
+ if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
+ }
+ }
+
+ /* Save the details of this match. See above for how this data is used. If we
+ matched an empty string, do the magic for global matches. Finally, update the
+ start offset to point to the rest of the subject string. */
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];