diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_substitute.c')
-rw-r--r-- | thirdparty/pcre2/src/pcre2_substitute.c | 102 |
1 files changed, 85 insertions, 17 deletions
diff --git a/thirdparty/pcre2/src/pcre2_substitute.c b/thirdparty/pcre2/src/pcre2_substitute.c index 8da951fc6e..ec3dd66df9 100644 --- a/thirdparty/pcre2/src/pcre2_substitute.c +++ b/thirdparty/pcre2/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -129,7 +129,7 @@ for (; ptr < ptrend; ptr++) ptr += 1; /* Must point after \ */ erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode, - code->overall_options, FALSE, NULL); + code->overall_options, code->extra_options, FALSE, NULL); ptr -= 1; /* Back to last code unit of escape */ if (errorcode != 0) { @@ -238,12 +238,18 @@ PCRE2_SPTR repend; PCRE2_SIZE extra_needed = 0; PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE *ovector; +PCRE2_SIZE ovecsave[3]; +pcre2_substitute_callout_block scb; + +/* General initialization */ buff_offset = 0; lengthleft = buff_length = *blength; *blength = PCRE2_UNSET; +ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; -/* Partial matching is not valid. */ +/* Partial matching is not valid. This must come after setting *blength to +PCRE2_UNSET, so as not to imply an offset in the replacement. */ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) return PCRE2_ERROR_BADOPTION; @@ -262,6 +268,13 @@ if (match_data == NULL) ovector = pcre2_get_ovector_pointer(match_data); ovector_count = pcre2_get_ovector_count(match_data); +/* Fixed things in the callout block */ + +scb.version = 0; +scb.input = subject; +scb.output = (PCRE2_SPTR)buffer; +scb.ovector = ovector; + /* Find lengths of zero-terminated strings and the end of the replacement. */ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); @@ -361,14 +374,34 @@ do } /* Handle a successful match. Matches that use \K to end before they start - are not supported. */ - - if (ovector[1] < ovector[0]) + or start before the current point in the subject are not supported. */ + + if (ovector[1] < ovector[0] || ovector[0] < start_offset) { rc = PCRE2_ERROR_BADSUBSPATTERN; goto EXIT; } - + + /* Check for the same match as previous. This is legitimate after matching an + empty string that starts after the initial match offset. We have tried again + at the match point in case the pattern is one like /(?<=\G.)/ which can never + match at its starting point, so running the match achieves the bumpalong. If + we do get the same (null) match at the original match point, it isn't such a + pattern, so we now do the empty string magic. In all other cases, a repeat + match should never occur. */ + + if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) + { + if (ovector[0] == ovector[1] && ovecsave[2] != start_offset) + { + goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + ovecsave[2] = start_offset; + continue; /* Back to the top of the loop */ + } + rc = PCRE2_ERROR_INTERNAL_DUPMATCH; + goto EXIT; + } + /* Count substitutions with a paranoid check for integer overflow; surely no real call to this function would ever hit this! */ @@ -379,11 +412,14 @@ do } subs++; - /* Copy the text leading up to the match. */ + /* Copy the text leading up to the match, and remember where the insert + begins and how many ovector pairs are set. */ if (rc == 0) rc = ovector_count; fraglength = ovector[0] - start_offset; CHECKMEMCPY(subject + start_offset, fraglength); + scb.output_offsets[0] = buff_offset; + scb.oveccount = rc; /* Process the replacement string. Literal mode is set by \Q, but only in extended mode when backslashes are being interpreted. In extended mode we @@ -399,7 +435,7 @@ do if (ptr >= repend) { - if (ptrstackptr <= 0) break; /* End of replacement string */ + if (ptrstackptr == 0) break; /* End of replacement string */ repend = ptrstack[--ptrstackptr]; ptr = ptrstack[--ptrstackptr]; continue; @@ -680,7 +716,7 @@ do { if (((code->tables + cbits_offset + ((forcecase > 0)? cbit_upper:cbit_lower) - )[ch/8] & (1 << (ch%8))) == 0) + )[ch/8] & (1u << (ch%8))) == 0) ch = (code->tables + fcc_offset)[ch]; } forcecase = forcecasereset; @@ -738,7 +774,7 @@ do ptr++; /* Point after \ */ rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode, - code->overall_options, FALSE, NULL); + code->overall_options, code->extra_options, FALSE, NULL); if (errorcode != 0) goto BADESCAPE; switch(rc) @@ -782,7 +818,7 @@ do { if (((code->tables + cbits_offset + ((forcecase > 0)? cbit_upper:cbit_lower) - )[ch/8] & (1 << (ch%8))) == 0) + )[ch/8] & (1u << (ch%8))) == 0) ch = (code->tables + fcc_offset)[ch]; } forcecase = forcecasereset; @@ -799,13 +835,45 @@ do } /* End handling a literal code unit */ } /* End of loop for scanning the replacement. */ - /* The replacement has been copied to the output. Update the start offset to - point to the rest of the subject string. If we matched an empty string, - do the magic for global matches. */ + /* The replacement has been copied to the output, or its size has been + remembered. Do the callout if there is one and we have done an actual + replacement. */ + + if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL) + { + scb.subscount = subs; + scb.output_offsets[1] = buff_offset; + rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data); - start_offset = ovector[1]; - goptions = (ovector[0] != ovector[1])? 0 : + /* A non-zero return means cancel this substitution. Instead, copy the + matched string fragment. */ + + if (rc != 0) + { + PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0]; + PCRE2_SIZE oldlength = ovector[1] - ovector[0]; + + buff_offset -= newlength; + lengthleft += newlength; + CHECKMEMCPY(subject + ovector[0], oldlength); + + /* A negative return means do not do any more. */ + + if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL); + } + } + + /* Save the details of this match. See above for how this data is used. If we + matched an empty string, do the magic for global matches. Finally, update the + start offset to point to the rest of the subject string. */ + + ovecsave[0] = ovector[0]; + ovecsave[1] = ovector[1]; + ovecsave[2] = start_offset; + + goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 : PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; + start_offset = ovector[1]; } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ /* Copy the rest of the subject. */ |