diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_dfa_match.c')
| -rw-r--r-- | thirdparty/pcre2/src/pcre2_dfa_match.c | 69 | 
1 file changed, 44 insertions, 25 deletions
| diff --git a/thirdparty/pcre2/src/pcre2_dfa_match.c b/thirdparty/pcre2/src/pcre2_dfa_match.c index 625695b7cb..060dc7669a 100644 --- a/thirdparty/pcre2/src/pcre2_dfa_match.c +++ b/thirdparty/pcre2/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.                         Written by Philip Hazel       Original API code Copyright (c) 1997-2012 University of Cambridge -          New API code Copyright (c) 2016-2020 University of Cambridge +          New API code Copyright (c) 2016-2021 University of Cambridge  -----------------------------------------------------------------------------  Redistribution and use in source and binary forms, with or without @@ -3256,8 +3256,8 @@ BOOL has_first_cu = FALSE;  BOOL has_req_cu = FALSE;  #if PCRE2_CODE_UNIT_WIDTH == 8 -BOOL memchr_not_found_first_cu = FALSE; -BOOL memchr_not_found_first_cu2 = FALSE; +PCRE2_SPTR memchr_found_first_cu = NULL; +PCRE2_SPTR memchr_found_first_cu2 = NULL;  #endif  PCRE2_UCHAR first_cu = 0; @@ -3648,13 +3648,7 @@ for (;;)          }        } -    /* Not anchored. Advance to a unique first code unit if there is one. In -    8-bit mode, the use of memchr() gives a big speed up, even though we have -    to call it twice in caseless mode, in order to find the earliest occurrence -    of the character in either of its cases. If a call to memchr() that -    searches the rest of the subject fails to find one case, remember that in -    order not to keep on repeating the search. This can make a huge difference -    when the strings are very long and only one case is present. */ +    /* Not anchored. Advance to a unique first code unit if there is one. */      else        { @@ -3662,43 +3656,68 @@ for (;;)          {          if (first_cu != first_cu2)  /* Caseless */            { +          /* In 16-bit and 32-bit modes we have to do our own search, so can +          look for both cases at once. 
*/ +  #if PCRE2_CODE_UNIT_WIDTH != 8            PCRE2_UCHAR smc;            while (start_match < end_subject &&                  (smc = UCHAR21TEST(start_match)) != first_cu && -                  smc != first_cu2) +                 smc != first_cu2)              start_match++; +#else +          /* In 8-bit mode, the use of memchr() gives a big speed up, even +          though we have to call it twice in order to find the earliest +          occurrence of the code unit in either of its cases. Caching is used +          to remember the positions of previously found code units. This can +          make a huge difference when the strings are very long and only one +          case is actually present. */ -#else  /* 8-bit code units */            PCRE2_SPTR pp1 = NULL;            PCRE2_SPTR pp2 = NULL; -          PCRE2_SIZE cu2size = end_subject - start_match; +          PCRE2_SIZE searchlength = end_subject - start_match; -          if (!memchr_not_found_first_cu) +          /* If we haven't got a previously found position for first_cu, or if +          the current starting position is later, we need to do a search. If +          the code unit is not found, set it to the end. */ + +          if (memchr_found_first_cu == NULL || +              start_match > memchr_found_first_cu)              { -            pp1 = memchr(start_match, first_cu, end_subject - start_match); -            if (pp1 == NULL) memchr_not_found_first_cu = TRUE; -              else cu2size = pp1 - start_match; +            pp1 = memchr(start_match, first_cu, searchlength); +            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;              } -          /* If pp1 is not NULL, we have arranged to search only as far as pp1, -          to see if the other case is earlier, so we can set "not found" only -          when both searches have returned NULL. 
*/ +          /* If the start is before a previously found position, use the +          previous position, or NULL if a previous search failed. */ + +          else pp1 = (memchr_found_first_cu == end_subject)? NULL : +            memchr_found_first_cu; -          if (!memchr_not_found_first_cu2) +          /* Do the same thing for the other case. */ + +          if (memchr_found_first_cu2 == NULL || +              start_match > memchr_found_first_cu2)              { -            pp2 = memchr(start_match, first_cu2, cu2size); -            memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL); +            pp2 = memchr(start_match, first_cu2, searchlength); +            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;              } +          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : +            memchr_found_first_cu2; + +          /* Set the start to the end of the subject if neither case was found. +          Otherwise, use the earlier found point. */ +            if (pp1 == NULL)              start_match = (pp2 == NULL)? end_subject : pp2;            else              start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2; -#endif + +#endif  /* 8-bit handling */            } -        /* The caseful case */ +        /* The caseful case is much simpler. */          else            { |