diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_compile.c')
-rw-r--r-- | thirdparty/pcre2/src/pcre2_compile.c | 250 |
1 files changed, 171 insertions, 79 deletions
diff --git a/thirdparty/pcre2/src/pcre2_compile.c b/thirdparty/pcre2/src/pcre2_compile.c index 383159be76..de259c9c40 100644 --- a/thirdparty/pcre2/src/pcre2_compile.c +++ b/thirdparty/pcre2/src/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -124,7 +124,7 @@ static unsigned int static int compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, - uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *, + uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, compile_block *, PCRE2_SIZE *); static int @@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */ #define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7))) -/* Private flags added to firstcu and reqcu. */ +/* Values and flags for the unsigned xxcuflags variables that accompany xxcu +variables, which are concerned with first and required code units. A value +greater than or equal to REQ_NONE means "no code unit set"; otherwise the +matching xxcu variable is set, and the low valued bits are relevant. */ -#define REQ_CASELESS (1u << 0) /* Indicates caselessness */ -#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */ -/* Negative values for the firstcu and reqcu flags */ -#define REQ_UNSET (-2) /* Not yet found anything */ -#define REQ_NONE (-1) /* Found not fixed char */ +#define REQ_UNSET 0xffffffffu /* Not yet found anything */ +#define REQ_NONE 0xfffffffeu /* Found not fixed character */ +#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */ +#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */ /* These flags are used in the groupinfo vector. */ @@ -2088,7 +2090,9 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr, PCRE2_UCHAR c; PCRE2_SIZE i, bot, top; PCRE2_SPTR ptr = *ptrptr; -PCRE2_UCHAR name[32]; +PCRE2_UCHAR name[50]; +PCRE2_UCHAR *vptr = NULL; +uint16_t ptscript = PT_NOTSCRIPT; if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; @@ -2100,36 +2104,95 @@ negation. */ if (c == CHAR_LEFT_CURLY_BRACKET) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; + if (*ptr == CHAR_CIRCUMFLEX_ACCENT) { *negptr = TRUE; ptr++; } + for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; + while (c == '_' || c == '-' || isspace(c)) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + c = *ptr++; + } if (c == CHAR_NUL) goto ERROR_RETURN; if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = c; + name[i] = tolower(c); + if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i; } + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; name[i] = 0; } -/* Otherwise there is just one following character, which must be an ASCII -letter. */ +/* If { doesn't follow \p or \P there is just one following character, which +must be an ASCII letter. */ else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) { - name[0] = c; + name[0] = tolower(c); name[1] = 0; } else goto ERROR_RETURN; *ptrptr = ptr; -/* Search for a recognized property name using binary chop. */ +/* If the property contains ':' or '=' we have class name and value separately +specified. The following are supported: + + . Bidi_Class (synonym bc), for which the property names are "bidi<name>". + . Script (synonym sc) for which the property name is the script name + . Script_Extensions (synonym scx), ditto + +As this is a small number, we currently just check the names directly. If this +grows, a sorted table and a switch will be neater. + +For both the script properties, set a PT_xxx value so that (1) they can be +distinguished and (2) invalid script names that happen to be the name of +another property can be diagnosed. */ + +if (vptr != NULL) + { + int offset = 0; + PCRE2_UCHAR sname[8]; + + *vptr = 0; /* Terminate property name */ + if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 || + PRIV(strcmp_c8)(name, STRING_bc) == 0) + { + offset = 4; + sname[0] = CHAR_b; + sname[1] = CHAR_i; /* There is no strcpy_c8 function */ + sname[2] = CHAR_d; + sname[3] = CHAR_i; + } + + else if (PRIV(strcmp_c8)(name, STRING_script) == 0 || + PRIV(strcmp_c8)(name, STRING_sc) == 0) + ptscript = PT_SC; + + else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 || + PRIV(strcmp_c8)(name, STRING_scx) == 0) + ptscript = PT_SCX; + + else + { + *errorcodeptr = ERR47; + return FALSE; + } + + /* Adjust the string in name[] as needed */ + + memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR)); + if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR)); + } + +/* Search for a recognized property using binary chop. */ bot = 0; top = PRIV(utt_size); @@ -2139,15 +2202,37 @@ while (bot < top) int r; i = (bot + top) >> 1; r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); + + /* When a matching property is found, some extra checking is needed when the + \p{xx:yy} syntax is used and xx is either sc or scx. */ + if (r == 0) { - *ptypeptr = PRIV(utt)[i].type; *pdataptr = PRIV(utt)[i].value; - return TRUE; + if (vptr == NULL || ptscript == PT_NOTSCRIPT) + { + *ptypeptr = PRIV(utt)[i].type; + return TRUE; + } + + switch (PRIV(utt)[i].type) + { + case PT_SC: + *ptypeptr = PT_SC; + return TRUE; + + case PT_SCX: + *ptypeptr = ptscript; + return TRUE; + } + + break; /* Non-script found */ } + if (r > 0) bot = i + 1; else top = i; } -*errorcodeptr = ERR47; /* Unrecognized name */ + +*errorcodeptr = ERR47; /* Unrecognized property */ return FALSE; ERROR_RETURN: /* Malformed \P or \p */ @@ -5285,9 +5370,9 @@ Arguments: pptrptr points to the current parsed pattern pointer errorcodeptr points to error code variable firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number + firstcuflagsptr place to put the first code unit flags reqcuptr place to put the last required code unit - reqcuflagsptr place to put the last required code unit flags, or a negative number + reqcuflagsptr place to put the last required code unit flags bcptr points to current branch chain cb contains pointers to tables etc. lengthptr NULL during the real compile phase @@ -5300,8 +5385,8 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero static int compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, - int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr, - uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, + int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr, + uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) { int bravalue = 0; @@ -5316,9 +5401,9 @@ uint32_t zeroreqcu, zerofirstcu; uint32_t escape; uint32_t *pptr = *pptrptr; uint32_t meta, meta_arg; -int32_t firstcuflags, reqcuflags; -int32_t zeroreqcuflags, zerofirstcuflags; -int32_t req_caseopt, reqvary, tempreqvary; +uint32_t firstcuflags, reqcuflags; +uint32_t zeroreqcuflags, zerofirstcuflags; +uint32_t req_caseopt, reqvary, tempreqvary; PCRE2_SIZE offset = 0; PCRE2_SIZE length_prevgroup = 0; PCRE2_UCHAR *code = *codeptr; @@ -5374,13 +5459,13 @@ item types that can be repeated set these backoff variables appropriately. */ firstcu = reqcu = zerofirstcu = zeroreqcu = 0; firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET; -/* The variable req_caseopt contains either the REQ_CASELESS value or zero, +/* The variable req_caseopt contains either the REQ_CASELESS bit or zero, according to the current setting of the caseless flag. The REQ_CASELESS value leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables to record the case status of the value. This is used only for ASCII characters. */ -req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; +req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; /* Switch on next META item until the end of the branch */ @@ -5395,13 +5480,12 @@ for (;; pptr++) BOOL possessive_quantifier; BOOL note_group_empty; int class_has_8bitchar; - int i; uint32_t mclength; uint32_t skipunits; uint32_t subreqcu, subfirstcu; uint32_t groupnumber; uint32_t verbarglen, verbculen; - int32_t subreqcuflags, subfirstcuflags; /* Must be signed */ + uint32_t subreqcuflags, subfirstcuflags; open_capitem *oc; PCRE2_UCHAR mcbuffer[8]; @@ -5770,9 +5854,9 @@ for (;; pptr++) if (taboffset >= 0) { if (tabopt >= 0) - for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; + for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; else - for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; + for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; } /* Now see if we need to remove any special characters. An option @@ -5786,9 +5870,9 @@ for (;; pptr++) being built and we are done. */ if (local_negate) - for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i]; + for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]); else - for (i = 0; i < 32; i++) classbits[i] |= pbits[i]; + for (int i = 0; i < 32; i++) classbits[i] |= pbits[i]; /* Every class contains at least one < 256 character. */ @@ -5827,21 +5911,23 @@ for (;; pptr++) switch(escape) { case ESC_d: - for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; break; case ESC_D: should_flip_negation = TRUE; - for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit]; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]); break; case ESC_w: - for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; break; case ESC_W: should_flip_negation = TRUE; - for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word]; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_word]); break; /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl @@ -5852,12 +5938,13 @@ for (;; pptr++) longer treat \s and \S specially. */ case ESC_s: - for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; break; case ESC_S: should_flip_negation = TRUE; - for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space]; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_space]); break; /* When adding the horizontal or vertical space lists to a class, or @@ -6098,7 +6185,7 @@ for (;; pptr++) if (negate_class && !xclass_has_prop) { /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; } memcpy(code, classbits, 32); code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); @@ -6124,7 +6211,7 @@ for (;; pptr++) if (negate_class) { /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; } memcpy(code, classbits, 32); } @@ -6198,7 +6285,7 @@ for (;; pptr++) verbarglen = *(++pptr); verbculen = 0; tempcode = code++; - for (i = 0; i < (int)verbarglen; i++) + for (int i = 0; i < (int)verbarglen; i++) { meta = *(++pptr); #ifdef SUPPORT_UNICODE @@ -6247,6 +6334,7 @@ for (;; pptr++) bravalue = OP_COND; { int count, index; + unsigned int i; PCRE2_SPTR name; named_group *ng = cb->named_groups; uint32_t length = *(++pptr); @@ -6286,7 +6374,7 @@ for (;; pptr++) groupnumber = 0; if (meta == META_COND_RNUMBER) { - for (i = 1; i < (int)length; i++) + for (i = 1; i < length; i++) { groupnumber = groupnumber * 10 + name[i] - CHAR_0; if (groupnumber > MAX_GROUP_NUMBER) @@ -6608,7 +6696,7 @@ for (;; pptr++) if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) { - if (subfirstcuflags >= 0) + if (subfirstcuflags < REQ_NONE) { firstcu = subfirstcu; firstcuflags = subfirstcuflags; @@ -6622,7 +6710,7 @@ for (;; pptr++) into reqcu if there wasn't one, using the vary flag that was in existence beforehand. */ - else if (subfirstcuflags >= 0 && subreqcuflags < 0) + else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE) { subreqcu = subfirstcu; subreqcuflags = subfirstcuflags | tempreqvary; @@ -6631,7 +6719,7 @@ for (;; pptr++) /* If the subpattern set a required code unit (or set a first code unit that isn't really the first code unit - see above), set it. */ - if (subreqcuflags >= 0) + if (subreqcuflags < REQ_NONE) { reqcu = subreqcu; reqcuflags = subreqcuflags; @@ -6650,7 +6738,7 @@ for (;; pptr++) in that example, 'X' ends up set for both. */ else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) && - subreqcuflags >= 0 && subfirstcuflags >= 0) + subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE) { reqcu = subreqcu; reqcuflags = subreqcuflags; @@ -6680,7 +6768,7 @@ for (;; pptr++) this name is duplicated. */ groupnumber = 0; - for (i = 0; i < cb->names_found; i++, ng++) + for (unsigned int i = 0; i < cb->names_found; i++, ng++) { if (length == ng->length && PRIV(strncmp)(name, ng->name, length) == 0) @@ -6935,14 +7023,19 @@ for (;; pptr++) #endif /* MAYBE_UTF_MULTI */ /* Handle the case of a single code unit - either with no UTF support, or - with UTF disabled, or for a single-code-unit UTF character. */ + with UTF disabled, or for a single-code-unit UTF character. In the latter + case, for a repeated positive match, get the caseless flag for the + required code unit from the previous character, because a class like [Aa] + sets a caseless A but by now the req_caseopt flag has been reset. */ + { mcbuffer[0] = code[-1]; mclength = 1; if (op_previous <= OP_CHARI && repeat_min > 1) { reqcu = mcbuffer[0]; - reqcuflags = req_caseopt | cb->req_varyopt; + reqcuflags = cb->req_varyopt; + if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS; } } goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ @@ -7034,7 +7127,7 @@ for (;; pptr++) *lengthptr += delta; } - else for (i = 0; i < replicate; i++) + else for (int i = 0; i < replicate; i++) { memcpy(code, previous, CU2BYTES(1 + LINK_SIZE)); previous = code; @@ -7210,12 +7303,12 @@ for (;; pptr++) else { - if (groupsetfirstcu && reqcuflags < 0) + if (groupsetfirstcu && reqcuflags >= REQ_NONE) { reqcu = firstcu; reqcuflags = firstcuflags; } - for (i = 1; (uint32_t)i < repeat_min; i++) + for (uint32_t i = 1; i < repeat_min; i++) { memcpy(code, previous, CU2BYTES(len)); code += len; @@ -7259,14 +7352,14 @@ for (;; pptr++) /* This is compiling for real */ - else for (i = repeat_max - 1; i >= 0; i--) + else for (uint32_t i = repeat_max; i >= 1; i--) { *code++ = OP_BRAZERO + repeat_type; /* All but the final copy start a new nesting, maintaining the chain of brackets outstanding. */ - if (i != 0) + if (i != 1) { int linkoffset; *code++ = OP_BRA; @@ -7985,9 +8078,9 @@ Arguments: errorcodeptr -> pointer to error code variable skipunits skip this many code units at start (for brackets and OP_COND) firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number + firstcuflagsptr place to put the first code unit flags reqcuptr place to put the last required code unit - reqcuflagsptr place to put the last required code unit flags, or a negative number + reqcuflagsptr place to put the last required code unit flags bcptr pointer to the chain of currently open branches cb points to the data block with tables pointers etc. lengthptr NULL during the real compile phase @@ -8001,7 +8094,7 @@ Returns: 0 There has been an error static int compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr, - int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr, + uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) { PCRE2_UCHAR *code = *codeptr; @@ -8014,9 +8107,9 @@ int okreturn = 1; uint32_t *pptr = *pptrptr; uint32_t firstcu, reqcu; uint32_t lookbehindlength; -int32_t firstcuflags, reqcuflags; +uint32_t firstcuflags, reqcuflags; uint32_t branchfirstcu, branchreqcu; -int32_t branchfirstcuflags, branchreqcuflags; +uint32_t branchfirstcuflags, branchreqcuflags; PCRE2_SIZE length; branch_chain bc; @@ -8135,9 +8228,9 @@ for (;;) if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu) { - if (firstcuflags >= 0) + if (firstcuflags < REQ_NONE) { - if (reqcuflags < 0) + if (reqcuflags >= REQ_NONE) { reqcu = firstcu; reqcuflags = firstcuflags; @@ -8149,8 +8242,8 @@ for (;;) /* If we (now or from before) have no firstcu, a firstcu from the branch becomes a reqcu if there isn't a branch reqcu. */ - if (firstcuflags < 0 && branchfirstcuflags >= 0 && - branchreqcuflags < 0) + if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE && + branchreqcuflags >= REQ_NONE) { branchreqcu = branchfirstcu; branchreqcuflags = branchfirstcuflags; @@ -8298,7 +8391,7 @@ Returns: TRUE or FALSE */ static BOOL -is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, +is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb, int atomcount, BOOL inassert) { do { @@ -8321,7 +8414,7 @@ do { op == OP_SCBRA || op == OP_SCBRAPOS) { int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1u << n) : 1); + uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1); if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; } @@ -8681,15 +8774,15 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags */ static uint32_t -find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert) +find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert) { uint32_t c = 0; -int cflags = REQ_NONE; +uint32_t cflags = REQ_NONE; *flags = REQ_NONE; do { uint32_t d; - int dflags; + uint32_t dflags; int xl = (*code == OP_CBRA || *code == OP_SCBRA || *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); @@ -8712,9 +8805,8 @@ do { case OP_SCRIPT_RUN: d = find_firstassertedcu(scode, &dflags, inassert + ((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0)); - if (dflags < 0) - return 0; - if (cflags < 0) { c = d; cflags = dflags; } + if (dflags >= REQ_NONE) return 0; + if (cflags >= REQ_NONE) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0; break; @@ -8727,7 +8819,7 @@ do { case OP_MINPLUS: case OP_POSPLUS: if (inassert == 0) return 0; - if (cflags < 0) { c = scode[1]; cflags = 0; } + if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; } else if (c != scode[1]) return 0; break; @@ -8753,7 +8845,7 @@ do { #endif #endif - if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } + if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; } else if (c != scode[1]) return 0; break; } @@ -9689,7 +9781,7 @@ PCRE2_SIZE re_blocksize; /* Size of memory block */ PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */ PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */ -int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ +uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ uint32_t firstcu, reqcu; /* Value of first/req code unit */ uint32_t setflags = 0; /* NL and BSR set flags */ @@ -10369,13 +10461,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) (these are not saved during the compile because they can cause conflicts with actual literals that follow). */ - if (firstcuflags < 0) + if (firstcuflags >= REQ_NONE) firstcu = find_firstassertedcu(codestart, &firstcuflags, 0); /* Save the data for a first code unit. The existence of one means the minimum length must be at least 1. */ - if (firstcuflags >= 0) + if (firstcuflags < REQ_NONE) { re->first_codeunit = firstcu; re->flags |= PCRE2_FIRSTSET; @@ -10422,16 +10514,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) different character and not a non-starting code unit of the first character, because the minimum length count is in characters, not code units. */ - if (reqcuflags >= 0) + if (reqcuflags < REQ_NONE) { #if PCRE2_CODE_UNIT_WIDTH == 16 if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */ - firstcuflags < 0 || /* First not set */ + firstcuflags >= REQ_NONE || /* First not set */ (firstcu & 0xf800) != 0xd800 || /* First not surrogate */ (reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */ #elif PCRE2_CODE_UNIT_WIDTH == 8 if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */ - firstcuflags < 0 || /* First not set */ + firstcuflags >= REQ_NONE || /* First not set */ (firstcu & 0x80) == 0 || /* First is ASCII */ (reqcu & 0x80) == 0) /* Req is ASCII */ #endif |