summaryrefslogtreecommitdiff
path: root/thirdparty/pcre2/src/pcre2_intmodedep.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_intmodedep.h')
-rw-r--r--thirdparty/pcre2/src/pcre2_intmodedep.h165
1 files changed, 102 insertions, 63 deletions
diff --git a/thirdparty/pcre2/src/pcre2_intmodedep.h b/thirdparty/pcre2/src/pcre2_intmodedep.h
index ebff7e3066..c4c4c3adb9 100644
--- a/thirdparty/pcre2/src/pcre2_intmodedep.h
+++ b/thirdparty/pcre2/src/pcre2_intmodedep.h
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,7 @@ just to undefine them all. */
#undef ACROSSCHAR
#undef BACKCHAR
#undef BYTES2CU
+#undef CHMAX_255
#undef CU2BYTES
#undef FORWARDCHAR
#undef FORWARDCHARTEST
@@ -201,20 +202,25 @@ arithmetic results in a signed value. Hence the cast. */
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
whether its argument, which is assumed to be one code unit, is less than 256.
-The maximum length of a MARK name must fit in one code unit; currently it is
-set to 255 or 65535. The TABLE_GET macro is used to access elements of tables
-containing exactly 256 items. When code points can be greater than 255, a check
-is needed before accessing these tables. */
+The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
+name must fit in one code unit; currently it is set to 255 or 65535. The
+TABLE_GET macro is used to access elements of tables containing exactly 256
+items. When code points can be greater than 255, a check is needed before
+accessing these tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
#define MAX_MARK ((1u << 8) - 1)
#ifdef SUPPORT_UNICODE
#define SUPPORT_WIDE_CHARS
+#define CHMAX_255(c) ((c) <= 255u)
+#else
+#define CHMAX_255(c) TRUE
#endif /* SUPPORT_UNICODE */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */
+#define CHMAX_255(c) ((c) <= 255u)
#define MAX_255(c) ((c) <= 255u)
#define MAX_MARK ((1u << 16) - 1)
#define SUPPORT_WIDE_CHARS
@@ -345,7 +351,7 @@ because almost all calls are already within a block of UTF-8 only code. */
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
- while((condition) && ((eptr) & 0xc0u) == 0x80u) action
+ while((condition) && ((*eptr) & 0xc0u) == 0x80u) action
/* Deposit a character into memory, returning the number of code units. */
@@ -451,7 +457,7 @@ code. */
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
- if ((condition) && ((eptr) & 0xfc00u) == 0xdc00u) action
+ if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action
/* Deposit a character into memory, returning the number of code units. */
@@ -566,15 +572,13 @@ typedef struct pcre2_real_compile_context {
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t parens_nest_limit;
+ uint32_t extra_options;
} pcre2_real_compile_context;
/* The real match context structure. */
typedef struct pcre2_real_match_context {
pcre2_memctl memctl;
-#ifdef HEAP_MATCH_RECURSE
- pcre2_memctl stack_memctl;
-#endif
#ifdef SUPPORT_JIT
pcre2_jit_callback jit_callback;
void *jit_callback_data;
@@ -582,10 +586,19 @@ typedef struct pcre2_real_match_context {
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
PCRE2_SIZE offset_limit;
+ uint32_t heap_limit;
uint32_t match_limit;
- uint32_t recursion_limit;
+ uint32_t depth_limit;
} pcre2_real_match_context;
+/* The real convert context structure. */
+
+typedef struct pcre2_real_convert_context {
+ pcre2_memctl memctl;
+ uint32_t glob_separator;
+ uint32_t glob_escape;
+} pcre2_real_convert_context;
+
/* The real compiled code structure. The type for the blocksize field is
defined specially because it is required in pcre2_serialize_decode() when
copying the size from possibly unaligned memory into a variable of the same
@@ -610,9 +623,11 @@ typedef struct pcre2_real_code {
uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
+ uint32_t extra_options; /* Taken from compile_context */
uint32_t flags; /* Various state flags */
+ uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */
- uint32_t limit_recursion; /* Limit set in the pattern */
+ uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */
uint32_t last_codeunit; /* This codeunit must be seen */
uint16_t bsr_convention; /* What \R matches */
@@ -625,7 +640,13 @@ typedef struct pcre2_real_code {
uint16_t name_count; /* Number of name entries in the table */
} pcre2_real_code;
-/* The real match data structure. */
+/* The real match data structure. Define ovector as large as it can ever
+actually be so that array bound checkers don't grumble. Memory for this
+structure is obtained by calling pcre2_match_data_create(), which sets the size
+as the offset of ovector plus a pair of elements for each capturable string, so
+the size varies from call to call. As the maximum number of capturing
+subpatterns is 65535 we must allow for 65536 strings to include the overall
+match. (See also the heapframe structure below.) */
typedef struct pcre2_real_match_data {
pcre2_memctl memctl;
@@ -638,7 +659,7 @@ typedef struct pcre2_real_match_data {
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
uint16_t oveccount; /* Number of pairs */
int rc; /* The return code from the match */
- PCRE2_SIZE ovector[1]; /* The first field */
+ PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} pcre2_real_match_data;
@@ -705,6 +726,8 @@ typedef struct compile_block {
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
uint16_t names_found; /* Number of entries so far */
uint16_t name_entry_size; /* Size of each entry */
+ uint16_t parens_depth; /* Depth of nested parentheses */
+ uint16_t assert_depth; /* Depth of nested assertions */
open_capitem *open_caps; /* Chain of open capture items */
named_group *named_groups; /* Points to vector in pre-compile */
uint32_t named_group_list_size; /* Number of entries in the list */
@@ -723,8 +746,6 @@ typedef struct compile_block {
uint32_t class_range_end; /* Overall class range end */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
int max_lookbehind; /* Maximum lookbehind (characters) */
- int parens_depth; /* Depth of nested parentheses */
- int assert_depth; /* Depth of nested assertions */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
@@ -740,27 +761,8 @@ typedef struct pcre2_real_jit_stack {
void* stack;
} pcre2_real_jit_stack;
-/* Structure for keeping a chain of heap blocks used for saving ovectors
-during pattern recursion when the ovector is larger than can be saved on
-the system stack. */
-
-typedef struct ovecsave_frame {
- struct ovecsave_frame *next; /* Next frame on free chain */
- PCRE2_SIZE saved_ovec[1]; /* First vector element */
-} ovecsave_frame;
-
/* Structure for items in a linked list that represents an explicit recursive
-call within the pattern; used by pcre_match(). */
-
-typedef struct recursion_info {
- struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
- unsigned int group_num; /* Number of group that was called */
- PCRE2_SIZE *ovec_save; /* Pointer to saved ovector frame */
- uint32_t saved_capture_last; /* Last capture number */
- PCRE2_SPTR subject_position; /* Position at start of recursion */
-} recursion_info;
-
-/* A similar structure for pcre_dfa_match(). */
+call within the pattern when running pcre_dfa_match(). */
typedef struct dfa_recursion_info {
struct dfa_recursion_info *prevrec;
@@ -768,35 +770,75 @@ typedef struct dfa_recursion_info {
uint32_t group_num;
} dfa_recursion_info;
-/* Structure for building a chain of data for holding the values of the subject
-pointer at the start of each subpattern, so as to detect when an empty string
-has been matched by a subpattern - to break infinite loops; used by
-pcre2_match(). */
+/* Structure for "stack" frames that are used for remembering backtracking
+positions during matching. As these are used in a vector, with the ovector item
+being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
+only way to check this at compile time is to force an error by generating an
+array with a negative size. By putting this in a typedef (which is never used),
+we don't generate any code when all is well. */
+
+typedef struct heapframe {
+
+ /* The first set of fields are variables that have to be preserved over calls
+ to RRMATCH(), but which do not need to be copied to new frames. */
+
+ PCRE2_SPTR ecode; /* The current position in the pattern */
+ PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
+ PCRE2_SIZE length; /* Used for character, string, or code lengths */
+ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
+ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
+ uint32_t rdepth; /* "Recursion" depth */
+ uint32_t group_frame_type; /* Type information for group frames */
+ uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
+ uint8_t return_id; /* Where to go on in internal "return" */
+ uint8_t op; /* Processing opcode */
-typedef struct eptrblock {
- struct eptrblock *epb_prev;
- PCRE2_SPTR epb_saved_eptr;
-} eptrblock;
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ PCRE2_UCHAR occu[6]; /* Used for other case code units */
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ PCRE2_UCHAR occu[2]; /* Used for other case code units */
+#else
+ PCRE2_UCHAR occu[1]; /* Used for other case code units */
+#endif
+
+ /* The rest have to be copied from the previous frame whenever a new frame
+ becomes current. The final field is specified as a large vector so that
+ runtime array bound checks don't catch references to it. However, for any
+ specific call to pcre2_match() the memory allocated for each frame structure
+ allows for exactly the right size ovector for the number of capturing
+ parentheses. (See also the comment for pcre2_real_match_data above.) */
+
+ PCRE2_SPTR eptr; /* MUST BE FIRST */
+ PCRE2_SPTR start_match; /* Can be adjusted by \K */
+ PCRE2_SPTR mark; /* Most recent mark on the success path */
+ uint32_t current_recurse; /* Current (deepest) recursion number */
+ uint32_t capture_last; /* Most recent capture */
+ PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
+ PCRE2_SIZE offset_top; /* Offset after highest capture */
+ PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
+} heapframe;
+
+typedef char check_heapframe_size[
+ ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
/* Structure for passing "static" information around between the functions
doing traditional NFA matching (pcre2_match() and friends). */
typedef struct match_block {
pcre2_memctl memctl; /* For general use */
-#ifdef HEAP_MATCH_RECURSE
- pcre2_memctl stack_memctl; /* For "stack" frames */
-#endif
- uint32_t match_call_count; /* As it says */
+ PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
+ heapframe *match_frames; /* Points to vector of frames */
+ heapframe *match_frames_top; /* Points after the end of the vector */
+ heapframe *stack_frames; /* The original vector on the stack */
+ PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */
- uint32_t match_limit_recursion; /* As it says */
+ uint32_t match_limit_depth; /* As it says */
+ uint32_t match_call_count; /* Number of times a new frame is created */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
- PCRE2_SIZE *ovector; /* Pointer to the offset vector */
- PCRE2_SIZE offset_end; /* One past the end */
- PCRE2_SIZE offset_max; /* The maximum usable for return data */
PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
@@ -807,30 +849,24 @@ typedef struct match_block {
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of the subject string */
- PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
- PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
+ PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
+ PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
+ uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
- uint32_t capture_last; /* Most recent capture number + overflow flag */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
- uint32_t match_function_type; /* Set for certain special calls of match() */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
- eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
- recursion_info *recursive; /* Linked list of recursion data */
- ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
+ pcre2_callout_block *cb; /* Points to a callout block */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
-#ifdef HEAP_MATCH_RECURSE
- void *match_frames_base; /* For remembering malloc'd frames */
-#endif
} match_block;
/* A similar structure is used for the same purpose by the DFA matching
@@ -845,13 +881,16 @@ typedef struct dfa_match_block {
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
- uint32_t match_limit_recursion; /* As it says */
+ uint32_t match_limit; /* As it says */
+ uint32_t match_limit_depth; /* As it says */
+ uint32_t match_call_count; /* Number of calls of internal function */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */
+ pcre2_callout_block *cb; /* Points to a callout block */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
dfa_recursion_info *recursive; /* Linked list of recursion data */