diff options
Diffstat (limited to 'thirdparty/pcre2/src/pcre2_jit_compile.c')
| -rw-r--r-- | thirdparty/pcre2/src/pcre2_jit_compile.c | 1857 | 
1 files changed, 781 insertions, 1076 deletions
diff --git a/thirdparty/pcre2/src/pcre2_jit_compile.c b/thirdparty/pcre2/src/pcre2_jit_compile.c index 1f21bfb6ad..f564127c2a 100644 --- a/thirdparty/pcre2/src/pcre2_jit_compile.c +++ b/thirdparty/pcre2/src/pcre2_jit_compile.c @@ -6,8 +6,9 @@  and semantics are as close as possible to those of the Perl 5 language.                         Written by Philip Hazel +                    This module by Zoltan Herczeg       Original API code Copyright (c) 1997-2012 University of Cambridge -          New API code Copyright (c) 2016-2018 University of Cambridge +          New API code Copyright (c) 2016-2019 University of Cambridge  -----------------------------------------------------------------------------  Redistribution and use in source and binary forms, with or without @@ -212,12 +213,6 @@ typedef struct stub_list {    struct stub_list *next;  } stub_list; -typedef struct label_addr_list { -  struct sljit_label *label; -  sljit_uw *update_addr; -  struct label_addr_list *next; -} label_addr_list; -  enum frame_types {    no_frame = -1,    no_stack = -2 @@ -271,6 +266,8 @@ typedef struct bracket_backtrack {      assert_backtrack *assert;      /* For OP_ONCE. Less than 0 if not needed. */      int framesize; +    /* For brackets with >3 alternatives. */ +    struct sljit_put_label *matching_put_label;    } u;    /* Points to our private memory word on the stack. */    int private_data_ptr; @@ -416,6 +413,8 @@ typedef struct compiler_common {    sljit_sw lcc;    /* Mode can be PCRE2_JIT_COMPLETE and others. */    int mode; +  /* TRUE, when empty match is accepted for partial matching. */ +  BOOL allow_empty_partial;    /* TRUE, when minlength is greater than 0. */    BOOL might_be_empty;    /* \K is found in the pattern. */ @@ -454,7 +453,6 @@ typedef struct compiler_common {    struct sljit_label *accept_label;    struct sljit_label *ff_newline_shortcut;    stub_list *stubs; -  label_addr_list *label_addrs;    recurse_entry *entries;    recurse_entry *currententry;    jump_list *partialmatch; @@ -563,6 +561,12 @@ typedef struct compare_context {  #define ARGUMENTS     SLJIT_S4  #define RETURN_ADDR   SLJIT_R4 +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define HAS_VIRTUAL_REGISTERS 1 +#else +#define HAS_VIRTUAL_REGISTERS 0 +#endif +  /* Local space layout. */  /* These two locals can be used by the current opcode. */  #define LOCALS0          (0 * sizeof(sljit_sw)) @@ -696,11 +700,12 @@ the start pointers when the end of the capturing group has not yet reached. */  #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \    { \ -  if (ptr[-1] <= 0x7f) \ -    c = *ptr--; \ +  c = ptr[-1]; \ +  if (c <= 0x7f) \ +    ptr--; \    else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \      { \ -    c = ptr[-1] - 0x80; \ +    c -= 0x80; \      \      if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \        { \ @@ -775,11 +780,12 @@ the start pointers when the end of the capturing group has not yet reached. */  #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \    { \ -  if (ptr[-1] < 0xd800 || ptr[-1] >= 0xe000) \ -    c = *ptr--; \ -  else if (ptr[-1] >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \ +  c = ptr[-1]; \ +  if (c < 0xd800 || c >= 0xe000) \ +    ptr--; \ +  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \      { \ -    c = (((ptr[-2] - 0xd800) << 10) | (ptr[-1] - 0xdc00)) + 0x10000; \ +    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \      ptr -= 2; \      } \    else \ @@ -793,7 +799,7 @@ the start pointers when the end of the capturing group has not yet reached. */  #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \    { \ -  if (ptr[0] < 0x110000) \ +  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \      c = *ptr++; \    else \      { \ @@ -801,6 +807,17 @@ the start pointers when the end of the capturing group has not yet reached. */      } \    } +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ +  { \ +  c = ptr[-1]; \ +  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \ +    ptr--; \ +  else \ +    { \ +    invalid_action; \ +    } \ +  } +  #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */  #endif /* SUPPORT_UNICODE */ @@ -1033,8 +1050,8 @@ switch(*cc)    return cc + 1 + 2 + cc[1];    default: -  /* All opcodes are supported now! */ -  SLJIT_UNREACHABLE(); +  /* Unsupported opcodes: OP_ASSERT_NA and OP_ASSERTBACK_NA */ +  /* SLJIT_UNREACHABLE(); */    return NULL;    }  } @@ -2371,14 +2388,14 @@ if (base_reg != TMP2)  else    {    status.saved_tmp_regs[1] = RETURN_ADDR; -  if (sljit_get_register_index(RETURN_ADDR) == -1) +  if (HAS_VIRTUAL_REGISTERS)      status.tmp_regs[1] = STR_PTR;    else      status.tmp_regs[1] = RETURN_ADDR;    }  status.saved_tmp_regs[2] = TMP3; -if (sljit_get_register_index(TMP3) == -1) +if (HAS_VIRTUAL_REGISTERS)    status.tmp_regs[2] = STR_END;  else    status.tmp_regs[2] = TMP3; @@ -2829,20 +2846,6 @@ while (list_item)  common->stubs = NULL;  } -static void add_label_addr(compiler_common *common, sljit_uw *update_addr) -{ -DEFINE_COMPILER; -label_addr_list *label_addr; - -label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list)); -if (label_addr == NULL) -  return; -label_addr->label = LABEL(); -label_addr->update_addr = update_addr; -label_addr->next = common->label_addrs; -common->label_addrs = label_addr; -} -  static SLJIT_INLINE void count_match(compiler_common *common)  {  DEFINE_COMPILER; @@ -2985,12 +2988,18 @@ else      }    } -OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); +if (!HAS_VIRTUAL_REGISTERS) +  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack)); +else +  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); +  if (common->mark_ptr != 0)    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);  if (common->control_head_ptr != 0)    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); +if (HAS_VIRTUAL_REGISTERS) +  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); +  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));  } @@ -3029,21 +3038,36 @@ BOOL has_pre;  OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0); -OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); -if (common->mark_ptr != 0) -  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); -OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); -if (common->mark_ptr != 0) -  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); -OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data), -  SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); +if (HAS_VIRTUAL_REGISTERS) +  { +  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); +  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); +  if (common->mark_ptr != 0) +    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); +  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount)); +  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); +  if (common->mark_ptr != 0) +    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); +  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data), +    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); +  } +else +  { +  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); +  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data)); +  if (common->mark_ptr != 0) +    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); +  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount)); +  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); +  if (common->mark_ptr != 0) +    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0); +  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); +  }  has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;  GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0)); -OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));  loop = LABEL(); @@ -3105,20 +3129,22 @@ static SLJIT_INLINE void return_with_partial_match(compiler_common *common, stru  {  DEFINE_COMPILER;  sljit_s32 mov_opcode; +sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);  SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0    && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0)); -OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); +if (arguments_reg != ARGUMENTS) +  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),    common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);  /* Store match begin and end. */ -OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0); -OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data)); +OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0); +OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));  mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; @@ -3279,7 +3305,7 @@ SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);  if (common->mode == PCRE2_JIT_COMPLETE)    return; -if (!force) +if (!force && !common->allow_empty_partial)    jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);  else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)    jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); @@ -3341,7 +3367,11 @@ if (common->mode == PCRE2_JIT_COMPLETE)  /* Partial matching mode. */  jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); -add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +if (!common->allow_empty_partial) +  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) +  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1)); +  if (common->mode == PCRE2_JIT_PARTIAL_SOFT)    {    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); @@ -3357,6 +3387,35 @@ else  JUMPHERE(jump);  } +static void process_partial_match(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +/* Partial matching mode. */ +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) +  { +  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); +  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); +  JUMPHERE(jump); +  } +else if (common->mode == PCRE2_JIT_PARTIAL_HARD) +  { +  if (common->partialmatchlabel != NULL) +    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel); +  else +    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +  } +} + +static void detect_partial_match_to(compiler_common *common, struct sljit_label *label) +{ +DEFINE_COMPILER; + +CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label); +process_partial_match(common); +} +  static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)  {  /* Reads the character into TMP1, keeps STR_PTR. @@ -3420,12 +3479,21 @@ if (common->utf)  #elif PCRE2_CODE_UNIT_WIDTH == 32  if (common->invalid_utf)    { +  if (max < 0xd800) return; +    if (backtracks != NULL) +    { +    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); +    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); +    }    else      { +    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);      CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); +    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); +    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);      }    }  #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ @@ -3490,8 +3558,12 @@ if (common->utf)      JUMPHERE(jump);    }  #elif PCRE2_CODE_UNIT_WIDTH == 32 -  if (common->invalid_utf) -    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); +if (common->invalid_utf) +  { +  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); +  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); +  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); +  }  #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */  #endif /* SUPPORT_UNICODE */  } @@ -3653,7 +3725,7 @@ if (common->utf)    /* Skip low surrogate if necessary. */    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); -  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0) +  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)      {      if (options & READ_CHAR_UPDATE_STR_PTR)        OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -3677,11 +3749,18 @@ if (common->utf)  if (common->invalid_utf)    {    if (backtracks != NULL) +    { +    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); +    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); +    }    else      { +    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);      CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); +    OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); +    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);      }    }  #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ @@ -3832,7 +3911,7 @@ if (common->utf && negated)      {      OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); -    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0) +    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)        {        OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));        OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); @@ -3865,9 +3944,9 @@ if (common->utf && negated)  static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)  { -/* Goes one character back. TMP2 must contain the start of -the subject buffer. Affects STR_PTR and TMP1. Does not modify -STR_PTR for invalid character sequences. */ +/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE, +TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer, +and it is destroyed. Does not modify STR_PTR for invalid character sequences. */  DEFINE_COMPILER;  SLJIT_UNUSED_ARG(backtracks); @@ -4407,7 +4486,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  static void do_utfpeakcharback(compiler_common *common)  { -/* Peak a character back. */ +/* Peak a character back. Does not modify STR_PTR. */  DEFINE_COMPILER;  struct sljit_jump *jump[2]; @@ -4444,7 +4523,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  static void do_utfpeakcharback_invalid(compiler_common *common)  { -/* Peak a character back. */ +/* Peak a character back. Does not modify STR_PTR. */  DEFINE_COMPILER;  sljit_s32 i;  sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); @@ -4672,7 +4751,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  static void do_utfpeakcharback_invalid(compiler_common *common)  { -/* Peak a character back. */ +/* Peak a character back. Does not modify STR_PTR. */  DEFINE_COMPILER;  struct sljit_jump *jump;  struct sljit_jump *exit_invalid[3]; @@ -4786,18 +4865,12 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -// PH hacking -//fprintf(stderr, "~~A\n"); -  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); -  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); -  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - +/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1); -  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); - -// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  } @@ -4866,15 +4939,27 @@ else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)    /* Check whether offset limit is set and valid. */    SLJIT_ASSERT(common->match_end_ptr != 0); -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); +    } +  else +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit)); +    OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);    end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET); -  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); +  if (HAS_VIRTUAL_REGISTERS) +    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); +  else +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); +  #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);  #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */ -  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); +  if (HAS_VIRTUAL_REGISTERS) +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); +    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);    end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);    OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); @@ -5434,699 +5519,56 @@ CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);  }  #endif -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) +#include "pcre2_jit_simd_inc.h" -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); -return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); -return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); -#else -#error "Unknown code width" -#endif -} -#endif - -static sljit_s32 character_to_int32(PCRE2_UCHAR chr) -{ -sljit_u32 value = chr; -#if PCRE2_CODE_UNIT_WIDTH == 8 -#define SSE2_COMPARE_TYPE_INDEX 0 -return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -#define SSE2_COMPARE_TYPE_INDEX 1 -return (sljit_s32)((value << 16) | value); -#elif PCRE2_CODE_UNIT_WIDTH == 32 -#define SSE2_COMPARE_TYPE_INDEX 2 -return (sljit_s32)(value); -#else -#error "Unsupported unit width" -#endif -} +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD -static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg) +static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)  { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -sljit_u8 instruction[5]; -#else -sljit_u8 instruction[4]; -#endif - -SLJIT_ASSERT(dst_xmm_reg < 8); - -/* MOVDQA xmm1, xmm2/m128 */ -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -if (src_general_reg < 8) -  { -  instruction[0] = 0x66; -  instruction[1] = 0x0f; -  instruction[2] = 0x6f; -  instruction[3] = (dst_xmm_reg << 3) | src_general_reg; -  sljit_emit_op_custom(compiler, instruction, 4); -  } -else -  { -  instruction[0] = 0x66; -  instruction[1] = 0x41; -  instruction[2] = 0x0f; -  instruction[3] = 0x6f; -  instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7); -  sljit_emit_op_custom(compiler, instruction, 4); -  } -#else -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6f; -instruction[3] = (dst_xmm_reg << 3) | src_general_reg; -sljit_emit_op_custom(compiler, instruction, 4); -#endif -} - -static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2, -  sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) -{ -sljit_u8 instruction[4]; -instruction[0] = 0x66; -instruction[1] = 0x0f; - -if (char1 == char2 || bit != 0) -  { -  if (bit != 0) -    { -    /* POR xmm1, xmm2/m128 */ -    /* instruction[0] = 0x66; */ -    /* instruction[1] = 0x0f; */ -    instruction[2] = 0xeb; -    instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; -    sljit_emit_op_custom(compiler, instruction, 4); -    } - -  /* PCMPEQB/W/D xmm1, xmm2/m128 */ -  /* instruction[0] = 0x66; */ -  /* instruction[1] = 0x0f; */ -  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; -  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; -  sljit_emit_op_custom(compiler, instruction, 4); -  } -else -  { -  /* MOVDQA xmm1, xmm2/m128 */ -  /* instruction[0] = 0x66; */ -  /* instruction[1] = 0x0f; */ -  instruction[2] = 0x6f; -  instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; -  sljit_emit_op_custom(compiler, instruction, 4); - -  /* PCMPEQB/W/D xmm1, xmm2/m128 */ -  /* instruction[0] = 0x66; */ -  /* instruction[1] = 0x0f; */ -  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; -  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; -  sljit_emit_op_custom(compiler, instruction, 4); - -  instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; -  sljit_emit_op_custom(compiler, instruction, 4); - -  /* POR xmm1, xmm2/m128 */ -  /* instruction[0] = 0x66; */ -  /* instruction[1] = 0x0f; */ -  instruction[2] = 0xeb; -  instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; -  sljit_emit_op_custom(compiler, instruction, 4); -  } -} - -static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) -{ -DEFINE_COMPILER; -struct sljit_label *start; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -struct sljit_label *restart; -#endif -struct sljit_jump *quit; -struct sljit_jump *partial_quit[2]; -sljit_u8 instruction[8]; -sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); -sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); -sljit_s32 data_ind = 0; -sljit_s32 tmp_ind = 1; -sljit_s32 cmp1_ind = 2; -sljit_s32 cmp2_ind = 3; -sljit_u32 bit = 0; - -SLJIT_UNUSED_ARG(offset); - -if (char1 != char2) -  { -  bit = char1 ^ char2; -  if (!is_powerof2(bit)) -    bit = 0; -  } - -partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->mode == PCRE2_JIT_COMPLETE) -  add_jump(compiler, &common->failed_match, partial_quit[0]); - -/* First part (unaligned start) */ - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); - -SLJIT_ASSERT(tmp1_ind < 8); - -/* MOVD xmm, r/m32 */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6e; -instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char1 != char2) -  { -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); - -  /* MOVD xmm, r/m32 */ -  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind; -  sljit_emit_op_custom(compiler, instruction, 4); -  } - -OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); - -/* PSHUFD xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x70; -instruction[3] = 0xc0 | (cmp1_ind << 3) | 2; -instruction[4] = 0; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char1 != char2) -  { -  /* PSHUFD xmm1, xmm2/m128, imm8 */ -  instruction[3] = 0xc0 | (cmp2_ind << 3) | 3; -  sljit_emit_op_custom(compiler, instruction, 5); -  } - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -restart = LABEL(); -#endif -OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); - -load_from_mem_sse2(compiler, data_ind, str_ptr_ind); -fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -quit = JUMP(SLJIT_NOT_ZERO); - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -start = LABEL(); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); - -partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->mode == PCRE2_JIT_COMPLETE) -  add_jump(compiler, &common->failed_match, partial_quit[1]); - -/* Second part (aligned) */ - -load_from_mem_sse2(compiler, 0, str_ptr_ind); -fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind); - -/* PMOVMSKB reg, xmm */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -JUMPTO(SLJIT_ZERO, start); - -JUMPHERE(quit); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - -if (common->mode != PCRE2_JIT_COMPLETE) -  { -  JUMPHERE(partial_quit[0]); -  JUMPHERE(partial_quit[1]); -  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); -  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); -  } -else -  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -if (common->utf && offset > 0) -  { -  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); - -  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); - -  quit = jump_if_utf_char_start(compiler, TMP1); - -  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); -  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); -  JUMPTO(SLJIT_JUMP, restart); - -  JUMPHERE(quit); -  } -#endif -} - -#ifndef _WIN64 - -static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -return 15; -#elif PCRE2_CODE_UNIT_WIDTH == 16 -return 7; -#elif PCRE2_CODE_UNIT_WIDTH == 32 -return 3; -#else -#error "Unsupported unit width" -#endif -} - -static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1, -  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) -{ -DEFINE_COMPILER; -sljit_u32 bit1 = 0; -sljit_u32 bit2 = 0; -sljit_u32 diff = IN_UCHARS(offs1 - offs2); -sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); -sljit_s32 tmp2_ind = sljit_get_register_index(TMP2); -sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); -sljit_s32 data1_ind = 0; -sljit_s32 data2_ind = 1; -sljit_s32 tmp_ind = 2; -sljit_s32 cmp1a_ind = 3; -sljit_s32 cmp1b_ind = 4; -sljit_s32 cmp2a_ind = 5; -sljit_s32 cmp2b_ind = 6; -struct sljit_label *start; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -struct sljit_label *restart; -#endif -struct sljit_jump *jump[2]; - -sljit_u8 instruction[8]; - -SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); -SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset())); -SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1); - -/* Initialize. */ -if (common->match_end_ptr != 0) -  { -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); -  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); -  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - -  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); -  CMOV(SLJIT_LESS, STR_END, TMP1, 0); -  } - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -/* MOVD xmm, r/m32 */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6e; - -if (char1a == char1b) -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); -else -  { -  bit1 = char1a ^ char1b; -  if (is_powerof2(bit1)) -    { -    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); -    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); -    } -  else -    { -    bit1 = 0; -    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); -    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); -    } -  } - -instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char1a != char1b) -  { -  instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind; -  sljit_emit_op_custom(compiler, instruction, 4); -  } - -if (char2a == char2b) -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); -else -  { -  bit2 = char2a ^ char2b; -  if (is_powerof2(bit2)) -    { -    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); -    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); -    } -  else -    { -    bit2 = 0; -    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); -    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); -    } -  } - -instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char2a != char2b) -  { -  instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind; -  sljit_emit_op_custom(compiler, instruction, 4); -  } - -/* PSHUFD xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x70; -instruction[4] = 0; - -instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char1a != char1b) -  { -  instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind; -  sljit_emit_op_custom(compiler, instruction, 5); -  } - -instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char2a != char2b) -  { -  instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind; -  sljit_emit_op_custom(compiler, instruction, 5); -  } - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -restart = LABEL(); -#endif - -OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2)); -OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); -OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf); - -load_from_mem_sse2(compiler, data1_ind, str_ptr_ind); - -jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0); - -load_from_mem_sse2(compiler, data2_ind, tmp1_ind); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | tmp_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* PSRLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -/* instruction[2] = 0x73; */ -instruction[3] = 0xc0 | (3 << 3) | data2_ind; -instruction[4] = 16 - diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* POR xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xeb; -instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -jump[1] = JUMP(SLJIT_JUMP); - -JUMPHERE(jump[0]); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | data2_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -JUMPHERE(jump[1]); - -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); - -fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind); -fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind); - -/* PAND xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xdb; -instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* Ignore matches before the first STR_PTR. */ -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -jump[0] = JUMP(SLJIT_NOT_ZERO); - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -/* Main loop. */ -instruction[0] = 0x66; -instruction[1] = 0x0f; - -start = LABEL(); - -load_from_mem_sse2(compiler, data2_ind, str_ptr_ind); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -load_from_mem_sse2(compiler, data1_ind, str_ptr_ind); - -/* PSRLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (3 << 3) | data2_ind; -instruction[4] = 16 - diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | tmp_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* POR xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xeb; -instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind); -fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind); - -/* PAND xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xdb; -instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -JUMPTO(SLJIT_ZERO, start); - -JUMPHERE(jump[0]); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -if (common->match_end_ptr != 0) -  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -if (common->utf) -  { -  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); - -  jump[0] = jump_if_utf_char_start(compiler, TMP1); - -  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); - -  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); - -  JUMPHERE(jump[0]); -  } -#endif - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); - -if (common->match_end_ptr != 0) -  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); -} - -static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max) -{ -sljit_s32 i, j, priority, count; -sljit_u32 priorities; -PCRE2_UCHAR a1, a2, b1, b2; - -priorities = 0; - -count = 0; -for (i = 0; i < max; i++) -  { -  if (chars[i].last_count > 2) -    { -    SLJIT_ASSERT(chars[i].last_count <= 7); - -    priorities |= (1 << chars[i].last_count); -    count++; -    } -  } - -if (count < 2) -  return FALSE; - -for (priority = 7; priority > 2; priority--) -  { -  if ((priorities & (1 << priority)) == 0) -    continue; +  sljit_s32 i, j, max_i = 0, max_j = 0; +  sljit_u32 max_pri = 0; +  PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;    for (i = max - 1; i >= 1; i--) -    if (chars[i].last_count >= priority) +    { +    if (chars[i].last_count > 2)        { -      SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1); -        a1 = chars[i].chars[0];        a2 = chars[i].chars[1]; +      a_pri = chars[i].last_count; -      j = i - max_fast_forward_char_pair_sse2_offset(); +      j = i - max_fast_forward_char_pair_offset();        if (j < 0)          j = 0;        while (j < i)          { -        if (chars[j].last_count >= priority) +        b_pri = chars[j].last_count; +        if (b_pri > 2 && a_pri + b_pri >= max_pri)            {            b1 = chars[j].chars[0];            b2 = chars[j].chars[1];            if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)              { -            fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2); -            return TRUE; +            max_pri = a_pri + b_pri; +            max_i = i; +            max_j = j;              }            }          j++;          }        } -  } - -return FALSE; -} +    } -#endif +if (max_pri == 0) +  return FALSE; -#undef SSE2_COMPARE_TYPE_INDEX +fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]); +return TRUE; +} -#endif +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */  static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)  { @@ -6154,13 +5596,11 @@ if (has_match_end)    CMOV(SLJIT_GREATER, STR_END, TMP1, 0);    } -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD -/* SSE2 accelerated first character search. */ - -if (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)    { -  fast_forward_first_char2_sse2(common, char1, char2, offset); +  fast_forward_char_simd(common, char1, char2, offset);    if (offset > 0)      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); @@ -6267,8 +5707,8 @@ for (i = 0; i < max; i++)      chars[i].last_count = (chars[i].count == 255) ? 0 : 1;    } -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64) -if (sljit_has_cpu_feature(SLJIT_HAS_SSE2) && check_fast_forward_char_pair_sse2(common, chars, max)) +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))    return TRUE;  #endif @@ -6353,18 +5793,21 @@ if (common->match_end_ptr != 0)    {    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); -  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));    OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);    CMOV(SLJIT_GREATER, STR_END, TMP1, 0);    }  else -  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +  { +  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); +  }  SLJIT_ASSERT(range_right >= 0); -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); -#endif +if (!HAS_VIRTUAL_REGISTERS) +  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);  start = LABEL();  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); @@ -6375,11 +5818,11 @@ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);  #endif -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); -#else -OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); -#endif +if (!HAS_VIRTUAL_REGISTERS) +  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); +else +  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); +  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); @@ -6473,9 +5916,17 @@ if (common->match_end_ptr != 0)  if (common->nltype == NLTYPE_FIXED && common->newline > 255)    {    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +    } +  else +    { +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); +    }    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); @@ -6503,9 +5954,15 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)    return;    } -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +if (HAS_VIRTUAL_REGISTERS) +  { +  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); +  } +else +  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); +  /* Example: match /^/ to \r\n from offset 1. */ -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));  firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);  move_back(common, NULL, FALSE); @@ -6586,7 +6043,7 @@ if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &ma    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); -  if (sljit_get_register_index(TMP3) >= 0) +  if (!HAS_VIRTUAL_REGISTERS)      {      OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0); @@ -6693,7 +6150,7 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));  jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS)    {    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); @@ -6718,7 +6175,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);  JUMPHERE(jump);  OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS)    {    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); @@ -6737,7 +6194,11 @@ static void check_wordboundary(compiler_common *common)  DEFINE_COMPILER;  struct sljit_jump *skipread;  jump_list *skipread_list = NULL; -jump_list *invalid_utf = NULL; +#ifdef SUPPORT_UNICODE +struct sljit_label *valid_utf; +jump_list *invalid_utf1 = NULL; +#endif /* SUPPORT_UNICODE */ +jump_list *invalid_utf2 = NULL;  #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE  struct sljit_jump *jump;  #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */ @@ -6751,14 +6212,30 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);  skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); -if (common->mode == PCRE2_JIT_COMPLETE) -  peek_char_back(common, READ_CHAR_MAX, &invalid_utf); +#ifdef SUPPORT_UNICODE +if (common->invalid_utf) +  { +  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1); + +  if (common->mode != PCRE2_JIT_COMPLETE) +    { +    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); +    move_back(common, NULL, TRUE); +    check_start_used_ptr(common); +    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); +    } +  }  else +#endif /* SUPPORT_UNICODE */    { -  move_back(common, &invalid_utf, FALSE); -  check_start_used_ptr(common); -  /* No need precise read since match fails anyway. */ -  read_char(common, 0, READ_CHAR_MAX, &invalid_utf, READ_CHAR_UPDATE_STR_PTR); +  if (common->mode == PCRE2_JIT_COMPLETE) +    peek_char_back(common, READ_CHAR_MAX, NULL); +  else +    { +    move_back(common, NULL, TRUE); +    check_start_used_ptr(common); +    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR); +    }    }  /* Testing char type. */ @@ -6802,10 +6279,13 @@ JUMPHERE(skipread);  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  check_str_end(common, &skipread_list); -peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf); +peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);  /* Testing char type. This is a code duplication. */  #ifdef SUPPORT_UNICODE + +valid_utf = LABEL(); +  if (common->use_ucp)    {    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); @@ -6851,13 +6331,19 @@ sljit_emit_fast_return(compiler, TMP1, 0);  #ifdef SUPPORT_UNICODE  if (common->invalid_utf)    { -  SLJIT_ASSERT(invalid_utf != NULL); +  set_jumps(invalid_utf1, LABEL()); + +  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL); +  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf); -  set_jumps(invalid_utf, LABEL());    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);    sljit_emit_fast_return(compiler, TMP1, 0); -  return; + +  set_jumps(invalid_utf2, LABEL()); +  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0); +  sljit_emit_fast_return(compiler, TMP1, 0);    }  #endif /* SUPPORT_UNICODE */  } @@ -7225,7 +6711,7 @@ struct sljit_label *label;  int char1_reg;  int char2_reg; -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS)    {    char1_reg = STR_END;    char2_reg = STACK_TOP; @@ -7307,7 +6793,7 @@ int char2_reg;  int lcc_table;  int opt_type = 0; -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS)    {    char2_reg = STACK_TOP;    lcc_table = STACK_LIMIT; @@ -7790,8 +7276,6 @@ if (needstype || needsscript)    if (needsscript)      {  // PH hacking -//fprintf(stderr, "~~B\n"); -        OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); @@ -7845,7 +7329,6 @@ if (needstype || needsscript)      if (!needschar)        {  // PH hacking -//fprintf(stderr, "~~C\n");    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); @@ -7860,7 +7343,6 @@ if (needstype || needsscript)      else        {  // PH hacking -//fprintf(stderr, "~~D\n");    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);        OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); @@ -8174,14 +7656,24 @@ struct sljit_label *label;  switch(type)    {    case OP_SOD: -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +    } +  else +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));    return cc;    case OP_SOM: -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); +    } +  else +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));    return cc; @@ -8191,9 +7683,7 @@ switch(type)  #ifdef SUPPORT_UNICODE    if (common->invalid_utf)      { -    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0); -    add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS)); -    add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); +    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));      return cc;      }  #endif /* SUPPORT_UNICODE */ @@ -8267,17 +7757,24 @@ switch(type)      JUMPHERE(jump[3]);      }    JUMPHERE(jump[0]); -  check_partial(common, FALSE); +  if (common->mode != PCRE2_JIT_COMPLETE) +    check_partial(common, TRUE);    return cc;    case OP_EOD:    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); -  check_partial(common, FALSE); +  if (common->mode != PCRE2_JIT_COMPLETE) +    check_partial(common, TRUE);    return cc;    case OP_DOLL: -  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); -  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); +    } +  else +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));    if (!common->endonly) @@ -8291,8 +7788,13 @@ switch(type)    case OP_DOLLM:    jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); -  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); -  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); +    } +  else +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));    check_partial(common, FALSE);    jump[0] = JUMP(SLJIT_JUMP); @@ -8327,18 +7829,38 @@ switch(type)    return cc;    case OP_CIRC: -  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); -  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); -  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); -  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); -  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); +    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); +    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); +    } +  else +    { +    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); +    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); +    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); +    }    return cc;    case OP_CIRCM: -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); -  jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); -  OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); +  /* TMP2 might be used by peek_char_back. */ +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); +    } +  else +    { +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); +    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); +    OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); +    }    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32));    jump[0] = JUMP(SLJIT_JUMP);    JUMPHERE(jump[1]); @@ -8367,11 +7889,16 @@ switch(type)    length = GET(cc, 0);    if (length == 0)      return cc + LINK_SIZE; -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +  if (HAS_VIRTUAL_REGISTERS) +    { +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); +    } +  else +    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));  #ifdef SUPPORT_UNICODE    if (common->utf)      { -    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));      OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);      label = LABEL();      add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); @@ -8382,9 +7909,8 @@ switch(type)    else  #endif      { -    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); -    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); +    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));      }    check_start_used_ptr(common);    return cc + LINK_SIZE; @@ -8402,12 +7928,12 @@ static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)  PCRE2_SPTR start_subject = args->begin;  PCRE2_SPTR end_subject = args->end;  int lgb, rgb, ricount; -PCRE2_SPTR prevcc, startcc, bptr; +PCRE2_SPTR prevcc, endcc, bptr;  BOOL first = TRUE;  uint32_t c;  prevcc = cc; -startcc = NULL; +endcc = NULL;  do    {    GETCHARINC(c, cc); @@ -8416,7 +7942,7 @@ do    if (first)      {      lgb = rgb; -    startcc = cc; +    endcc = cc;      first = FALSE;      continue;      } @@ -8455,25 +7981,27 @@ do         lgb != ucp_gbExtended_Pictographic)      lgb = rgb; -  prevcc = startcc; -  startcc = cc; +  prevcc = endcc; +  endcc = cc;    }  while (cc < end_subject); -return startcc; +return endcc;  } +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ +  static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)  {  PCRE2_SPTR start_subject = args->begin;  PCRE2_SPTR end_subject = args->end;  int lgb, rgb, ricount; -PCRE2_SPTR prevcc, startcc, bptr; +PCRE2_SPTR prevcc, endcc, bptr;  BOOL first = TRUE;  uint32_t c;  prevcc = cc; -startcc = NULL; +endcc = NULL;  do    {    GETCHARINC_INVALID(c, cc, end_subject, break); @@ -8482,7 +8010,7 @@ do    if (first)      {      lgb = rgb; -    startcc = cc; +    endcc = cc;      first = FALSE;      continue;      } @@ -8520,16 +8048,14 @@ do         lgb != ucp_gbExtended_Pictographic)      lgb = rgb; -  prevcc = startcc; -  startcc = cc; +  prevcc = endcc; +  endcc = cc;    }  while (cc < end_subject); -return startcc; +return endcc;  } -#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ -  static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)  {  PCRE2_SPTR start_subject = args->begin; @@ -8538,7 +8064,10 @@ int lgb, rgb, ricount;  PCRE2_SPTR bptr;  uint32_t c; -GETCHARINC(c, cc); +/* Patch by PH */ +/* GETCHARINC(c, cc); */ +c = *cc++; +  #if PCRE2_CODE_UNIT_WIDTH == 32  if (c >= 0x110000)    return NULL; @@ -8800,8 +8329,10 @@ switch(type)    if (common->invalid_utf)      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));  #else -  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf)); -  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, +    common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); +  if (!common->utf || common->invalid_utf) +    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));  #endif    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); @@ -9198,8 +8729,6 @@ if (common->utf && *cc == OP_REFI)    CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);  // PH hacking -//fprintf(stderr, "~~E\n"); -    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); @@ -10759,10 +10288,23 @@ if (ket != OP_KET || bra != OP_BRA)  if (offset != 0)    stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); +/* Skip and count the other alternatives. */ +i = 1; +while (*cc == OP_ALT) +  { +  cc += GET(cc, 1); +  i++; +  } +  if (has_alternatives)    {    if (opcode != OP_ONCE) -    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); +    { +    if (i <= 3) +      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); +    else +      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); +    }    if (ket != OP_KETRMAX)      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();    } @@ -10851,9 +10393,6 @@ if (bra == OP_BRAMINZERO)  if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)    count_match(common); -/* Skip the other alternatives. */ -while (*cc == OP_ALT) -  cc += GET(cc, 1);  cc += 1 + LINK_SIZE;  if (opcode == OP_ONCE) @@ -11412,174 +10951,232 @@ switch(opcode)      JUMPTO(SLJIT_JUMP, label);      if (jump != NULL)        JUMPHERE(jump); +    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); +    break;      } -  else -    { -    charpos_enabled = FALSE; -    charpos_char = 0; -    charpos_othercasebit = 0; - -    if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) -      { -      charpos_enabled = TRUE;  #ifdef SUPPORT_UNICODE -      charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); -#endif -      if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) -        { -        charpos_othercasebit = char_get_othercase_bit(common, end + 1); -        if (charpos_othercasebit == 0) -          charpos_enabled = FALSE; -        } - -      if (charpos_enabled) -        { -        charpos_char = end[1]; -        /* Consumpe the OP_CHAR opcode. */ -        end += 2; -#if PCRE2_CODE_UNIT_WIDTH == 8 -        SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); -#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 -        SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); -        if ((charpos_othercasebit & 0x100) != 0) -          charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; +  else if (type == OP_ALLANY && !common->invalid_utf) +#else +  else if (type == OP_ALLANY)  #endif -        if (charpos_othercasebit != 0) -          charpos_char |= charpos_othercasebit; - -        BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; -        BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; -        BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; -        } -      } - -    if (charpos_enabled) +    { +    if (opcode == OP_STAR)        { -      if (opcode == OP_UPTO) -        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); +      if (private_data_ptr == 0) +        allocate_stack(common, 2); -      /* Search the first instance of charpos_char. */ -      jump = JUMP(SLJIT_JUMP); -      label = LABEL(); -      if (opcode == OP_UPTO) -        { -        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -        add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); -        } -      compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); -      if (fast_str_ptr != 0) -        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); -      JUMPHERE(jump); +      OP1(SLJIT_MOV, base, offset0, STR_END, 0); +      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); -      detect_partial_match(common, &backtrack->topbacktracks); -      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); -      if (charpos_othercasebit != 0) -        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); -      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); +      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); +      process_partial_match(common); +      if (fast_str_ptr != 0) +        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_END, 0); +      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); +      break; +      } +#ifdef SUPPORT_UNICODE +    else if (!common->utf) +#else +    else +#endif +      {        if (private_data_ptr == 0)          allocate_stack(common, 2); -      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +        OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); -      if (opcode == OP_UPTO) -        { -        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -        add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); -        } +      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); -      /* Search the last instance of charpos_char. */ -      label = LABEL(); -      compile_char1_matchingpath(common, type, cc, &no_match, FALSE); -      if (fast_str_ptr != 0) -        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); -      detect_partial_match(common, &no_match); -      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); -      if (charpos_othercasebit != 0) -        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); -      if (opcode == OP_STAR) +      if (common->mode == PCRE2_JIT_COMPLETE)          { -        CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); -        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +        OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); +        CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);          }        else          { -        jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); -        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); +        process_partial_match(common);          JUMPHERE(jump);          } -      if (opcode == OP_UPTO) -        { -        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -        JUMPTO(SLJIT_NOT_ZERO, label); -        } -      else -        JUMPTO(SLJIT_JUMP, label); - -      set_jumps(no_match, LABEL()); -      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); -      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));        OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + +      if (fast_str_ptr != 0) +        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); +      break;        } -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -    else if (common->utf) +    } + +  charpos_enabled = FALSE; +  charpos_char = 0; +  charpos_othercasebit = 0; + +  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) +    { +#ifdef SUPPORT_UNICODE +    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); +#else +    charpos_enabled = TRUE; +#endif +    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))        { -      if (private_data_ptr == 0) -        allocate_stack(common, 2); +      charpos_othercasebit = char_get_othercase_bit(common, end + 1); +      if (charpos_othercasebit == 0) +        charpos_enabled = FALSE; +      } -      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); -      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); +    if (charpos_enabled) +      { +      charpos_char = end[1]; +      /* Consumpe the OP_CHAR opcode. */ +      end += 2; +#if PCRE2_CODE_UNIT_WIDTH == 8 +      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 +      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); +      if ((charpos_othercasebit & 0x100) != 0) +        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; +#endif +      if (charpos_othercasebit != 0) +        charpos_char |= charpos_othercasebit; -      if (opcode == OP_UPTO) -        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); +      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; +      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; +      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; +      } +    } -      label = LABEL(); -      compile_char1_matchingpath(common, type, cc, &no_match, TRUE); -      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +  if (charpos_enabled) +    { +    if (opcode == OP_UPTO) +      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); -      if (opcode == OP_UPTO) -        { -        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -        JUMPTO(SLJIT_NOT_ZERO, label); -        } -      else -        JUMPTO(SLJIT_JUMP, label); +    /* Search the first instance of charpos_char. */ +    jump = JUMP(SLJIT_JUMP); +    label = LABEL(); +    if (opcode == OP_UPTO) +      { +      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); +      add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); +      } +    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); +    if (fast_str_ptr != 0) +      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +    JUMPHERE(jump); -      set_jumps(no_match, LABEL()); -      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); -      if (fast_str_ptr != 0) -        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +    detect_partial_match(common, &backtrack->topbacktracks); +    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +    if (charpos_othercasebit != 0) +      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); +    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + +    if (private_data_ptr == 0) +      allocate_stack(common, 2); +    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); +    if (opcode == OP_UPTO) +      { +      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); +      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); +      } + +    /* Search the last instance of charpos_char. */ +    label = LABEL(); +    compile_char1_matchingpath(common, type, cc, &no_match, FALSE); +    if (fast_str_ptr != 0) +      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +    detect_partial_match(common, &no_match); +    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +    if (charpos_othercasebit != 0) +      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); +    if (opcode == OP_STAR) +      { +      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); +      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);        } -#endif      else        { -      if (private_data_ptr == 0) -        allocate_stack(common, 2); +      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); +      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +      JUMPHERE(jump); +      } -      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); -      if (opcode == OP_UPTO) -        OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); +    if (opcode == OP_UPTO) +      { +      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); +      JUMPTO(SLJIT_NOT_ZERO, label); +      } +    else +      JUMPTO(SLJIT_JUMP, label); -      label = LABEL(); -      detect_partial_match(common, &no_match); -      compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); -      if (opcode == OP_UPTO) -        { -        OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -        JUMPTO(SLJIT_NOT_ZERO, label); -        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -        } -      else -        JUMPTO(SLJIT_JUMP, label); +    set_jumps(no_match, LABEL()); +    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); +    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +    } +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +  else if (common->utf) +    { +    if (private_data_ptr == 0) +      allocate_stack(common, 2); -      set_jumps(no_char1_match, LABEL()); -      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -      set_jumps(no_match, LABEL()); -      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); -      if (fast_str_ptr != 0) -        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + +    if (opcode == OP_UPTO) +      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + +    detect_partial_match(common, &no_match); +    label = LABEL(); +    compile_char1_matchingpath(common, type, cc, &no_match, FALSE); +    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + +    if (opcode == OP_UPTO) +      { +      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); +      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); +      } + +    detect_partial_match_to(common, label); +    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +    set_jumps(no_match, LABEL()); +    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); +    if (fast_str_ptr != 0) +      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +    } +#endif +  else +    { +    if (private_data_ptr == 0) +      allocate_stack(common, 2); + +    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); +    if (opcode == OP_UPTO) +      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + +    detect_partial_match(common, &no_match); +    label = LABEL(); +    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); +    if (opcode == OP_UPTO) +      { +      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); +      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));        } + +    detect_partial_match_to(common, label); +    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +    set_jumps(no_char1_match, LABEL()); +    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +    set_jumps(no_match, LABEL()); +    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); +    if (fast_str_ptr != 0) +      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);      } +    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();    break; @@ -11616,25 +11213,47 @@ switch(opcode)    break;    case OP_POSSTAR: +#if defined SUPPORT_UNICODE +  if (type == OP_ALLANY && !common->invalid_utf) +#else +  if (type == OP_ALLANY) +#endif +    { +    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); +    process_partial_match(common); +    if (fast_str_ptr != 0) +      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_END, 0); +    break; +    } +  #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32    if (common->utf)      {      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); +    detect_partial_match(common, &no_match);      label = LABEL(); -    compile_char1_matchingpath(common, type, cc, &no_match, TRUE); +    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);      OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); -    JUMPTO(SLJIT_JUMP, label); +    detect_partial_match_to(common, label); +      set_jumps(no_match, LABEL());      OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);      if (fast_str_ptr != 0) -      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +      { +      if (tmp_base == TMP3) +        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, TMP3, 0); +      else +        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +      }      break;      }  #endif -  label = LABEL();    detect_partial_match(common, &no_match); +  label = LABEL();    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); -  JUMPTO(SLJIT_JUMP, label); +  detect_partial_match_to(common, label); +  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +    set_jumps(no_char1_match, LABEL());    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    set_jumps(no_match, LABEL()); @@ -11649,23 +11268,52 @@ switch(opcode)      {      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + +    detect_partial_match(common, &no_match);      label = LABEL(); -    compile_char1_matchingpath(common, type, cc, &no_match, TRUE); +    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -    JUMPTO(SLJIT_NOT_ZERO, label); +    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); +    detect_partial_match_to(common, label); +      set_jumps(no_match, LABEL());      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);      break;      }  #endif + +  if (type == OP_ALLANY) +    { +    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + +    if (common->mode == PCRE2_JIT_COMPLETE) +      { +      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); +      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); +      } +    else +      { +      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); +      process_partial_match(common); +      JUMPHERE(jump); +      } + +    if (fast_str_ptr != 0) +      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); +    break; +    } +    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); -  label = LABEL(); +    detect_partial_match(common, &no_match); +  label = LABEL();    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); -  JUMPTO(SLJIT_NOT_ZERO, label); +  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); +  detect_partial_match_to(common, label);    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +    set_jumps(no_char1_match, LABEL());    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));    set_jumps(no_match, LABEL()); @@ -11719,8 +11367,15 @@ if (common->accept_label == NULL)    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));  else    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + +if (HAS_VIRTUAL_REGISTERS) +  { +  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); +  } +else +  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options)); +  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));  OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); @@ -11728,7 +11383,8 @@ if (common->accept_label == NULL)    add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));  else    JUMPTO(SLJIT_ZERO, common->accept_label); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));  if (common->accept_label == NULL)    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));  else @@ -11778,10 +11434,11 @@ if (opcode == OP_SKIP)  if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)    { -  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +  if (HAS_VIRTUAL_REGISTERS) +    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); -  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); +  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);    }  return ccend; @@ -12072,11 +11729,12 @@ while (cc < ccend)      SLJIT_ASSERT(common->mark_ptr != 0);      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);      allocate_stack(common, common->has_skip_arg ? 5 : 1); -    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +    if (HAS_VIRTUAL_REGISTERS) +      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); -    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); +    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);      if (common->has_skip_arg)        {        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); @@ -12403,16 +12061,15 @@ PCRE2_SPTR ccprev;  PCRE2_UCHAR bra = OP_BRA;  PCRE2_UCHAR ket;  assert_backtrack *assert; -sljit_uw *next_update_addr = NULL;  BOOL has_alternatives;  BOOL needs_control_head = FALSE;  struct sljit_jump *brazero = NULL; -struct sljit_jump *alt1 = NULL; -struct sljit_jump *alt2 = NULL; +struct sljit_jump *next_alt = NULL;  struct sljit_jump *once = NULL;  struct sljit_jump *cond = NULL;  struct sljit_label *rmin_label = NULL;  struct sljit_label *exact_label = NULL; +struct sljit_put_label *put_label = NULL;  if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)    { @@ -12561,7 +12218,7 @@ else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)      free_stack(common, 1);      alt_max = 2; -    alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); +    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);      }    }  else if (has_alternatives) @@ -12569,21 +12226,15 @@ else if (has_alternatives)    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));    free_stack(common, 1); -  if (alt_max > 4) +  if (alt_max > 3)      { -    /* Table jump if alt_max is greater than 4. */ -    next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); -    if (SLJIT_UNLIKELY(next_update_addr == NULL)) -      return; -    sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); -    add_label_addr(common, next_update_addr++); +    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + +    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label); +    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());      }    else -    { -    if (alt_max == 4) -      alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); -    alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); -    } +    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);    }  COMPILE_BACKTRACKINGPATH(current->top); @@ -12620,7 +12271,7 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))  if (has_alternatives)    { -  alt_count = sizeof(sljit_uw); +  alt_count = 1;    do      {      current->top = NULL; @@ -12699,7 +12350,12 @@ if (has_alternatives)        stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);      if (opcode != OP_ONCE) -      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); +      { +      if (alt_max <= 3) +        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); +      else +        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); +      }      if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)        { @@ -12712,24 +12368,18 @@ if (has_alternatives)      if (opcode != OP_ONCE)        { -      if (alt_max > 4) -        add_label_addr(common, next_update_addr++); -      else +      if (alt_max <= 3)          { -        if (alt_count != 2 * sizeof(sljit_uw)) -          { -          JUMPHERE(alt1); -          if (alt_max == 3 && alt_count == sizeof(sljit_uw)) -            alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); -          } -        else +        JUMPHERE(next_alt); +        alt_count++; +        if (alt_count < alt_max)            { -          JUMPHERE(alt2); -          if (alt_max == 4) -            alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); +          SLJIT_ASSERT(alt_count == 2 && alt_max == 3); +          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);            }          } -      alt_count += sizeof(sljit_uw); +      else +        sljit_set_put_label(put_label, LABEL());        }      COMPILE_BACKTRACKINGPATH(current->top); @@ -13219,11 +12869,10 @@ int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_c  int alt_count, alt_max, local_size;  backtrack_common altbacktrack;  jump_list *match = NULL; -sljit_uw *next_update_addr = NULL; -struct sljit_jump *alt1 = NULL; -struct sljit_jump *alt2 = NULL; +struct sljit_jump *next_alt = NULL;  struct sljit_jump *accept_exit = NULL;  struct sljit_label *quit; +struct sljit_put_label *put_label;  /* Recurse captures then. */  common->then_trap = NULL; @@ -13284,7 +12933,12 @@ while (1)    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);    if (alt_max > 1 || has_accept) -    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); +    { +    if (alt_max > 3) +      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); +    else +      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); +    }    add_jump(compiler, &match, JUMP(SLJIT_JUMP)); @@ -13298,7 +12952,7 @@ while (1)      sljit_emit_fast_enter(compiler, TMP1, 0);      if (has_accept) -      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw)); +      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));      /* Save return address. */ @@ -13311,44 +12965,30 @@ while (1)        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));        free_stack(common, 2); -      if (alt_max > 4) +      if (alt_max > 3)          { -          /* Table jump if alt_max is greater than 4. */ -          next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); -          if (SLJIT_UNLIKELY(next_update_addr == NULL)) -            return; -          sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); -          add_label_addr(common, next_update_addr++); +        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); +        sljit_set_put_label(put_label, LABEL());          }        else -        { -        if (alt_max == 4) -          alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); -        alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); -        } +        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);        }      else        free_stack(common, has_accept ? 2 : 1);      } -  else if (alt_max > 4) -    add_label_addr(common, next_update_addr++); +  else if (alt_max > 3) +    sljit_set_put_label(put_label, LABEL());    else      { -    if (alt_count != 2 * sizeof(sljit_uw)) -      { -      JUMPHERE(alt1); -      if (alt_max == 3 && alt_count == sizeof(sljit_uw)) -        alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); -      } -    else +    JUMPHERE(next_alt); +    if (alt_count + 1 < alt_max)        { -      JUMPHERE(alt2); -      if (alt_max == 4) -        alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); +      SLJIT_ASSERT(alt_count == 1 && alt_max == 3); +      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);        }      } -  alt_count += sizeof(sljit_uw); +  alt_count++;    compile_backtrackingpath(common, altbacktrack.top);    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) @@ -13409,7 +13049,7 @@ if (common->accept != NULL)    OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);    allocate_stack(common, 2); -  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); +  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);    }  set_jumps(match, LABEL()); @@ -13444,7 +13084,6 @@ executable_functions *functions;  void *executable_func;  sljit_uw executable_size;  sljit_uw total_length; -label_addr_list *label_addr;  struct sljit_label *mainloop_label = NULL;  struct sljit_label *continue_match_label;  struct sljit_label *empty_match_found_label = NULL; @@ -13459,6 +13098,14 @@ struct sljit_jump *end_anchor_failed = NULL;  SLJIT_ASSERT(tables); +#if HAS_VIRTUAL_REGISTERS == 1 +SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0); +#elif HAS_VIRTUAL_REGISTERS == 0 +SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0); +#else +#error "Invalid value for HAS_VIRTUAL_REGISTERS" +#endif +  memset(&rootbacktrack, 0, sizeof(backtrack_common));  memset(common, 0, sizeof(compiler_common));  common->re = re; @@ -13476,6 +13123,7 @@ common->fcc = tables + fcc_offset;  common->lcc = (sljit_sw)(tables + lcc_offset);  common->mode = mode;  common->might_be_empty = re->minlength == 0; +common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY) != 0;  common->nltype = NLTYPE_FIXED;  switch(re->newline_convention)    { @@ -13742,7 +13390,7 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    sljit_free_compiler(compiler);    SLJIT_FREE(common->optimized_cbracket, allocator_data);    SLJIT_FREE(common->private_data_ptrs, allocator_data); -  PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); +  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);    return PCRE2_ERROR_NOMEMORY;    } @@ -13796,7 +13444,7 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))    sljit_free_compiler(compiler);    SLJIT_FREE(common->optimized_cbracket, allocator_data);    SLJIT_FREE(common->private_data_ptrs, allocator_data); -  PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); +  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);    return PCRE2_ERROR_NOMEMORY;    } @@ -13885,7 +13533,7 @@ while (common->currententry != NULL)      sljit_free_compiler(compiler);      SLJIT_FREE(common->optimized_cbracket, allocator_data);      SLJIT_FREE(common->private_data_ptrs, allocator_data); -    PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); +    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);      return PCRE2_ERROR_NOMEMORY;      }    flush_stubs(common); @@ -14028,16 +13676,11 @@ SLJIT_FREE(common->private_data_ptrs, allocator_data);  executable_func = sljit_generate_code(compiler);  executable_size = sljit_get_generated_code_size(compiler); -label_addr = common->label_addrs; -while (label_addr != NULL) -  { -  *label_addr->update_addr = sljit_get_label_addr(label_addr->label); -  label_addr = label_addr->next; -  }  sljit_free_compiler(compiler); +  if (executable_func == NULL)    { -  PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); +  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);    return PCRE2_ERROR_NOMEMORY;    } @@ -14052,7 +13695,7 @@ else      /* This case is highly unlikely since we just recently      freed a lot of memory. Not impossible though. */      sljit_free_code(executable_func); -    PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); +    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);      return PCRE2_ERROR_NOMEMORY;      }    memset(functions, 0, sizeof(executable_functions)); @@ -14097,18 +13740,12 @@ Returns:        0: success or (*NOJIT) was used  PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION  pcre2_jit_compile(pcre2_code *code, uint32_t options)  { -#ifndef SUPPORT_JIT - -(void)code; -(void)options; -return PCRE2_ERROR_JIT_BADOPTION; - -#else  /* SUPPORT_JIT */ -  pcre2_real_code *re = (pcre2_real_code *)code; -executable_functions *functions; -uint32_t excluded_options; -int result; + +#ifdef SUPPORT_JIT +executable_functions *functions = (executable_functions *)re->executable_jit; +static int executable_allocator_is_working = 0; +#endif  if (code == NULL)    return PCRE2_ERROR_NULL; @@ -14116,30 +13753,98 @@ if (code == NULL)  if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)    return PCRE2_ERROR_JIT_BADOPTION; +/* Support for invalid UTF was first introduced in JIT, with the option +PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the +compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the +preferred feature, with the earlier option deprecated. However, for backward +compatibility, if the earlier option is set, it forces the new option so that +if JIT matching falls back to the interpreter, there is still support for +invalid UTF. However, if this function has already been successfully called +without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that +non-invalid-supporting JIT code was compiled), give an error. + +If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following +actions are needed: + +  1. Remove the definition from pcre2.h.in and from the list in +     PUBLIC_JIT_COMPILE_OPTIONS above. + +  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module. + +  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c. + +  4. Delete the following short block of code. The setting of "re" and +     "functions" can be moved into the JIT-only block below, but if that is +     done, (void)re and (void)functions will be needed in the non-JIT case, to +     avoid compiler warnings. +*/ + +if ((options & PCRE2_JIT_INVALID_UTF) != 0) +  { +  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0) +    { +#ifdef SUPPORT_JIT +    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION; +#endif +    re->overall_options |= PCRE2_MATCH_INVALID_UTF; +    } +  } + +/* The above tests are run with and without JIT support. This means that +PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring +interpreter support) even in the absence of JIT. But now, if there is no JIT +support, give an error return. */ + +#ifndef SUPPORT_JIT +return PCRE2_ERROR_JIT_BADOPTION; +#else  /* SUPPORT_JIT */ + +/* There is JIT support. Do the necessary. */ +  if ((re->flags & PCRE2_NOJIT) != 0) return 0; -functions = (executable_functions *)re->executable_jit; +if (executable_allocator_is_working == 0) +  { +  /* Checks whether the executable allocator is working. This check +     might run multiple times in multi-threaded environments, but the +     result should not be affected by it. */ +  void *ptr = SLJIT_MALLOC_EXEC(32); + +  executable_allocator_is_working = -1; + +  if (ptr != NULL) +    { +    SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr)); +    executable_allocator_is_working = 1; +    } +  } + +if (executable_allocator_is_working < 0) +  return PCRE2_ERROR_NOMEMORY; + +if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) +  options |= PCRE2_JIT_INVALID_UTF;  if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL      || functions->executable_funcs[0] == NULL)) { -  excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); -  result = jit_compile(code, options & ~excluded_options); +  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); +  int result = jit_compile(code, options & ~excluded_options);    if (result != 0)      return result;    }  if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL      || functions->executable_funcs[1] == NULL)) { -  excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); -  result = jit_compile(code, options & ~excluded_options); +  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); +  int result = jit_compile(code, options & ~excluded_options);    if (result != 0)      return result;    }  if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL      || functions->executable_funcs[2] == NULL)) { -  excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); -  result = jit_compile(code, options & ~excluded_options); +  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); +  int result = jit_compile(code, options & ~excluded_options);    if (result != 0)      return result;    }  |