diff options
Diffstat (limited to 'thirdparty/pcre2/src/sljit/sljitNativeARM_64.c')
-rw-r--r-- | thirdparty/pcre2/src/sljit/sljitNativeARM_64.c | 637 |
1 files changed, 383 insertions, 254 deletions
diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c index 3f0f5fcc30..96453b4abe 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -48,19 +48,20 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31 }; -#define W_OP (1u << 31) -#define RD(rd) (reg_map[rd]) -#define RT(rt) (reg_map[rt]) -#define RN(rn) (reg_map[rn] << 5) -#define RT2(rt2) (reg_map[rt2] << 10) -#define RM(rm) (reg_map[rm] << 16) -#define VD(vd) (freg_map[vd]) -#define VT(vt) (freg_map[vt]) -#define VN(vn) (freg_map[vn] << 5) -#define VM(vm) (freg_map[vm] << 16) +#define W_OP ((sljit_ins)1 << 31) +#define RD(rd) ((sljit_ins)reg_map[rd]) +#define RT(rt) ((sljit_ins)reg_map[rt]) +#define RN(rn) ((sljit_ins)reg_map[rn] << 5) +#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10) +#define RM(rm) ((sljit_ins)reg_map[rm] << 16) +#define VD(vd) ((sljit_ins)freg_map[vd]) +#define VT(vt) ((sljit_ins)freg_map[vt]) +#define VT2(vt) ((sljit_ins)freg_map[vt] << 10) +#define VN(vn) ((sljit_ins)freg_map[vn] << 5) +#define VM(vm) ((sljit_ins)freg_map[vm] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -96,8 +97,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FNEG 0x1e614000 #define FSUB 0x1e603800 #define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 #define LDP 0xa9400000 -#define LDP_PRE 0xa9c00000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 #define LDR_PRE 0xf8400c00 #define LSLV 0x9ac02000 #define LSRV 0x9ac02400 @@ -117,10 +120,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SMADDL 0x9b200000 #define SMULH 0x9b403c00 #define STP 0xa9000000 +#define STP_F64 0x6d000000 #define STP_PRE 0xa9800000 #define STRB 0x38206800 #define STRBI 0x39000000 #define STRI 0xf9000000 +#define STRI_F64 0xfd000000 #define STR_FI 0x3d000000 #define STR_FR 0x3c206800 #define STUR_FI 0x3c000000 @@ -145,10 +150,10 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); - return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); } static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) @@ -171,14 +176,14 @@ static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; if (jump->flags & IS_COND) { - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= 0xfffff && diff >= -0x100000) { code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; jump->addr -= sizeof(sljit_ins); jump->flags |= PATCH_COND; return 5; } - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x7ffffff && diff >= -0x8000000) { @@ -231,8 +236,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; - sljit_s32 dst; + sljit_sw addr; + sljit_u32 dst; struct sljit_label *label; struct sljit_jump *jump; @@ -271,7 +276,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -300,7 +305,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -313,58 +318,58 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); - buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff); if (jump->flags & IS_COND) buf_ptr[-1] -= (4 << 5); break; } if (jump->flags & PATCH_COND) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); - buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); break; } - SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); - SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); dst = buf_ptr[0] & 0x1f; - buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); - buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); + buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); + buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) - buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); + buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); if (jump->flags & PATCH_ABS64) - buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); + buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); } while (0); jump = jump->next; } put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; + addr = (sljit_sw)put_label->label->addr; + buf_ptr = (sljit_ins*)put_label->addr; - buf_ptr[0] |= (addr & 0xffff) << 5; - buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5; + buf_ptr[0] |= ((sljit_ins)addr & 0xffff) << 5; + buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5; if (put_label->flags >= 1) - buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5; + buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; if (put_label->flags >= 2) - buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5; + buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5; put_label = put_label->next; } compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -426,11 +431,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) value >>= 1; \ } -#define LOGICAL_IMM_CHECK 0x100 +#define LOGICAL_IMM_CHECK (sljit_ins)0x100 -static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) +static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len) { - sljit_s32 negated, ones, right; + sljit_s32 negated; + sljit_u32 ones, right; sljit_uw mask, uimm; sljit_ins ins; @@ -497,30 +503,30 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) { sljit_uw imm = (sljit_uw)simm; - sljit_s32 i, zeros, ones, first; + sljit_u32 i, zeros, ones, first; sljit_ins bitmask; /* Handling simple immediates first. */ if (imm <= 0xffff) - return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)imm << 5)); if (simm < 0 && simm >= -0x10000) - return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); if (imm <= 0xffffffffl) { if ((imm & 0xffff) == 0) - return push_inst(compiler, MOVZ | RD(dst) | ((imm >> 16) << 5) | (1 << 21)); + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm >> 16) << 5) | (1 << 21)); if ((imm & 0xffff0000l) == 0xffff0000) - return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); if ((imm & 0xffff) == 0xffff) - return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); bitmask = logical_imm(simm, 16); if (bitmask != 0) return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); } bitmask = logical_imm(simm, 32); @@ -529,10 +535,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, if (simm < 0 && simm >= -0x100000000l) { if ((imm & 0xffff) == 0xffff) - return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); - FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); - return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); } /* A large amount of number can be constructed from ORR and MOVx, but computing them is costly. */ @@ -558,10 +564,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } if (first) { first = 0; - FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); } else - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)~simm & 0xffff) << 5) | (i << 21))); simm >>= 16; } return SLJIT_SUCCESS; @@ -574,10 +580,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } if (first) { first = 0; - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); } else - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); simm >>= 16; } return SLJIT_SUCCESS; @@ -619,12 +625,11 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } if (flags & (ARG1_IMM | ARG2_IMM)) { - reg = (flags & ARG2_IMM) ? arg1 : arg2; + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); imm = (flags & ARG2_IMM) ? arg2 : arg1; switch (op) { case SLJIT_MUL: - case SLJIT_NEG: case SLJIT_CLZ: case SLJIT_ADDC: case SLJIT_SUBC: @@ -639,40 +644,43 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); goto set_flags; case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = -imm; /* Fall through. */ case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (op != SLJIT_SUB) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (imm == 0) { CHECK_FLAGS(1 << 29); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); } if (imm > 0 && imm <= 0xfff) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10)); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10)); } nimm = -imm; if (nimm > 0 && nimm <= 0xfff) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10)); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10)); } if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)); } if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)); } if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { - FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22))); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10)); + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)imm & 0xfff) << 10)); } if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { - FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22))); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10)); + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10)); } break; case SLJIT_AND: @@ -697,11 +705,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10))); } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) + | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10))); } goto set_flags; case SLJIT_LSHR: @@ -712,11 +722,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s inv_bits |= 1 << 30; if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | ((sljit_ins)imm << 16) | (31 << 10))); } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10))); } goto set_flags; default: @@ -766,41 +778,38 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (!(flags & INT_OP)) inv_bits |= 1 << 22; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); - case SLJIT_MOV_U32: + case SLJIT_MOV32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if ((flags & INT_OP) && dst == arg2) + if (dst == arg2) return SLJIT_SUCCESS; + /* fallthrough */ + case SLJIT_MOV_U32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_MOV_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if ((flags & INT_OP) && dst == arg2) - return SLJIT_SUCCESS; return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); case SLJIT_NOT: SLJIT_ASSERT(arg1 == TMP_REG1); FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); break; /* Set flags. */ - case SLJIT_NEG: - SLJIT_ASSERT(arg1 == TMP_REG1); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if (flags & SET_FLAGS) - inv_bits |= 1 << 29; - return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_MUL: @@ -852,7 +861,7 @@ set_flags: #define INT_SIZE 0x2 #define WORD_SIZE 0x3 -#define MEM_SIZE_SHIFT(flags) ((flags) & 0x3) +#define MEM_SIZE_SHIFT(flags) ((sljit_ins)(flags) & 0x3) static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) @@ -872,35 +881,34 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, STRB | type | RT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg)); } arg &= REG_MASK; - if (arg == SLJIT_UNUSED) { + if (!arg) { FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10)); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { - if ((argw >> shift) <= 0xfff) { - return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | (argw << (10 - shift))); - } + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10)); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) - return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); FAIL_IF(load_immediate(compiler, tmp_reg, argw)); @@ -915,39 +923,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, i, tmp, offs, prev, saved_regs_size; + sljit_s32 prev, fprev, saved_regs_size, i, tmp; + sljit_s32 word_arg_count = 0; + sljit_ins offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size + saved_regs_size; + local_size = (local_size + saved_regs_size + 0xf) & ~0xf; + compiler->local_size = local_size; - FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15))); + if (local_size <= 512) { + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size = 0; + } else { + saved_regs_size = ((saved_regs_size - 2 * SSIZE_OF(sw)) + 0xf) & ~0xf; -#ifdef _WIN32 - if (local_size >= 4096) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); - else if (local_size > 256) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10))); -#endif + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)saved_regs_size << 10))); + offs = (sljit_ins)(saved_regs_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size -= saved_regs_size; + SLJIT_ASSERT(local_size > 0); + } - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; prev = -1; - offs = 2 << 15; - for (i = SLJIT_S0; i >= tmp; i--) { + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { if (prev == -1) { prev = i; continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } @@ -957,84 +970,124 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } - if (prev != -1) - FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); + fprev = -1; + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10))); + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } - args = get_arg_count(arg_types); + if (fprev != -1) + FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); - if (args >= 1) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + arg_types >>= SLJIT_ARG_SHIFT; + +#ifdef _WIN32 + if (local_size > 4096) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); +#endif /* _WIN32 */ + + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + tmp++; + } + word_arg_count++; + } + arg_types >>= SLJIT_ARG_SHIFT; + } #ifdef _WIN32 - if (local_size >= 4096) { + if (local_size > 4096) { if (local_size < 4 * 4096) { /* No need for a loop. */ - if (local_size >= 2 * 4096) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - local_size -= 4096; - } if (local_size >= 2 * 4096) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - local_size -= 4096; - } + if (local_size >= 3 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - local_size -= 4096; + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } } else { - FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5))); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10))); + FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10))); FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */)); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - - local_size &= 0xfff; } - if (local_size > 256) { - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10))); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - } - else if (local_size > 0) - FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12))); + local_size &= 0xfff; - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + if (local_size > 0) + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + else + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); } - else if (local_size > 256) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + + if (local_size > 0) { + if (local_size <= 512) + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + else { + if (local_size >= 4096) + local_size = (1 << (22 - 10)); + + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } } - else if (local_size > 0) - FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12))); #else /* !_WIN32 */ /* The local_size does not include saved registers size. */ - if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); - local_size &= 0xfff; + if (local_size != 0) { + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + + if (local_size > 512 || local_size == 0) { + if (local_size != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); } - if (local_size != 0) - FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); #endif /* _WIN32 */ - return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, @@ -1048,57 +1101,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); - compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf); + compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_s32 local_size; - sljit_s32 i, tmp, offs, prev, saved_regs_size; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + sljit_s32 local_size, prev, fprev, i, tmp; + sljit_ins offs; - local_size = compiler->local_size - saved_regs_size; + local_size = compiler->local_size; - /* Load LR as early as possible. */ - if (local_size == 0) + if (local_size > 512 && local_size <= 512 + 496) { + FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); + local_size = 512; + } else FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); - else if (local_size < 63 * sizeof(sljit_sw)) { - FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | (local_size << (15 - 3)))); - } - else { + + if (local_size > 512) { + local_size -= 512; if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) + | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); local_size &= 0xfff; } - if (local_size) - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); - FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + local_size = 512; } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); prev = -1; - offs = 2 << 15; - for (i = SLJIT_S0; i >= tmp; i--) { + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) { if (prev == -1) { prev = i; continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } @@ -1108,15 +1153,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } + fprev = -1; + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + if (prev != -1) - FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + /* This and the next call/jump instruction can be executed parallelly. */ + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(local_size << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); - /* These two can be executed in parallel. */ - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10))); return push_inst(compiler, RET | RN(TMP_LR)); } @@ -1126,7 +1206,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_ins inv_bits = (op & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? W_OP : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1171,13 +1251,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { /* Both operands are registers. */ if (dst_r != TMP_REG1 && FAST_IS_REG(src)) - return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); + return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src); switch (op) { case SLJIT_MOV: @@ -1210,6 +1290,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_u32)srcw; break; case SLJIT_MOV_S32: + case SLJIT_MOV32: mem_flags = INT_SIZE | SIGNED; if (src & SLJIT_IMM) srcw = (sljit_s32)srcw; @@ -1235,14 +1316,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op_flags & SLJIT_I32_OP) { + if (op_flags & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (dst == SLJIT_UNUSED) - flags |= UNUSED_RETURN; - if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2)); src = TMP_REG2; @@ -1263,24 +1341,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 dst_r, flags, mem_flags; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (dst == SLJIT_UNUSED) + if (dst == TMP_REG1) flags |= UNUSED_RETURN; if (src1 & SLJIT_MEM) { @@ -1310,6 +1385,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1363,8 +1452,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1391,34 +1481,34 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1)); } arg &= REG_MASK; - if (arg == SLJIT_UNUSED) { + if (!arg) { FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { if ((argw >> shift) <= 0xfff) - return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | (argw << (10 - shift))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | ((argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) - return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1)); @@ -1429,13 +1519,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); src = TMP_FREG1; } @@ -1451,7 +1541,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) inv_bits |= W_OP; @@ -1471,7 +1561,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -1479,8 +1569,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_s32 mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); @@ -1499,7 +1589,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; sljit_ins inv_bits; CHECK_ERROR(); @@ -1507,7 +1597,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { @@ -1531,7 +1621,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); break; } @@ -1545,8 +1635,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1605,7 +1695,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: @@ -1616,10 +1706,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x0; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x2; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x3; @@ -1645,15 +1745,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xc; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x7; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x6; @@ -1709,9 +1811,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -1724,7 +1832,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; - sljit_ins inv_bits = (type & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1775,7 +1883,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; @@ -1786,8 +1894,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1825,7 +1950,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } @@ -1849,14 +1974,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 dst_reg, sljit_s32 src, sljit_sw srcw) { - sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0; sljit_ins cc; CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - if (dst_reg & SLJIT_I32_OP) + if (dst_reg & SLJIT_32) srcw = (sljit_s32)srcw; FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; @@ -1864,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil } cc = get_cc(compiler, type & 0xff); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); } @@ -1891,17 +2016,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile break; case SLJIT_MOV_S8: sign = 1; + /* fallthrough */ case SLJIT_MOV_U8: inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400; break; case SLJIT_MOV_S16: sign = 1; + /* fallthrough */ case SLJIT_MOV_U16: inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400; break; case SLJIT_MOV_S32: sign = 1; + /* fallthrough */ case SLJIT_MOV_U32: + case SLJIT_MOV32: inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400; break; default: @@ -1916,7 +2045,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (type & SLJIT_MEM_PRE) inst |= 0x800; - return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); + return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, @@ -1936,7 +2065,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil inst = STUR_FI | 0x80000400; - if (!(type & SLJIT_F32_OP)) + if (!(type & SLJIT_32)) inst |= 0x40000000; if (!(type & SLJIT_MEM_STORE)) @@ -1945,7 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil if (type & SLJIT_MEM_PRE) inst |= 0x800; - return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); + return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) @@ -1955,11 +2084,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c CHECK_ERROR(); CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); - - SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + /* Not all instruction forms support accessing SP register. */ if (offset <= 0xffffff && offset >= -0xffffff) { ins = ADDI; if (offset < 0) { @@ -1968,13 +2097,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c } if (offset <= 0xfff) - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10))); else { - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); offset &= 0xfff; if (offset != 0) - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10))); } } else { @@ -2002,7 +2131,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi set_const(const_, compiler); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, (sljit_uw)init_value)); if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); @@ -2034,17 +2163,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; - sljit_s32 dst; + sljit_u32 dst; SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); dst = inst[0] & 0x1f; SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); - inst[0] = MOVZ | dst | ((new_target & 0xffff) << 5); - inst[1] = MOVK | dst | (((new_target >> 16) & 0xffff) << 5) | (1 << 21); - inst[2] = MOVK | dst | (((new_target >> 32) & 0xffff) << 5) | (2 << 21); - inst[3] = MOVK | dst | ((new_target >> 48) << 5) | (3 << 21); + inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5); + inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21); + inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21); + inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21); SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); @@ -2053,5 +2182,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } |