diff options
Diffstat (limited to 'thirdparty/pcre2/src/sljit/sljitNativeS390X.c')
-rw-r--r-- | thirdparty/pcre2/src/sljit/sljitNativeS390X.c | 973 |
1 files changed, 662 insertions, 311 deletions
diff --git a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c index 716491ec72..8eef910c42 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c @@ -44,6 +44,9 @@ typedef sljit_uw sljit_ins; /* Instruction tags (most significant halfword). */ static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 }; @@ -97,20 +100,37 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac * link register doesn't need to change */ +/* When reg cannot be unused. */ +#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) + /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ static const sljit_gpr link_r = 14; /* r14 */ static const sljit_gpr fast_link_r = 0; /* r0 */ -/* Flag register layout: +#define TMP_FREG1 (0) + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, +}; + +#define R0A(r) (r) +#define R4A(r) ((r) << 4) +#define R8A(r) ((r) << 8) +#define R12A(r) ((r) << 12) +#define R16A(r) ((r) << 16) +#define R20A(r) ((r) << 20) +#define R28A(r) ((r) << 28) +#define R32A(r) ((r) << 32) +#define R36A(r) ((r) << 36) - 0 32 33 34 36 64 - +---------------+---+---+-------+-------+ - | ZERO | 0 | 0 | C C |///////| - +---------------+---+---+-------+-------+ -*/ -static const sljit_gpr flag_r = 14; /* r14 */ +#define R0(r) ((sljit_ins)reg_map[r]) + +#define F0(r) ((sljit_ins)freg_map[r]) +#define F4(r) (R4A((sljit_ins)freg_map[r])) +#define F20(r) (R20A((sljit_ins)freg_map[r])) +#define F36(r) (R36A((sljit_ins)freg_map[r])) struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -124,19 +144,25 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) return reg_map[r]; } +static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0]))); + return freg_map[r]; +} + /* Size of instruction in bytes. Tags must already be cleared. */ static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) { /* keep faulting instructions */ if (ins == 0) - return 2; + return 2; if ((ins & 0x00000000ffffL) == ins) - return 2; + return 2; if ((ins & 0x0000ffffffffL) == ins) - return 4; + return 4; if ((ins & 0xffffffffffffL) == ins) - return 6; + return 6; SLJIT_UNREACHABLE(); return (sljit_uw)-1; @@ -172,7 +198,8 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins) } #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ - (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \ + && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE)) /* Map the given type to a 4-bit condition code mask. */ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { @@ -191,6 +218,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc0 | cc3); return (cc0 | cc2); } + /* fallthrough */ case SLJIT_EQUAL_F64: return cc0; @@ -204,6 +232,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc1 | cc2); return (cc1 | cc3); } + /* fallthrough */ case SLJIT_NOT_EQUAL_F64: return (cc1 | cc2 | cc3); @@ -228,10 +257,20 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_LESS_F64: return cc1; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: return (cc0 | cc1); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc0 | cc1); + /* fallthrough */ + case SLJIT_SIG_GREATER: /* Overflow is considered greater, see SLJIT_SUB. */ return cc2 | cc3; @@ -242,6 +281,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc2 | cc3); + /* fallthrough */ case SLJIT_UNORDERED_F64: return cc3; @@ -249,6 +289,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_NOT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc0 | cc1); + /* fallthrough */ case SLJIT_ORDERED_F64: return (cc0 | cc1 | cc2); @@ -444,7 +485,7 @@ SLJIT_S390X_RR(or, 0x1600) #define SLJIT_S390X_RRE(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ { \ - return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ + return (pattern) | R4A(dst) | R0A(src); \ } /* AND */ @@ -504,7 +545,7 @@ SLJIT_S390X_RRE(sgr, 0xb9090000) #define SLJIT_S390X_RIA(name, pattern, imm_type) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ - return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \ + return (pattern) | R20A(reg) | (imm & 0xffff); \ } /* ADD HALFWORD IMMEDIATE */ @@ -534,7 +575,7 @@ SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ SLJIT_ASSERT(have_eimm()); \ - return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \ + return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \ } /* ADD IMMEDIATE */ @@ -567,17 +608,11 @@ SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) /* RX-a form instructions */ #define SLJIT_S390X_RXA(name, pattern) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT((d & 0xfff) == d); \ - ri = (sljit_ins)(r & 0xf) << 20; \ - xi = (sljit_ins)(x & 0xf) << 16; \ - bi = (sljit_ins)(b & 0xf) << 12; \ - di = (sljit_ins)(d & 0xfff); \ \ - return (pattern) | ri | xi | bi | di; \ + return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \ } /* LOAD */ @@ -607,15 +642,9 @@ SLJIT_S390X_RXA(sth, 0x40000000) #define SLJIT_S390X_RXYA(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT(cond); \ - ri = (sljit_ins)(r & 0xf) << 36; \ - xi = (sljit_ins)(x & 0xf) << 32; \ - bi = (sljit_ins)(b & 0xf) << 28; \ - di = disp_s20(d); \ \ - return (pattern) | ri | xi | bi | di; \ + return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \ } /* LOAD */ @@ -660,17 +689,11 @@ SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) /* RSY-a instructions */ #define SLJIT_S390X_RSYA(name, pattern, cond) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \ { \ - sljit_ins r1, r3, b2, d2; \ -\ SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(dst & 0xf) << 36; \ - r3 = (sljit_ins)(src & 0xf) << 32; \ - b2 = (sljit_ins)(b & 0xf) << 28; \ - d2 = disp_s20(d); \ \ - return (pattern) | r1 | r3 | b2 | d2; \ + return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \ } /* LOAD MULTIPLE */ @@ -691,16 +714,14 @@ SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) #define SLJIT_S390X_RIEF(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \ { \ - sljit_ins r1, r2, i3, i4, i5; \ + sljit_ins i3, i4, i5; \ \ SLJIT_ASSERT(have_genext()); \ - r1 = (sljit_ins)(dst & 0xf) << 36; \ - r2 = (sljit_ins)(src & 0xf) << 32; \ i3 = (sljit_ins)start << 24; \ i4 = (sljit_ins)end << 16; \ i5 = (sljit_ins)rot << 8; \ \ - return (pattern) | r1 | r2 | i3 | i4 | i5; \ + return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \ } /* ROTATE THEN AND SELECTED BITS */ @@ -728,14 +749,12 @@ SLJIT_S390X_RIEF(risbhg, 0xec000000005d) #define SLJIT_S390X_RRFC(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ { \ - sljit_ins r1, r2, m3; \ + sljit_ins m3; \ \ SLJIT_ASSERT(have_lscond1()); \ - r1 = (sljit_ins)(dst & 0xf) << 4; \ - r2 = (sljit_ins)(src & 0xf); \ m3 = (sljit_ins)(mask & 0xf) << 12; \ \ - return (pattern) | m3 | r1 | r2; \ + return (pattern) | m3 | R4A(dst) | R0A(src); \ } /* LOAD HALFWORD IMMEDIATE ON CONDITION */ @@ -748,14 +767,13 @@ SLJIT_S390X_RRFC(locgr, 0xb9e20000) #define SLJIT_S390X_RIEG(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \ { \ - sljit_ins r1, m3, i2; \ + sljit_ins m3, i2; \ \ SLJIT_ASSERT(have_lscond2()); \ - r1 = (sljit_ins)(reg & 0xf) << 36; \ m3 = (sljit_ins)(mask & 0xf) << 32; \ i2 = (sljit_ins)(imm & 0xffffL) << 16; \ \ - return (pattern) | r1 | m3 | i2; \ + return (pattern) | R36A(reg) | m3 | i2; \ } /* LOAD HALFWORD IMMEDIATE ON CONDITION */ @@ -767,13 +785,9 @@ SLJIT_S390X_RIEG(locghi, 0xec0000000046) #define SLJIT_S390X_RILB(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \ { \ - sljit_ins r1, ri2; \ -\ SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(reg & 0xf) << 36; \ - ri2 = (sljit_ins)(ri & 0xffffffff); \ \ - return (pattern) | r1 | ri2; \ + return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \ } /* BRANCH RELATIVE AND SAVE LONG */ @@ -808,22 +822,20 @@ SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src) { - sljit_ins r1 = ((sljit_ins)dst & 0xf) << 8; - sljit_ins r2 = ((sljit_ins)src & 0xf); SLJIT_ASSERT(have_eimm()); - return 0xb9830000 | r1 | r2; + return 0xb9830000 | R8A(dst) | R0A(src); } /* INSERT PROGRAM MASK */ SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) { - return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4); + return 0xb2220000 | R4A(dst); } /* SET PROGRAM MASK */ SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) { - return 0x0400 | ((sljit_ins)(dst & 0xf) << 4); + return 0x0400 | R4A(dst); } /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ @@ -842,12 +854,12 @@ static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 1 (non-zero and no overflow) : unchanged 2 (zero and overflow) : decreased by 1 3 (non-zero and overflow) : decreased by 1 if non-zero */ - FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); - FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000))); - FAIL_IF(push_inst(compiler, spm(flag_r))); + FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(tmp1))); return SLJIT_SUCCESS; } @@ -858,16 +870,16 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t if (is_s16(v)) return push_inst(compiler, lghi(target, (sljit_s16)v)); - if ((sljit_uw)v == (v & 0x000000000000ffffU)) + if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0) return push_inst(compiler, llill(target, (sljit_u16)v)); - if ((sljit_uw)v == (v & 0x00000000ffff0000U)) + if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0) return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16))); - if ((sljit_uw)v == (v & 0x0000ffff00000000U)) + if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0) return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32))); - if ((sljit_uw)v == (v & 0xffff000000000000U)) + if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0) return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); /* 6 byte instructions (requires extended immediate facility) */ @@ -875,15 +887,16 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t if (is_s32(v)) return push_inst(compiler, lgfi(target, (sljit_s32)v)); - if ((sljit_uw)v == (v & 0x00000000ffffffffU)) + if (((sljit_uw)v >> 32) == 0) return push_inst(compiler, llilf(target, (sljit_u32)v)); - if ((sljit_uw)v == (v & 0xffffffff00000000U)) - return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32))); + if (((sljit_uw)v << 32) == 0) + return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); } + /* TODO(mundaym): instruction sequences that don't use extended immediates */ abort(); } @@ -891,7 +904,7 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t struct addr { sljit_gpr base; sljit_gpr index; - sljit_sw offset; + sljit_s32 offset; }; /* transform memory operand into D(X,B) form with a signed 20-bit offset */ @@ -911,7 +924,7 @@ static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, if (off != 0) { /* shift and put the result into tmp */ SLJIT_ASSERT(0 <= off && off < 64); - FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); index = tmp; off = 0; /* clear offset */ } @@ -923,7 +936,7 @@ static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, } addr->base = base; addr->index = index; - addr->offset = off; + addr->offset = (sljit_s32)off; return SLJIT_SUCCESS; } @@ -944,7 +957,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, if (off != 0) { /* shift and put the result into tmp */ SLJIT_ASSERT(0 <= off && off < 64); - FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); index = tmp; off = 0; /* clear offset */ } @@ -956,7 +969,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, } addr->base = base; addr->index = index; - addr->offset = off; + addr->offset = (sljit_s32)off; return SLJIT_SUCCESS; } @@ -1014,16 +1027,16 @@ static sljit_s32 emit_move(struct sljit_compiler *compiler, sljit_gpr dst_r, sljit_s32 src, sljit_sw srcw) { - SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK)); + SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK)); if (src & SLJIT_IMM) return push_load_imm_inst(compiler, dst_r, srcw); if (src & SLJIT_MEM) - return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_I32_OP) != 0); + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0); sljit_gpr src_r = gpr(src & REG_MASK); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); } static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, @@ -1035,8 +1048,8 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr src_r = tmp1; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1050,17 +1063,32 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (FAST_IS_REG(src2)) - src_r = gpr(src2 & REG_MASK); + src_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); - FAIL_IF(push_inst(compiler, ins | (dst_r << 4) | src_r)); + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r))); if (needs_move != 2) return SLJIT_SUCCESS; dst_r = gpr(dst & REG_MASK); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)); } static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, @@ -1068,21 +1096,21 @@ static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr src1_r = tmp0; sljit_gpr src2_r = tmp1; if (FAST_IS_REG(src1)) - src1_r = gpr(src1 & REG_MASK); + src1_r = gpr(src1); else FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (FAST_IS_REG(src2)) - src2_r = gpr(src2 & REG_MASK); + src2_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); - return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12)); + return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r)); } typedef enum { @@ -1099,8 +1127,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1110,8 +1138,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (type == RIL_A) - return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff)); - return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff)); + return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff)); } static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, @@ -1119,15 +1147,15 @@ static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_sw src2w) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; sljit_gpr src_r = tmp0; - if (!SLOW_IS_REG(src1)) + if (!FAST_IS_REG(src1)) FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); else src_r = gpr(src1 & REG_MASK); - return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16); + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16); } typedef enum { @@ -1147,7 +1175,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, SLJIT_ASSERT(src2 & SLJIT_MEM); - if (SLOW_IS_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = gpr(dst); if (dst == src1) @@ -1183,9 +1211,9 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, } if (type == RX_A) - ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w; + ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w; else - ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w); + ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w); FAIL_IF(push_inst(compiler, ins)); @@ -1193,7 +1221,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, return SLJIT_SUCCESS; dst_r = gpr(dst); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); } static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, @@ -1226,7 +1254,7 @@ static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, else dst_r = gpr(dst & REG_MASK); - return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw)); + return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw)); } struct ins_forms { @@ -1240,7 +1268,7 @@ struct ins_forms { }; static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -1250,7 +1278,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct if ((src1 | src2) & SLJIT_MEM) { sljit_ins ins12, ins20; - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins12 = forms->op; ins20 = forms->op_y; } @@ -1297,7 +1325,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct } } - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins = forms->op_r; ins_k = forms->op_rk; } @@ -1308,7 +1336,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct SLJIT_ASSERT(ins != 0 || ins_k != 0); - if (ins && SLOW_IS_REG(dst)) { + if (ins && FAST_IS_REG(dst)) { if (dst == src1) return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); @@ -1323,7 +1351,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct } static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -1333,7 +1361,7 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str if (src2 & SLJIT_MEM) { sljit_ins ins12, ins20; - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins12 = forms->op; ins20 = forms->op_y; } @@ -1354,10 +1382,10 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); } - ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk; + ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk; - if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1)) - return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + if (ins == 0 || (FAST_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); } @@ -1376,9 +1404,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_memory_fragment *buf; void *code, *code_ptr; sljit_uw *pool, *pool_ptr; - - sljit_uw source; - sljit_sw offset; /* TODO(carenas): only need 32 bit */ + sljit_sw source, offset; /* TODO(carenas): only need 32 bit */ CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); @@ -1489,38 +1515,41 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil ins &= ~sljit_ins_const; /* update instruction with relative address of constant */ - source = (sljit_uw)code_ptr; - offset = (sljit_uw)pool_ptr - source; + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + SLJIT_ASSERT(!(offset & 1)); offset >>= 1; /* halfword (not byte) offset */ SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; /* update address */ const_->const_.addr = (sljit_uw)pool_ptr; /* store initial value into pool and update pool address */ - *(pool_ptr++) = const_->init_value; + *(pool_ptr++) = (sljit_uw)const_->init_value; /* move to next constant */ const_ = (struct sljit_s390x_const *)const_->const_.next; } if (jump && jump->addr == j) { - sljit_sw target = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { jump->addr = (sljit_uw)pool_ptr; /* load address into tmp1 */ - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + SLJIT_ASSERT(!(offset & 1)); offset >>= 1; SLJIT_ASSERT(is_s32(offset)); - encode_inst(&code_ptr, - lgrl(tmp1, offset & 0xffffffff)); + + encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff)); /* store jump target into pool and update pool address */ - *(pool_ptr++) = target; + *(pool_ptr++) = (sljit_uw)target; /* branch to tmp1 */ sljit_ins op = (ins >> 32) & 0xf; @@ -1538,7 +1567,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else { jump->addr = (sljit_uw)code_ptr + 2; - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); offset = target - source; /* offset must be halfword aligned */ @@ -1552,14 +1581,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = jump->next; } if (put_label && put_label->addr == j) { - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); SLJIT_ASSERT(put_label->label); put_label->addr = (sljit_uw)code_ptr; /* store target into pool */ *pool_ptr = put_label->label->addr; - offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; pool_ptr++; SLJIT_ASSERT(!(offset & 1)); @@ -1594,7 +1623,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_CMOV: return have_lscond1() ? 1 : 0; case SLJIT_HAS_FPU: - return 0; + return 1; } return 0; } @@ -1607,36 +1636,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args = get_arg_count(arg_types); - sljit_sw frame_size; + sljit_s32 word_arg_count = 0; + sljit_s32 offset, i, tmp; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* saved registers go in callee allocated save area */ - compiler->local_size = (local_size + 0xf) & ~0xf; - frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE; + /* Saved registers are stored in callee allocated save area. */ + SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13); - FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */ - if (frame_size != 0) { - if (is_s16(-frame_size)) - FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size)))); - else if (is_s32(-frame_size)) - FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size)))); - else { - FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size)); - FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ + offset += 9 * SSIZE_OF(sw); + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); } + + if (saveds == 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); } - if (args >= 1) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0)))); - if (args >= 2) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1)))); - if (args >= 3) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), gpr(SLJIT_R2)))); - SLJIT_ASSERT(args < 4); + local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + compiler->local_size = local_size; + + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + + arg_types >>= SLJIT_ARG_SHIFT; + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count)))); + tmp++; + } + word_arg_count++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -1649,37 +1709,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* TODO(mundaym): stack space for saved floating point registers */ - compiler->local_size = (local_size + 0xf) & ~0xf; + compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_sw size; - sljit_gpr end; + sljit_s32 offset, i, tmp; + sljit_s32 local_size = compiler->local_size; + sljit_s32 saveds = compiler->saveds; + sljit_s32 scratches = compiler->scratches; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + if (is_u12(local_size)) + FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); + else + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size))); - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ + offset += 9 * SSIZE_OF(sw); + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } - size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw)); - if (!is_s20(size)) { - FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE)); - FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); - size = r6 * sizeof(sljit_sw); - end = r14; /* r15 has been restored already */ + if (saveds == 0) { + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } } - else - end = r15; - FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */ - FAIL_IF(push_inst(compiler, br(r14))); /* return */ + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); + return push_inst(compiler, br(r14)); /* return */ +} + /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ @@ -1692,7 +1782,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); - op = GET_OPCODE(op) | (op & SLJIT_I32_OP); + op = GET_OPCODE(op) | (op & SLJIT_32); switch (op) { case SLJIT_BREAKPOINT: /* The following invalid instruction is emitted by gdb. */ @@ -1786,17 +1876,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) { - /* TODO(carenas): implement prefetch? */ - return SLJIT_SUCCESS; - } - if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { /* LOAD REGISTER */ if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { dst_r = gpr(dst); src_r = gpr(src); - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { /* 32-bit */ case SLJIT_MOV32_U8: ins = llcr(dst_r, src_r); @@ -1811,6 +1896,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ins = lhr(dst_r, src_r); break; case SLJIT_MOV32: + if (dst_r == src_r) + return SLJIT_SUCCESS; ins = lr(dst_r, src_r); break; /* 64-bit */ @@ -1834,11 +1921,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; case SLJIT_MOV: case SLJIT_MOV_P: + if (dst_r == src_r) + return SLJIT_SUCCESS; ins = lgr(dst_r, src_r); break; default: ins = 0; SLJIT_UNREACHABLE(); + break; } FAIL_IF(push_inst(compiler, ins)); return SLJIT_SUCCESS; @@ -1862,6 +1952,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_sw)((sljit_u32)(srcw)); break; case SLJIT_MOV_S32: + case SLJIT_MOV32: srcw = (sljit_sw)((sljit_s32)(srcw)); break; } @@ -1875,7 +1966,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); /* TODO(carenas): convert all calls below to LEVAL */ - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { case SLJIT_MOV32_U8: ins = llc(reg, mem.offset, mem.index, mem.base); break; @@ -1914,7 +2005,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ins = lg(reg, mem.offset, mem.index, mem.base); break; default: + ins = 0; SLJIT_UNREACHABLE(); + break; } FAIL_IF(push_inst(compiler, ins)); return SLJIT_SUCCESS; @@ -1940,6 +2033,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile WHEN2(is_u12(mem.offset), sth, sthy)); case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: return push_inst(compiler, WHEN2(is_u12(mem.offset), st, sty)); case SLJIT_MOV_P: @@ -1972,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile EVAL(sthy, tmp0, mem)); case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: FAIL_IF(push_inst(compiler, EVAL(ly, tmp0, mem))); FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); @@ -1994,15 +2089,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */ - dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; + dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32)); compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); /* TODO(mundaym): optimize loads and stores */ - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { case SLJIT_NOT: /* emulate ~x with x^-1 */ FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); @@ -2014,7 +2109,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT32: /* emulate ~x with x^-1 */ if (have_eimm()) - FAIL_IF(push_inst(compiler, xilf(dst_r, -1))); + FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff))); else { FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); if (src_r != dst_r) @@ -2023,14 +2118,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); } break; - case SLJIT_NEG: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); - break; - case SLJIT_NEG32: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); - break; case SLJIT_CLZ: if (have_eimm()) { FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */ @@ -2059,8 +2146,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(update_zero_overflow(compiler, op, dst_r)); /* TODO(carenas): doesn't need FAIL_IF */ - if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM)) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + if (dst & SLJIT_MEM) + FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); return SLJIT_SUCCESS; } @@ -2084,20 +2171,6 @@ static SLJIT_INLINE int is_shift(sljit_s32 op) { return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0; } -static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) -{ - switch (GET_FLAG_TYPE(op)) { - case SLJIT_OVERFLOW: - case SLJIT_NOT_OVERFLOW: - case SLJIT_SIG_LESS: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_SIG_GREATER: - case SLJIT_SIG_GREATER_EQUAL: - return 1; - } - return 0; -} - static const struct ins_forms add_forms = { 0x1a00, /* ar */ 0xb9080000, /* agr */ @@ -2131,24 +2204,24 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, if (src2 & SLJIT_IMM) { if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_overflow) - ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; else - ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; return emit_siy(compiler, ins, dst, dstw, src2w); } if (is_s16(src2w)) { if (sets_overflow) - ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; else - ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); goto done; } if (!sets_overflow) { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } @@ -2157,22 +2230,22 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, goto done; } } - else if ((op & SLJIT_I32_OP) || is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + else if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } } forms = sets_overflow ? &add_forms : &logical_add_forms; - FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); done: if (sets_zero_overflow) - FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); + FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } @@ -2202,78 +2275,85 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - int sets_signed = sets_signed_flag(op); + sljit_s32 flag_type = GET_FLAG_TYPE(op); + int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW); int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); const struct ins_forms *forms; sljit_ins ins; - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { - int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = flag_type >= SLJIT_SIG_LESS; compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; if (src2 & SLJIT_IMM) { if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) { - if ((op & SLJIT_I32_OP) || is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } } else { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } if (is_s16(src2w)) - return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w); + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w); } } else if (src2 & SLJIT_MEM) { - if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); } if (compare_signed) - ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; else - ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); } if (compare_signed) - ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; else - ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); } + if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w)); + goto done; + } + if (src2 & SLJIT_IMM) { sljit_sw neg_src2w = -src2w; if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_signed) - ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; else - ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; return emit_siy(compiler, ins, dst, dstw, neg_src2w); } if (is_s16(neg_src2w)) { if (sets_signed) - ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; else - ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); goto done; } } if (!sets_signed) { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } @@ -2282,19 +2362,19 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, goto done; } } - else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + else if ((op & SLJIT_32) || is_s32(neg_src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); goto done; } } forms = sets_signed ? &sub_forms : &logical_sub_forms; - FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); done: if (sets_signed) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { /* In case of overflow, the sign bit of the two source operands must be different, and @@ -2303,14 +2383,14 @@ done: The -result operation sets the corrent sign, because the result cannot be zero. The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); - FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } else if (op & SLJIT_SET_Z) FAIL_IF(update_zero_overflow(compiler, op, dst_r)); } if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } @@ -2336,7 +2416,7 @@ static const struct ins_forms multiply_overflow_forms = { }; static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -2351,29 +2431,29 @@ static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 } FAIL_IF(push_inst(compiler, aih(tmp0, 1))); FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */ + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */ - return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w); } if (src2 & SLJIT_IMM) { if (is_s16(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); } if (is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); } } - return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w); } static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_uw imm, sljit_s32 count16) { @@ -2381,7 +2461,7 @@ static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { + if (IS_GPR_REG(dst)) { dst_r = gpr(dst & REG_MASK); if (dst == src1) needs_move = 0; @@ -2391,38 +2471,38 @@ static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (type == SLJIT_AND) { - if (!(mode & SLJIT_I32_OP)) - FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32))); - return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff)); + if (!(mode & SLJIT_32)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff)); } else if (type == SLJIT_OR) { if (count16 >= 3) { - FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32))); - return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); } if (count16 >= 2) { if ((imm & 0x00000000ffffffffull) == 0) - return push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)); + return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)); if ((imm & 0xffffffff00000000ull) == 0) - return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); } if ((imm & 0xffff000000000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48))); + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48))); if ((imm & 0x0000ffff00000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff))); + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff))); if ((imm & 0x00000000ffff0000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff))); + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff))); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) - return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff)); + return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff)); return SLJIT_SUCCESS; } if ((imm & 0xffffffff00000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32))); + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32))); if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) - return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff)); return SLJIT_SUCCESS; } @@ -2457,18 +2537,18 @@ static const struct ins_forms bitwise_xor_forms = { }; static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_s32 type = GET_OPCODE(op); const struct ins_forms *forms; - if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) { + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { sljit_s32 count16 = 0; sljit_uw imm = (sljit_uw)src2w; - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) imm &= 0xffffffffull; if ((imm & 0x000000000000ffffull) != 0 || imm == 0) @@ -2480,7 +2560,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o if ((imm & 0xffff000000000000ull) != 0) count16++; - if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) { + if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) { sljit_gpr src_r = tmp0; if (FAST_IS_REG(src1)) @@ -2489,16 +2569,16 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) - return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm); + return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm); if ((imm & 0x00000000ffff0000ull) != 0) - return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16)); + return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16)); if ((imm & 0x0000ffff00000000ull) != 0) - return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32)); - return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48)); + return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48)); } if (!(op & SLJIT_SET_Z)) - return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16); + return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16); } if (type == SLJIT_AND) @@ -2508,16 +2588,16 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o else forms = &bitwise_xor_forms; - return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w); } static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_s32 type = GET_OPCODE(op); - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr src_r = tmp0; sljit_gpr base_r = tmp0; sljit_ins imm = 0; @@ -2529,7 +2609,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (src2 & SLJIT_IMM) - imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f); + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); else if (FAST_IS_REG(src2)) base_r = gpr(src2 & REG_MASK); else { @@ -2537,7 +2617,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, base_r = tmp1; } - if ((op & SLJIT_I32_OP) && dst_r == src_r) { + if ((op & SLJIT_32) && dst_r == src_r) { if (type == SLJIT_SHL) ins = 0x89000000 /* sll */; else if (type == SLJIT_LSHR) @@ -2545,21 +2625,21 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, else ins = 0x8a000000 /* sra */; - FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm)); + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm)); } else { if (type == SLJIT_SHL) - ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; else if (type == SLJIT_LSHR) - ins = (op & SLJIT_I32_OP) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; else - ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; - FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16))); + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16))); } if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) - return push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); return SLJIT_SUCCESS; } @@ -2590,20 +2670,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - compiler->mode = op & SLJIT_I32_OP; + compiler->mode = op & SLJIT_32; compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); - if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC) - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { src1 ^= src2; src2 ^= src1; @@ -2616,39 +2690,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w)); + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_SUB: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w)); + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_MUL: - FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w)); break; case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: - FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w)); break; case SLJIT_SHL: case SLJIT_LSHR: case SLJIT_ASHR: - FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w)); break; } if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) @@ -2686,17 +2778,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return gpr(reg); + return (sljit_s32)gpr(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - abort(); + return (sljit_s32)fgpr(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { sljit_ins ins = 0; @@ -2711,21 +2803,254 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ +#define FLOAT_LOAD 0 +#define FLOAT_STORE 1 + +static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */; + else + ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */; + + return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */; + else + ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */; + + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins, + sljit_s32 reg, + sljit_s32 src, sljit_sw srcw) +{ + struct addr addr; + + if (!(src & SLJIT_MEM)) + return push_inst(compiler, ins_r | F4(reg) | F0(src)); + + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_ins ins; + + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + /* M3 is set to 5 */ + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */; + else + ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */; + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src))); + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins ins; + + if (src & SLJIT_IMM) { + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + src = (sljit_s32)tmp0; + } + else if (src & SLJIT_MEM) { + FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32)); + src = (sljit_s32)tmp0; + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */; + else + ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */; + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins_r, ins; + + if (src1 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (op & SLJIT_32) { + ins_r = 0xb3090000 /* cebr */; + ins = 0xed0000000009 /* ceb */; + } else { + ins_r = 0xb3190000 /* cdbr */; + ins = 0xed0000000019 /* cdb */; + } + + return emit_float(compiler, ins_r, ins, src1, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { + sljit_s32 dst_r; + sljit_ins ins; + CHECK_ERROR(); - abort(); + + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (op == SLJIT_CONV_F64_FROM_F32) + FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw)); + else { + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (FAST_IS_REG(dst)) { + if (dst == src) + return SLJIT_SUCCESS; + + ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + break; + } + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw); + case SLJIT_CONV_F64_FROM_F32: + /* Only SLJIT_CONV_F32_FROM_F64. */ + ins = 0xb3440000 /* ledbr */; + break; + case SLJIT_NEG_F64: + ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64); + ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */; + break; + } + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(dst_r == TMP_FREG1); + + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); } +#define FLOAT_MOV(op, dst_r, src_r) \ + (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r)) + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + sljit_s32 dst_r = TMP_FREG1; + sljit_ins ins_r, ins; + CHECK_ERROR(); - abort(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + do { + if (FAST_IS_REG(dst)) { + dst_r = dst; + + if (dst == src1) + break; + + if (dst == src2) { + if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) { + src2 = src1; + src2w = src1w; + src1 = dst; + break; + } + + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2))); + src2 = TMP_FREG1; + } + } + + if (src1 & SLJIT_MEM) + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1))); + } while (0); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */; + ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */; + break; + case SLJIT_SUB_F64: + ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */; + ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */; + break; + case SLJIT_MUL_F64: + ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */; + ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64); + ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */; + ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */; + break; + } + + FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w)); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + SLJIT_ASSERT(dst_r != TMP_FREG1); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ @@ -2795,6 +3120,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2809,14 +3139,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); if (src & SLJIT_IMM) { SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); } - else if (src & SLJIT_MEM) + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); + } /* emit jump instruction */ if (type >= SLJIT_FAST_CALL) @@ -2832,6 +3163,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + SLJIT_ASSERT(gpr(TMP_REG2) == tmp1); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2859,11 +3208,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* dst is also source operand */ if (dst & SLJIT_MEM) - FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); break; + case SLJIT_MOV32: + op |= SLJIT_32; + /* fallthrough */ case SLJIT_MOV: - case (SLJIT_MOV32 & ~SLJIT_I32_OP): /* can write straight into destination */ loc_r = dst_r; break; @@ -2876,7 +3227,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (have_lscond2()) { FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lochi, locghi))); + WHEN2(op & SLJIT_32, lochi, locghi))); } else { /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */ abort(); @@ -2888,22 +3239,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #define LEVAL(i) i(dst_r, loc_r) case SLJIT_AND: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, nr, ngr))); + WHEN2(op & SLJIT_32, nr, ngr))); break; case SLJIT_OR: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, or, ogr))); + WHEN2(op & SLJIT_32, or, ogr))); break; case SLJIT_XOR: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, xr, xgr))); + WHEN2(op & SLJIT_32, xr, xgr))); break; #undef LEVAL } /* store result to memory if required */ if (dst & SLJIT_MEM) - return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32)); return SLJIT_SUCCESS; } @@ -2913,7 +3264,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 src, sljit_sw srcw) { sljit_u8 mask = get_cc(compiler, type & 0xff); - sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP); + sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32); sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; CHECK_ERROR(); @@ -2927,7 +3278,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil #define LEVAL(i) i(dst_r, src_r, mask) if (have_lscond1()) return push_inst(compiler, - WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr)); + WHEN2(dst_reg & SLJIT_32, locr, locgr)); #undef LEVAL @@ -2991,7 +3342,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( |