diff options
Diffstat (limited to 'thirdparty/pcre2/src/sljit/sljitNativeARM_32.c')
-rw-r--r-- | thirdparty/pcre2/src/sljit/sljitNativeARM_32.c | 945 |
1 files changed, 616 insertions, 329 deletions
diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c index 74cf55fcd2..7b87f5907a 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c @@ -65,12 +65,17 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; -#define RM(rm) (reg_map[rm]) -#define RD(rd) (reg_map[rd] << 12) -#define RN(rn) (reg_map[rn] << 16) +#define RM(rm) ((sljit_uw)reg_map[rm]) +#define RM8(rm) ((sljit_uw)reg_map[rm] << 8) +#define RD(rd) ((sljit_uw)reg_map[rd] << 12) +#define RN(rn) ((sljit_uw)reg_map[rn] << 16) + +#define VM(rm) ((sljit_uw)freg_map[rm]) +#define VD(rd) ((sljit_uw)freg_map[rd] << 12) +#define VN(rn) ((sljit_uw)freg_map[rn] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -107,6 +112,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SBC 0xe0c00000 #define SMULL 0xe0c00090 #define SUB 0xe0400000 +#define TST 0xe1000000 #define UMULL 0xe0800090 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 @@ -115,12 +121,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +213,7 @@ static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_u cpool_unique_ptr = compiler->cpool_unique; do { if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { - cpool_index = cpool_ptr - compiler->cpool; + cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool); break; } cpool_ptr++; @@ -293,7 +302,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. */ if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { - diff = const_pool - last_pc_patch; + diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; /* Must be a load instruction with immediate offset. */ @@ -308,12 +317,12 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ SLJIT_ASSERT(diff >= 1); if (diff >= 2 || ind > 0) { - diff = (diff + ind - 2) << 2; + diff = (diff + (sljit_uw)ind - 2) << 2; SLJIT_ASSERT(diff <= 0xfff); - *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff; } else - *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004; } last_pc_patch++; } @@ -329,24 +338,24 @@ struct future_patch { static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) { - sljit_s32 value; + sljit_u32 value; struct future_patch *curr_patch, *prev_patch; SLJIT_UNUSED_ARG(compiler); /* Using the values generated by patch_pc_relative_loads. */ if (!*first_patch) - value = (sljit_s32)cpool_start_address[cpool_current_index]; + value = cpool_start_address[cpool_current_index]; else { curr_patch = *first_patch; prev_patch = NULL; while (1) { if (!curr_patch) { - value = (sljit_s32)cpool_start_address[cpool_current_index]; + value = cpool_start_address[cpool_current_index]; break; } if ((sljit_uw)curr_patch->index == cpool_current_index) { - value = curr_patch->value; + value = (sljit_uw)curr_patch->value; if (prev_patch) prev_patch->next = curr_patch->next; else @@ -359,8 +368,8 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc } } - if (value >= 0) { - if ((sljit_uw)value > cpool_current_index) { + if ((sljit_sw)value >= 0) { + if (value > cpool_current_index) { curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); if (!curr_patch) { while (*first_patch) { @@ -371,8 +380,8 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc return SLJIT_ERR_ALLOC_FAILED; } curr_patch->next = *first_patch; - curr_patch->index = value; - curr_patch->value = cpool_start_address[value]; + curr_patch->index = (sljit_sw)value; + curr_patch->value = (sljit_sw)cpool_start_address[value]; *first_patch = curr_patch; } cpool_start_address[value] = *buf_ptr; @@ -395,8 +404,8 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { - FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); - return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff)); } #endif @@ -554,8 +563,9 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut } static sljit_uw get_imm(sljit_uw imm); +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw *ptr = (sljit_uw*)addr; @@ -658,7 +668,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_sw addr; + sljit_uw addr; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -737,7 +747,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Points after the current instruction. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; next_addr = compute_next_addr(label, jump, const_, put_label); @@ -770,7 +780,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* code_ptr can be affected above. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); - label->size = (code_ptr + 1) - code; + label->size = (sljit_uw)((code_ptr + 1) - code); label = label->next; } if (const_ && const_->addr == word_count) { @@ -799,8 +809,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); if (cpool_current_index > 0) { /* Unconditional branch. */ - *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); - code_ptr = cpool_start_address + cpool_current_index; + *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); } cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; cpool_current_index = 0; @@ -822,7 +832,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_start_address = ALIGN_INSTRUCTION(code_ptr); cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); if (cpool_current_index > 0) - code_ptr = cpool_start_address + cpool_current_index; + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); buf_ptr = compiler->cpool; buf_end = buf_ptr + compiler->cpool_fill; @@ -845,15 +855,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr = (sljit_uw *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); if (!(jump->flags & JUMP_ADDR)) { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff; + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff; } else { - SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff; + SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff; } } else if (jump->flags & SLJIT_REWRITABLE_JUMP) { @@ -923,7 +933,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw); code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -972,6 +982,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define ALLOW_IMM 0x10 #define ALLOW_INV_IMM 0x20 #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ALLOW_NEG_IMM 0x40 /* s/l - store/load (1 bit) u/s - signed/unsigned (1 bit) @@ -999,7 +1010,7 @@ static const sljit_uw data_transfer_insts[16] = { }; #define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \ - (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg)) + (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg)) /* Normal ldr/str instruction. Type2: ldrsb, ldrh, ldrsh */ @@ -1008,6 +1019,26 @@ static const sljit_uw data_transfer_insts[16] = { #define TYPE2_TRANSFER_IMM(imm) \ (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) +#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ + ((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2)) + +/* Flags for emit_op: */ + /* Arguments are swapped. */ +#define ARGS_SWAPPED 0x01 + /* Inverted immediate. */ +#define INV_IMM 0x02 + /* Source and destination is register. */ +#define MOVE_REG_CONV 0x04 + /* Unused return value. */ +#define UNUSED_RETURN 0x08 +/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS (1 << 20) +/* dst: reg + src1: reg + src2: reg or imm (if allowed) + SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ +#define SRC2_IMM (1 << 25) + static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, @@ -1017,41 +1048,161 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, size, i, tmp; - sljit_uw push; + sljit_uw imm, offset; + sljit_s32 i, tmp, size, word_arg_count, saved_arg_count; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* Push saved registers, temporary registers - stmdb sp!, {..., lr} */ - push = PUSH | (1 << 14); + imm = 0; - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - push |= 1 << reg_map[i]; + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - push |= 1 << reg_map[i]; + imm |= (sljit_uw)1 << reg_map[i]; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); - FAIL_IF(push_inst(compiler, push)); + /* Push saved and temporary registers + multiple registers: stmdb sp!, {..., lr} + single register: str reg, [sp, #-4]! */ + if (imm != 0) + FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm)); + else + FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2))); /* Stack must be aligned to 8 bytes: */ size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - local_size = ((size + local_size + 7) & ~7) - size; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; compiler->local_size = local_size; - if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - args = get_arg_count(arg_types); + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); + else + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 + | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } - if (args >= 1) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2))); + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst(compiler, *(--remap_ptr))); +#endif + + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); return SLJIT_SUCCESS; } @@ -1067,58 +1218,129 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((size + local_size + 7) & ~7) - size; + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) { - sljit_s32 i, tmp; - sljit_uw pop; + sljit_uw imm2 = get_imm(imm); - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + if (imm2 == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + imm2 = RM(TMP_REG2); + } - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list; - if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); - /* Push saved registers, temporary registers - ldmia sp!, {..., pc} */ - pop = POP | (1 << 15); + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - pop |= 1 << reg_map[i]; + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) + lr_dst = 0; + + reg_list = 0; + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - pop |= 1 << reg_map[i]; + reg_list |= (sljit_uw)1 << reg_map[i]; + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + local_size += SSIZE_OF(sw); + + if (reg_list != 0) + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (reg_list == 0) + return SLJIT_SUCCESS; + + if (compiler->saveds > 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); + lr_dst = SLJIT_S0; + } else { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); + lr_dst = SLJIT_FIRST_SAVED_REG; + } + + return push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 + | RN(SLJIT_SP) | RD(lr_dst) | (sljit_uw)(frame_size - 2 * SSIZE_OF(sw))); + } - return push_inst(compiler, pop); + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + /* Pop saved and temporary registers + multiple registers: ldmia sp!, {...} + single register: ldr reg, [sp], #4 */ + if ((reg_list & (reg_list - 1)) == 0) { + SLJIT_ASSERT(lr_dst != 0); + SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]); + + return push_inst(compiler, 0xe49d0004 | RD(lr_dst)); + } + + FAIL_IF(push_inst(compiler, POP | reg_list)); + if (frame_size > 0) + return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw))); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); } /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ -/* flags: */ - /* Arguments are swapped. */ -#define ARGS_SWAPPED 0x01 - /* Inverted immediate. */ -#define INV_IMM 0x02 - /* Source and destination is register. */ -#define MOVE_REG_CONV 0x04 - /* Unused return value. */ -#define UNUSED_RETURN 0x08 -/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ -#define SET_FLAGS (1 << 20) -/* dst: reg - src1: reg - src2: reg or imm (if allowed) - SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ -#define SRC2_IMM (1 << 25) - #define EMIT_SHIFT_INS_AND_RETURN(opcode) \ SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ if (compiler->shift_imm != 0x20) { \ @@ -1130,11 +1352,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \ return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \ } \ - return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \ - (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) \ + | RM8((flags & ARGS_SWAPPED) ? src1 : src2) | (sljit_uw)(opcode << 5) \ + | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, - sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) + sljit_uw dst, sljit_uw src1, sljit_uw src2) { switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1184,9 +1407,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_NOT: - if (src2 & SRC2_IMM) { + if (src2 & SRC2_IMM) return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2); - } + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2)); case SLJIT_CLZ: @@ -1197,9 +1420,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1209,10 +1431,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1227,14 +1449,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl compiler->status_flags_state = 0; if (!HAS_FLAGS(op)) - return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); + return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); - FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); + FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); /* cmp TMP_REG1, dst asr #31. */ return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); case SLJIT_AND: + if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) + return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1266,7 +1490,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl Returns with 0 if not possible. */ static sljit_uw get_imm(sljit_uw imm) { - sljit_s32 rol; + sljit_u32 rol; if (imm <= 0xff) return SRC2_IMM | imm; @@ -1307,7 +1531,7 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl sljit_uw mask; sljit_uw imm1; sljit_uw imm2; - sljit_s32 rol; + sljit_uw rol; /* Step1: Search a zero byte (8 continous zero bit). */ mask = 0xff000000; @@ -1418,7 +1642,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw tmp; #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - if (!(imm & ~0xffff)) + if (!(imm & ~(sljit_uw)0xffff)) return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); #endif @@ -1455,13 +1679,13 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit SLJIT_ASSERT (arg & SLJIT_MEM); SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); - if ((arg & REG_MASK) == SLJIT_UNUSED) { + if (!(arg & REG_MASK)) { if (is_type1_transfer) { - FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xfff)); argw &= 0xfff; } else { - FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xff)); argw &= 0xff; } @@ -1475,20 +1699,20 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit argw &= 0x3; if (argw != 0 && !is_type1_transfer) { - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7))); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); } /* Bit 25: RM is offset. */ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, - RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7))); + RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | ((sljit_uw)argw << 7))); } arg &= REG_MASK; if (is_type1_transfer) { if (argw > 0xfff) { - imm = get_imm(argw & ~0xfff); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); argw = argw & 0xfff; @@ -1496,7 +1720,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xfff) { - imm = get_imm(-argw & ~0xfff); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xfff); if (imm) { FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); argw = -(-argw & 0xfff); @@ -1512,7 +1736,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } else { if (argw > 0xff) { - imm = get_imm(argw & ~0xff); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xff); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); argw = argw & 0xff; @@ -1520,7 +1744,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xff) { - imm = get_imm(-argw & ~0xff); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); if (imm) { FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); argw = -(-argw & 0xff); @@ -1537,7 +1761,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); } @@ -1554,50 +1778,62 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 /* We prefers register and simple consts. */ sljit_s32 dst_reg; sljit_s32 src1_reg; - sljit_s32 src2_reg; + sljit_s32 src2_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 neg_op = 0; - /* Destination check. */ - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) + if (dst == TMP_REG2) flags |= UNUSED_RETURN; SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); - src2_reg = 0; + if (inp_flags & ALLOW_NEG_IMM) { + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUB; + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUBC; + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADD; + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADDC; + break; + } + } do { if (!(inp_flags & ALLOW_IMM)) break; if (src2 & SLJIT_IMM) { - src2_reg = get_imm(src2w); + src2_reg = (sljit_s32)get_imm((sljit_uw)src2w); if (src2_reg) break; if (inp_flags & ALLOW_INV_IMM) { - src2_reg = get_imm(~src2w); + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w); if (src2_reg) { flags |= INV_IMM; break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_reg = get_imm(-src2w); + if (neg_op != 0) { + src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); if (src2_reg) { - op = SLJIT_SUB | GET_ALL_FLAGS(op); - break; - } - } - if (GET_OPCODE(op) == SLJIT_SUB) { - src2_reg = get_imm(-src2w); - if (src2_reg) { - op = SLJIT_ADD | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } } if (src1 & SLJIT_IMM) { - src2_reg = get_imm(src1w); + src2_reg = (sljit_s32)get_imm((sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED; src1 = src2; @@ -1605,7 +1841,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } if (inp_flags & ALLOW_INV_IMM) { - src2_reg = get_imm(~src1w); + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED | INV_IMM; src1 = src2; @@ -1613,13 +1849,13 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_reg = get_imm(-src1w); + if (neg_op >= SLJIT_SUB) { + /* Note: additive operation (commutative). */ + src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); if (src2_reg) { - /* Note: add is commutative operation. */ src1 = src2; src1w = src2w; - op = SLJIT_SUB | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } @@ -1634,12 +1870,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1_reg = TMP_REG1; } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); src1_reg = TMP_REG1; } /* Destination. */ - dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; if (op <= SLJIT_MOV_P) { if (dst & SLJIT_MEM) { @@ -1663,10 +1899,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (src2 & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); else - FAIL_IF(load_immediate(compiler, src2_reg, src2w)); + FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); } - FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg)); + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; @@ -1691,7 +1927,7 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_sw saved_reg_list[3]; + sljit_uw saved_reg_list[3]; sljit_sw saved_reg_count; CHECK_ERROR(); @@ -1708,10 +1944,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) - | (reg_map[SLJIT_R1] << 16) - | (reg_map[SLJIT_R0] << 12) - | (reg_map[SLJIT_R0] << 8) - | reg_map[SLJIT_R1]); + | RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1)); case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: @@ -1742,7 +1975,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif @@ -1756,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile SLJIT_ASSERT(saved_reg_list[1] < 8); FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); } - return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8) + return push_inst(compiler, 0xe49d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8) | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; @@ -1781,6 +2014,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); @@ -1799,13 +2033,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); } @@ -1819,19 +2046,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: case SLJIT_SUB: case SLJIT_SUBC: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: case SLJIT_XOR: return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); @@ -1858,6 +2084,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1905,8 +2145,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1917,23 +2158,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ - #define FPU_LOAD (1 << 20) #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ - ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs)) -#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ - ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16)) + ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs)) static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7))); arg = TMP_REG2; argw = 0; } @@ -1945,12 +2183,12 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, if (!(-argw & ~0x3fc)) return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); - imm = get_imm(argw & ~0x3fc); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } - imm = get_imm(-argw & ~0x3fc); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm) { argw = -argw; FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); @@ -1959,11 +2197,11 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, } if (arg) { - FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); } else - FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); } @@ -1972,17 +2210,17 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0))); if (FAST_IS_REG(dst)) - return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16)); + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1)); /* Store the integer value from a VFP register. */ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); @@ -1994,23 +2232,23 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16))); + FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1))); else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. */ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16))); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1))); } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -2018,19 +2256,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0))); return push_inst(compiler, VMRS); } @@ -2042,16 +2280,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); src = dst_r; } @@ -2059,25 +2297,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0))); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0))); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0))); + op ^= SLJIT_32; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -2094,40 +2332,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1))); break; } if (dst_r == TMP_FREG1) - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; } @@ -2169,10 +2407,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x10000000; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x20000000; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x30000000; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x30000000; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x20000000; @@ -2198,15 +2446,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd0000000; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x10000000; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x60000000; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x00000000; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x70000000; @@ -2277,111 +2527,124 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #ifdef __SOFTFP__ -static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; - sljit_s32 word_arg_offset = 0; - sljit_s32 float_arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 float_arg_count = 0; sljit_s32 types = 0; - sljit_s32 src_offset = 4 * sizeof(sljit_sw); sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; if (src && FAST_IS_REG(*src)) - src_offset = reg_map[*src] * sizeof(sljit_sw); + src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f32); - arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f64); - arg_count++; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); word_arg_offset += sizeof(sljit_sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } - if (stack_offset > 16) - FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7))); + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset)); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } /* Process arguments in reversed direction. */ while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: float_arg_count--; - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } - FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); } else - FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); break; - case SLJIT_ARG_TYPE_F64: - arg_count--; + case SLJIT_ARG_TYPE_F32: float_arg_count--; - stack_offset = offsets[arg_count]; - - SLJIT_ASSERT((stack_offset & 0x7) == 0); + offset = *(--offset_ptr); - if (stack_offset < 16) { - if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } - FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); } else - FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); break; default: - arg_count--; word_arg_offset -= sizeof(sljit_sw); - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT(stack_offset >= word_arg_offset); + SLJIT_ASSERT(offset >= word_arg_offset); - if (stack_offset != word_arg_offset) { - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } else if (src_offset == word_arg_offset) { - *src = 1 + (stack_offset >> 2); - src_offset = stack_offset; + *src = (sljit_s32)(SLJIT_R0 + (offset >> 2)); + src_offset = offset; } - FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2))); + FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2))); } else - FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16))); + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); } break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2389,83 +2652,51 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_s32 stack_size = 0; - - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) - FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_size & 0x7) - stack_size += sizeof(sljit_sw); - stack_size += sizeof(sljit_f64); - break; - default: - stack_size += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - if (stack_size <= 16) - return SLJIT_SUCCESS; - - return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7)); + return SLJIT_SUCCESS; } #else /* !__SOFTFP__ */ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_u32 remap = 0; - sljit_u32 offset = 0; - sljit_u32 new_offset, mask; + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; /* Remove return value. */ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { - new_offset = 0; - mask = 1; - - while (remap & mask) { - new_offset++; - mask <<= 1; - } - remap |= mask; - + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: if (offset != new_offset) FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, - 0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 0x400000 : 0))); + SLJIT_32, new_offset, offset, 0))); - offset += 2; - } - else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { - new_offset = 0; - mask = 3; - - while (remap & mask) { - new_offset += 2; - mask <<= 2; + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0x400000, f32_offset, offset, 0))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, new_offset, offset, 0))); + f32_offset = new_offset; + new_offset++; } - remap |= mask; - - if (offset != new_offset) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0))); - - offset += 2; + offset++; + break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2480,13 +2711,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile { #ifdef __SOFTFP__ struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; #endif CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2496,9 +2732,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); return jump; #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2535,7 +2790,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (type >= SLJIT_FAST_CALL) @@ -2555,16 +2810,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#ifdef __SOFTFP__ if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2573,8 +2841,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) + return push_inst(compiler, BX | RM(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); return softfloat_post_call_with_args(compiler, arg_types); #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2636,27 +2921,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; cc = get_cc(compiler, type & 0xff); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - tmp = get_imm(srcw); + tmp = get_imm((sljit_uw)srcw); if (tmp) return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); - tmp = get_imm(~srcw); + tmp = get_imm(~(sljit_uw)srcw); if (tmp) return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - tmp = (sljit_uw) srcw; + tmp = (sljit_uw)srcw; FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); if (tmp <= 0xffff) return SLJIT_SUCCESS; return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); #else - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); src = TMP_REG1; #endif } @@ -2680,6 +2965,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; break; @@ -2731,7 +3017,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { memw &= 0x3; - inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7)); + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7)); if (is_type1_transfer) inst |= (1 << 25); @@ -2757,7 +3043,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst(compiler, inst | memw); + return push_inst(compiler, inst | (sljit_uw)memw); } if (memw >= 0) @@ -2765,7 +3051,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw)); + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw)); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2777,10 +3063,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, + EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value)); compiler->patches++; #else PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); @@ -2804,7 +3091,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); @@ -2829,5 +3116,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, executable_offset, new_constant, 1); + inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1); } |