diff options
Diffstat (limited to 'thirdparty/pcre2/src/sljit')
21 files changed, 5827 insertions, 3623 deletions
diff --git a/thirdparty/pcre2/src/sljit/sljitConfigInternal.h b/thirdparty/pcre2/src/sljit/sljitConfigInternal.h index 7bb9990a59..55e4e39f13 100644 --- a/thirdparty/pcre2/src/sljit/sljitConfigInternal.h +++ b/thirdparty/pcre2/src/sljit/sljitConfigInternal.h @@ -60,7 +60,7 @@ extern "C" { SLJIT_LITTLE_ENDIAN : little endian architecture SLJIT_BIG_ENDIAN : big endian architecture SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information Constants: SLJIT_NUMBER_OF_REGISTERS : number of available registers @@ -148,7 +148,7 @@ extern "C" { #endif #elif defined (__aarch64__) #define SLJIT_CONFIG_ARM_64 1 -#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__)) +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) #define SLJIT_CONFIG_PPC_64 1 #elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) #define SLJIT_CONFIG_PPC_32 1 @@ -156,7 +156,7 @@ extern "C" { #define SLJIT_CONFIG_MIPS_32 1 #elif defined(__mips64) #define SLJIT_CONFIG_MIPS_64 1 -#elif defined(__sparc__) || defined(__sparc) +#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64) #define SLJIT_CONFIG_SPARC_32 1 #elif defined(__s390x__) #define SLJIT_CONFIG_S390X 1 @@ -274,9 +274,13 @@ extern "C" { #ifndef SLJIT_INLINE /* Inline functions. Some old compilers do not support them. */ -#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 +#ifdef __SUNPRO_C +#if __SUNPRO_C < 0x560 #define SLJIT_INLINE #else +#define SLJIT_INLINE inline +#endif /* __SUNPRO_C */ +#else #define SLJIT_INLINE __inline #endif #endif /* !SLJIT_INLINE */ @@ -319,18 +323,36 @@ extern "C" { /* Instruction cache flush. 
*/ /****************************/ +/* + * TODO: + * + * clang >= 15 could be safe to enable below + * older versions are known to abort in some targets + * https://github.com/PhilipHazel/pcre2/issues/92 + * + * beware APPLE is known to have removed the code in iOS so + * it will need to be excempted or result in broken builds + */ #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) -#if __has_builtin(__builtin___clear_cache) +#if __has_builtin(__builtin___clear_cache) && !defined(__clang__) +/* + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 + * gcc's clear_cache builtin for power and sparc are broken + */ +#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32) #define SLJIT_CACHE_FLUSH(from, to) \ __builtin___clear_cache((char*)(from), (char*)(to)) +#endif -#endif /* __has_builtin(__builtin___clear_cache) */ +#endif /* gcc >= 10 */ #endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ #ifndef SLJIT_CACHE_FLUSH -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Not required to implement on archs with unified caches. */ #define SLJIT_CACHE_FLUSH(from, to) @@ -340,9 +362,9 @@ extern "C" { /* Supported by all macs since Mac OS 10.5. However, it does not work on non-jailbroken iOS devices, although the compilation is successful. 
*/ - +#include <libkern/OSCacheControl.h> #define SLJIT_CACHE_FLUSH(from, to) \ - sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) + sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from))) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -351,33 +373,33 @@ extern "C" { ppc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ #define SLJIT_CACHE_FLUSH(from, to) \ - __builtin___clear_cache((char*)(from), (char*)(to)) - -#elif defined __ANDROID__ + sparc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -/* Android lacks __clear_cache; instead, cacheflush should be used. */ +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) #define SLJIT_CACHE_FLUSH(from, to) \ - cacheflush((long)(from), (long)(to), 0) + __builtin___clear_cache((char*)(from), (char*)(to)) -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#elif defined __ANDROID__ -/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ +/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */ +#include <sys/cachectl.h> #define SLJIT_CACHE_FLUSH(from, to) \ - sparc_cache_flush((from), (to)) -#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + cacheflush((long)(from), (long)(to), 0) #elif defined _WIN32 #define SLJIT_CACHE_FLUSH(from, to) \ - FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) #else -/* Calls __ARM_NR_cacheflush on ARM-Linux. */ +/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. 
*/ #define SLJIT_CACHE_FLUSH(from, to) \ __clear_cache((char*)(from), (char*)(to)) @@ -645,18 +667,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 9 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_NUMBER_OF_REGISTERS 13 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 #ifndef _WIN64 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #else /* _WIN64 */ #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 -#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) #endif /* !_WIN64 */ #define SLJIT_PREF_SHIFT_REG SLJIT_R3 @@ -664,31 +691,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) #define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 -#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #define 
SLJIT_NUMBER_OF_REGISTERS 23 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) -#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) /* Add +1 for double alignment. */ -#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * (sljit_s32)sizeof(sljit_sw)) #else -#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw)) #endif /* SLJIT_CONFIG_PPC_64 || _AIX */ #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -696,19 +731,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 21 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 13 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 6 #else #define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #endif #elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) #define SLJIT_NUMBER_OF_REGISTERS 18 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) /* saved registers (16), return struct pointer (1), space for 6 argument words (1), 4th double arg (2), double alignment (1). 
*/ -#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw)) #endif #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -736,12 +777,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #define SLJIT_NUMBER_OF_REGISTERS 0 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #endif @@ -751,13 +796,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) -#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) -#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 -#else -#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 -#endif - #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) @@ -765,8 +803,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); /* CPU status flags management. 
*/ /********************************/ -#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ - || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) diff --git a/thirdparty/pcre2/src/sljit/sljitExecAllocator.c b/thirdparty/pcre2/src/sljit/sljitExecAllocator.c index 6e5bf78e45..6359848cd5 100644 --- a/thirdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/thirdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -66,7 +66,7 @@ /* --------------------------------------------------------------------- */ /* 64 KByte. */ -#define CHUNK_SIZE 0x10000 +#define CHUNK_SIZE (sljit_uw)0x10000u /* alloc_chunk / free_chunk : @@ -112,7 +112,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) static SLJIT_INLINE int get_map_jit_flag() { - sljit_sw page_size; + size_t page_size; void *ptr; struct utsname name; static int map_jit_flag = -1; @@ -139,8 +139,9 @@ static SLJIT_INLINE int get_map_jit_flag() #endif /* MAP_ANON */ #else /* !SLJIT_CONFIG_X86 */ #if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) -#error Unsupported architecture +#error "Unsupported architecture" #endif /* SLJIT_CONFIG_ARM */ +#include <AvailabilityMacros.h> #include <pthread.h> #define SLJIT_MAP_JIT (MAP_JIT) @@ -149,7 +150,14 @@ static SLJIT_INLINE int get_map_jit_flag() static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) { +#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000 pthread_jit_write_protect_np(enable_exec); +#elif defined(__clang__) + if (__builtin_available(macOS 11.0, *)) + pthread_jit_write_protect_np(enable_exec); +#else +#error "Must target Big Sur or newer" +#endif /* BigSur */ } #endif /* SLJIT_CONFIG_X86 */ #else /* !TARGET_OS_OSX */ @@ -187,10 +195,13 @@ static SLJIT_INLINE void* 
alloc_chunk(sljit_uw size) if (retval == MAP_FAILED) return NULL; +#ifdef __FreeBSD__ + /* HardenedBSD's mmap lies, so check permissions again */ if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { munmap(retval, size); return NULL; } +#endif /* FreeBSD */ SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); @@ -227,7 +238,7 @@ struct free_block { #define AS_FREE_BLOCK(base, offset) \ ((struct free_block*)(((sljit_u8*)base) + offset)) #define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) static struct free_block* free_blocks; static sljit_uw allocated_size; diff --git a/thirdparty/pcre2/src/sljit/sljitLir.c b/thirdparty/pcre2/src/sljit/sljitLir.c index a24a99ab87..313a061dd3 100644 --- a/thirdparty/pcre2/src/sljit/sljitLir.c +++ b/thirdparty/pcre2/src/sljit/sljitLir.c @@ -90,26 +90,28 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SSIZE_OF(type) ((sljit_s32)sizeof(sljit_ ## type)) + #define VARIABLE_FLAG_SHIFT (10) #define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT) #define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) #define GET_OPCODE(op) \ - ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define HAS_FLAGS(op) \ ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define GET_ALL_FLAGS(op) \ - ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #define TYPE_CAST_NEEDED(op) \ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) -#else +#else /* !SLJIT_64BIT_ARCHITECTURE */ #define TYPE_CAST_NEEDED(op) \ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) -#endif +#endif /* SLJIT_64BIT_ARCHITECTURE */ #define BUF_SIZE 4096 @@ 
-126,11 +128,10 @@ #define TO_OFFS_REG(reg) ((reg) << 8) /* When reg cannot be unused. */ #define FAST_IS_REG(reg) ((reg) <= REG_MASK) -/* When reg can be unused. */ -#define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK) /* Mask for argument types. */ -#define SLJIT_DEF_MASK ((1 << SLJIT_DEF_SHIFT) - 1) +#define SLJIT_ARG_MASK 0x7 +#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) /* Jump flags. */ #define JUMP_LABEL 0x1 @@ -247,8 +248,11 @@ #define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ - (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ - extra) * sizeof(sljit_sw)) + (saveds) + (sljit_s32)(extra)) * (sljit_s32)sizeof(sljit_sw)) + +#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, size) \ + (((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \ + (fsaveds)) * (sljit_s32)(size)) #define ADJUST_LOCAL_OFFSET(p, i) \ if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ @@ -379,9 +383,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), invalid_integer_types); - SLJIT_COMPILE_ASSERT(SLJIT_I32_OP == SLJIT_F32_OP, - int_op_and_single_op_must_be_the_same); - SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP, + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, rewritable_jump_and_single_op_must_not_be_the_same); SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1), conditional_flags_must_be_even_numbers); @@ -415,7 +417,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->local_size = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->args = -1; + 
compiler->args_size = -1; #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) @@ -439,6 +441,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->delay_slot = UNMOVABLE_INS; #endif +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->last_flags = 0; + compiler->last_return = -1; + compiler->logical_local_size = 0; +#endif + #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) if (!compiler_initialized) { init_compiler(); @@ -488,7 +497,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_d SLJIT_UNUSED_ARG(exec_allocator_data); /* Remove thumb mode flag. */ - SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1), exec_allocator_data); + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~(sljit_uw)0x1), exec_allocator_data); } #elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) @@ -511,7 +520,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_d SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) { if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { - jump->flags &= ~JUMP_ADDR; + jump->flags &= (sljit_uw)~JUMP_ADDR; jump->flags |= JUMP_LABEL; jump->u.label = label; } @@ -520,7 +529,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) { if (SLJIT_LIKELY(!!jump)) { - jump->flags &= ~JUMP_LABEL; + jump->flags &= (sljit_uw)~JUMP_LABEL; jump->flags |= JUMP_ADDR; jump->u.target = target; } @@ -533,7 +542,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_la } #define SLJIT_CURRENT_FLAGS_ALL \ - (SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + 
(SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { @@ -547,7 +556,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *com #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_flags = 0; if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { - compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_32 | SLJIT_SET_Z)); } #endif } @@ -607,7 +616,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compile return NULL; size = (size + 3) & ~3; #endif - return ensure_abuf(compiler, size); + return ensure_abuf(compiler, (sljit_uw)size); } static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) @@ -626,20 +635,6 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) compiler->buf = prev; } -static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types) -{ - sljit_s32 arg_count = 0; - - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - arg_count++; - arg_types >>= SLJIT_DEF_SHIFT; - } - - return arg_count; -} - - /* Only used in RISC architectures where the instruction size is constant */ #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -679,6 +674,7 @@ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, compiler->fscratches = fscratches; compiler->fsaveds = fsaveds; #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; #endif } @@ -696,6 +692,7 @@ static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, compiler->fscratches = 
fscratches; compiler->fsaveds = fsaveds; #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; #endif } @@ -711,7 +708,7 @@ static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compi compiler->last_label = label; } -static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_s32 flags) +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_u32 flags) { jump->next = NULL; jump->flags = flags; @@ -751,6 +748,58 @@ static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches) +{ + sljit_s32 word_arg_count, scratch_arg_end, saved_arg_count, float_arg_count, curr_type; + + curr_type = (arg_types & SLJIT_ARG_FULL_MASK); + + if (curr_type >= SLJIT_ARG_TYPE_F64) { + if (curr_type > SLJIT_ARG_TYPE_F32 || fscratches == 0) + return 0; + } else if (curr_type >= SLJIT_ARG_TYPE_W) { + if (scratches == 0) + return 0; + } + + arg_types >>= SLJIT_ARG_SHIFT; + + word_arg_count = 0; + scratch_arg_end = 0; + saved_arg_count = 0; + float_arg_count = 0; + while (arg_types != 0) { + if (word_arg_count + float_arg_count >= 4) + return 0; + + curr_type = (arg_types & SLJIT_ARG_MASK); + + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (saveds == -1 || curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_P) + return 0; + + word_arg_count++; + scratch_arg_end = word_arg_count; + } else { + if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32) + return 0; + + if (curr_type < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + saved_arg_count++; + } else + float_arg_count++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (saveds == -1) + return (word_arg_count <= scratches && 
float_arg_count <= fscratches); + + return (saved_arg_count <= saveds && scratch_arg_end <= scratches && float_arg_count <= fscratches); +} + #define FUNCTION_CHECK_IS_REG(r) \ (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \ || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) @@ -773,14 +822,14 @@ static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s if (!(p & SLJIT_MEM)) return 0; - if (!((p & REG_MASK) == SLJIT_UNUSED || FUNCTION_CHECK_IS_REG(p & REG_MASK))) + if (!(!(p & REG_MASK) || FUNCTION_CHECK_IS_REG(p & REG_MASK))) return 0; if (CHECK_IF_VIRTUAL_REGISTER(p & REG_MASK)) return 0; if (p & OFFS_REG_MASK) { - if ((p & REG_MASK) == SLJIT_UNUSED) + if (!(p & REG_MASK)) return 0; if (!(FUNCTION_CHECK_IS_REG(OFFS_REG(p)))) @@ -819,12 +868,12 @@ static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p #define FUNCTION_CHECK_SRC(p, i) \ CHECK_ARGUMENT(function_check_src(compiler, p, i)); -static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 unused) +static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { if (compiler->scratches == -1 || compiler->saveds == -1) return 0; - if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) + if (FUNCTION_CHECK_IS_REG(p)) return (i == 0); if (p == SLJIT_MEM1(SLJIT_SP)) @@ -833,8 +882,8 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p return function_check_src_mem(compiler, p, i); } -#define FUNCTION_CHECK_DST(p, i, unused) \ - CHECK_ARGUMENT(function_check_dst(compiler, p, i, unused)); +#define FUNCTION_CHECK_DST(p, i) \ + CHECK_ARGUMENT(function_check_dst(compiler, p, i)); static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { @@ -910,10 +959,8 @@ static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sl } else fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D 
"d]", (i)); - } else if (p) + } else sljit_verbose_reg(compiler, p); - else - fprintf(compiler->verbose, "unused"); } static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) @@ -940,63 +987,61 @@ static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, s } static const char* op0_names[] = { - (char*)"breakpoint", (char*)"nop", (char*)"lmul.uw", (char*)"lmul.sw", - (char*)"divmod.u", (char*)"divmod.s", (char*)"div.u", (char*)"div.s", - (char*)"endbr", (char*)"skip_frames_before_return" + "breakpoint", "nop", "lmul.uw", "lmul.sw", + "divmod.u", "divmod.s", "div.u", "div.s", + "endbr", "skip_frames_before_return" }; static const char* op1_names[] = { - (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", - (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", - (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", - (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", - (char*)"not", (char*)"neg", (char*)"clz", + "", ".u8", ".s8", ".u16", + ".s16", ".u32", ".s32", "32", + ".p", "not", "clz", }; static const char* op2_names[] = { - (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", - (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", - (char*)"shl", (char*)"lshr", (char*)"ashr", + "add", "addc", "sub", "subc", + "mul", "and", "or", "xor", + "shl", "lshr", "ashr", }; static const char* op_src_names[] = { - (char*)"fast_return", (char*)"skip_frames_before_fast_return", - (char*)"prefetch_l1", (char*)"prefetch_l2", - (char*)"prefetch_l3", (char*)"prefetch_once", + "fast_return", "skip_frames_before_fast_return", + "prefetch_l1", "prefetch_l2", + "prefetch_l3", "prefetch_once", }; static const char* fop1_names[] = { - (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", - (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", - (char*)"abs", + "mov", "conv", "conv", "conv", + "conv", "conv", "cmp", "neg", + "abs", }; static const char* fop2_names[] = { - (char*)"add", (char*)"sub", 
(char*)"mul", (char*)"div" + "add", "sub", "mul", "div" }; #define JUMP_POSTFIX(type) \ - ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? ((type & SLJIT_I32_OP) ? "32" : "") \ - : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_F32_OP) ? ".f32" : ".f64") : "")) - -static char* jump_names[] = { - (char*)"equal", (char*)"not_equal", - (char*)"less", (char*)"greater_equal", - (char*)"greater", (char*)"less_equal", - (char*)"sig_less", (char*)"sig_greater_equal", - (char*)"sig_greater", (char*)"sig_less_equal", - (char*)"overflow", (char*)"not_overflow", - (char*)"carry", (char*)"", - (char*)"equal", (char*)"not_equal", - (char*)"less", (char*)"greater_equal", - (char*)"greater", (char*)"less_equal", - (char*)"unordered", (char*)"ordered", - (char*)"jump", (char*)"fast_call", - (char*)"call", (char*)"call.cdecl" + ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? ((type & SLJIT_32) ? "32" : "") \ + : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_32) ? ".f32" : ".f64") : "")) + +static const char* jump_names[] = { + "equal", "not_equal", + "less", "greater_equal", + "greater", "less_equal", + "sig_less", "sig_greater_equal", + "sig_greater", "sig_less_equal", + "overflow", "not_overflow", + "carry", "", + "equal", "not_equal", + "less", "greater_equal", + "greater", "less_equal", + "unordered", "ordered", + "jump", "fast_call", + "call", "call.cdecl" }; -static char* call_arg_names[] = { - (char*)"void", (char*)"sw", (char*)"uw", (char*)"s32", (char*)"u32", (char*)"f32", (char*)"f64" +static const char* call_arg_names[] = { + "void", "w", "32", "p", "f64", "f32" }; #endif /* SLJIT_VERBOSE */ @@ -1032,48 +1077,40 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 types, arg_count, curr_type; -#endif - SLJIT_UNUSED_ARG(compiler); #if 
(defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); - CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); - CHECK_ARGUMENT((arg_types & SLJIT_DEF_MASK) == 0); - - types = (arg_types >> SLJIT_DEF_SHIFT); - arg_count = 0; - while (types != 0 && arg_count < 3) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); - arg_count++; - types >>= SLJIT_DEF_SHIFT; - } - CHECK_ARGUMENT(arg_count <= saveds && types == 0); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " enter options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? 
"f64_align" : ""); - - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; - if (arg_types) - fprintf(compiler->verbose, ","); + fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); } - fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", scratches, saveds, fscratches, fsaveds, local_size); } #endif @@ -1084,74 +1121,94 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 types, arg_count, curr_type; -#endif - SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); - CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= 
SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); - - types = (arg_types >> SLJIT_DEF_SHIFT); - arg_count = 0; - while (types != 0 && arg_count < 3) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); - arg_count++; - types >>= SLJIT_DEF_SHIFT; - } - CHECK_ARGUMENT(arg_count <= saveds && types == 0); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " set_context options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? "f64_align" : ""); - - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; - if (arg_types) - fprintf(compiler->verbose, ","); + fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); } - fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + (options & SLJIT_ENTER_CDECL) ? 
" enter:cdecl," : "", scratches, saveds, fscratches, fsaveds, local_size); } #endif CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_compiler *compiler) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_VOID); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return_void\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(compiler->scratches >= 0); - if (op != SLJIT_UNUSED) { - CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); - FUNCTION_CHECK_SRC(src, srcw); + + switch (compiler->last_return) { + case SLJIT_ARG_TYPE_W: + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_S32); + break; + case SLJIT_ARG_TYPE_32: + CHECK_ARGUMENT(op == SLJIT_MOV32 || (op >= SLJIT_MOV32_U8 && op <= SLJIT_MOV32_S16)); + break; + case SLJIT_ARG_TYPE_P: + CHECK_ARGUMENT(op == SLJIT_MOV_P); + break; + default: + /* Context not initialized, void, etc. */ + CHECK_ARGUMENT(0); + break; } - else - CHECK_ARGUMENT(src == 0 && srcw == 0); + FUNCTION_CHECK_SRC(src, srcw); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - if (op == SLJIT_UNUSED) - fprintf(compiler->verbose, " return\n"); - else { - fprintf(compiler->verbose, " return%s ", op1_names[op - SLJIT_OP1_BASE]); - sljit_verbose_param(compiler, src, srcw); - fprintf(compiler->verbose, "\n"); - } + fprintf(compiler->verbose, " return%s%s ", !(op & SLJIT_32) ? 
"" : "32", + op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); } #endif CHECK_RETURN_OK; @@ -1160,7 +1217,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1177,7 +1234,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) - || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW) + || ((op & ~SLJIT_32) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_32) <= SLJIT_DIV_SW) || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2); if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) @@ -1188,7 +1245,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler { fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) { - fprintf(compiler->verbose, (op & SLJIT_I32_OP) ? "32" : "w"); + fprintf(compiler->verbose, (op & SLJIT_32) ? "32" : "w"); } fprintf(compiler->verbose, "\n"); } @@ -1210,43 +1267,39 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler switch (GET_OPCODE(op)) { case SLJIT_NOT: - /* Only SLJIT_I32_OP and SLJIT_SET_Z are allowed. */ + /* Only SLJIT_32 and SLJIT_SET_Z are allowed. 
*/ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); break; - case SLJIT_NEG: - CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) - || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); - break; case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_P: /* Nothing allowed */ - CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); break; default: - /* Only SLJIT_I32_OP is allowed. */ + /* Only SLJIT_32 is allowed. */ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); break; } - FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op)); + FUNCTION_CHECK_DST(dst, dstw); FUNCTION_CHECK_SRC(src, srcw); if (GET_OPCODE(op) >= SLJIT_NOT) { CHECK_ARGUMENT(src != SLJIT_IMM); - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) <= SLJIT_MOV_P) { - fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_I32_OP) ? "" : "32", - (op != SLJIT_MOV32) ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ""); + fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); } else { - fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_32) ? "" : "32", !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", !(op & VARIABLE_FLAG_MASK) ? 
"" : jump_names[GET_FLAG_TYPE(op)]); } @@ -1260,7 +1313,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 unset, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -1302,24 +1355,31 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); - CHECK_ARGUMENT((op & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + CHECK_ARGUMENT((op & SLJIT_32) == (compiler->last_flags & SLJIT_32)); break; default: SLJIT_UNREACHABLE(); break; } - FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op)); + if (unset) { + CHECK_ARGUMENT(HAS_FLAGS(op)); + } else { + FUNCTION_CHECK_DST(dst, dstw); + } FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32", !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", !(op & VARIABLE_FLAG_MASK) ? 
"" : jump_names[GET_FLAG_TYPE(op)]); - sljit_verbose_param(compiler, dst, dstw); + if (unset) + fprintf(compiler->verbose, "unset"); + else + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); @@ -1376,10 +1436,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - int i; + sljit_u32 i; #endif SLJIT_UNUSED_ARG(compiler); @@ -1431,10 +1491,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONV_F64_FROM_F32 - SLJIT_FOP1_BASE], - (op & SLJIT_F32_OP) ? ".f32.from.f64" : ".f64.from.f32"); + (op & SLJIT_32) ? ".f32.from.f64" : ".f64.from.f32"); else fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + (op & SLJIT_32) ? 
".f32" : ".f64"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1450,7 +1510,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); #endif if (SLJIT_UNLIKELY(compiler->skip_checks)) { @@ -1469,7 +1529,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); if (op & VARIABLE_FLAG_MASK) { fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]); } @@ -1497,13 +1557,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src, srcw); - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw", - (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + (op & SLJIT_32) ? 
".f32" : ".f64"); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src, srcw); @@ -1532,7 +1592,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(str #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (op & SLJIT_F32_OP) ? ".f32" : ".f64", + (op & SLJIT_32) ? ".f32" : ".f64", (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? ".s32" : ".sw"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1558,7 +1618,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_32) ? 
".f32" : ".f64"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src1, src1w); @@ -1598,15 +1658,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); - CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); - CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP)); + CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_32)); if ((type & 0xff) < SLJIT_JUMP) { if ((type & 0xff) <= SLJIT_NOT_ZERO) CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); - else + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CARRY || (type & 0xff) == SLJIT_NOT_CARRY); + compiler->last_flags = 0; + } else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); } @@ -1623,49 +1685,27 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile sljit_s32 arg_types) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 i, types, curr_type, scratches, fscratches; - - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); - types = arg_types; - scratches = 0; - fscratches = 0; - for (i = 0; i < 5; i++) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type <= 
SLJIT_ARG_TYPE_F64); - if (i > 0) { - if (curr_type == 0) { - break; - } - if (curr_type >= SLJIT_ARG_TYPE_F32) - fscratches++; - else - scratches++; - } else { - if (curr_type >= SLJIT_ARG_TYPE_F32) { - CHECK_ARGUMENT(compiler->fscratches > 0); - } else if (curr_type >= SLJIT_ARG_TYPE_SW) { - CHECK_ARGUMENT(compiler->scratches > 0); - } - } - types >>= SLJIT_DEF_SHIFT; + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); } - CHECK_ARGUMENT(compiler->scratches >= scratches); - CHECK_ARGUMENT(compiler->fscratches >= fscratches); - CHECK_ARGUMENT(types == 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ret[%s", jump_names[type & 0xff], - !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + fprintf(compiler->verbose, " %s%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + !(type & SLJIT_CALL_RETURN) ? 
"" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) { fprintf(compiler->verbose, "], args["); do { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) fprintf(compiler->verbose, ","); } while (arg_types); @@ -1681,7 +1721,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); @@ -1690,7 +1730,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " cmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], (type & SLJIT_I32_OP) ? "32" : ""); + jump_names[type & 0xff], (type & SLJIT_32) ? 
"32" : ""); sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src2, src2w); @@ -1706,7 +1746,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); @@ -1715,7 +1755,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fcmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], (type & SLJIT_F32_OP) ? ".f32" : ".f64"); + jump_names[type & 0xff], (type & SLJIT_32) ? 
".f32" : ".f64"); sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src2, src2w); @@ -1752,49 +1792,27 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 i, types, curr_type, scratches, fscratches; - - CHECK_ARGUMENT(type == SLJIT_CALL || type == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); FUNCTION_CHECK_SRC(src, srcw); - types = arg_types; - scratches = 0; - fscratches = 0; - for (i = 0; i < 5; i++) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); - if (i > 0) { - if (curr_type == 0) { - break; - } - if (curr_type >= SLJIT_ARG_TYPE_F32) - fscratches++; - else - scratches++; - } else { - if (curr_type >= SLJIT_ARG_TYPE_F32) { - CHECK_ARGUMENT(compiler->fscratches > 0); - } else if (curr_type >= SLJIT_ARG_TYPE_SW) { - CHECK_ARGUMENT(compiler->scratches > 0); - } - } - types >>= SLJIT_DEF_SHIFT; + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); } - CHECK_ARGUMENT(compiler->scratches >= scratches); - CHECK_ARGUMENT(compiler->fscratches >= fscratches); - CHECK_ARGUMENT(types == 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], - !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + !(type & SLJIT_CALL_RETURN) ? 
"" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) { fprintf(compiler->verbose, "], args["); do { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) fprintf(compiler->verbose, ","); } while (arg_types); @@ -1812,9 +1830,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com sljit_s32 type) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); - CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); @@ -1823,19 +1840,20 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); if (GET_OPCODE(op) >= SLJIT_ADD) - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " flags%s %s%s, ", !(op & SLJIT_SET_Z) ? 
"" : ".z", GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], - GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : "")); + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : "")); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type)); } @@ -1848,11 +1866,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_32)); if (src != SLJIT_IMM) { CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src)); CHECK_ARGUMENT(srcw == 0); @@ -1867,9 +1885,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " cmov%s %s%s, ", - !(dst_reg & SLJIT_I32_OP) ? "" : "32", + !(dst_reg & SLJIT_32) ? 
"" : "32", jump_names[type & 0xff], JUMP_POSTFIX(type)); - sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_32); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); @@ -1884,15 +1902,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); - CHECK_ARGUMENT(!(type & SLJIT_I32_OP) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P)); + CHECK_ARGUMENT(!(type & SLJIT_32) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P)); CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); - CHECK_ARGUMENT((mem & REG_MASK) != SLJIT_UNUSED && (mem & REG_MASK) != reg); + CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1900,7 +1918,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler fprintf(compiler->verbose, " //"); fprintf(compiler->verbose, " mem%s.%s%s%s ", - !(type & SLJIT_I32_OP) ? "" : "32", + !(type & SLJIT_32) ? "" : "32", (type & SLJIT_MEM_STORE) ? "st" : "ld", op1_names[(type & 0xff) - SLJIT_OP1_BASE], (type & SLJIT_MEM_PRE) ? 
".pre" : ".post"); @@ -1921,7 +1939,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); @@ -1933,7 +1951,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile fprintf(compiler->verbose, " fmem.%s%s%s ", (type & SLJIT_MEM_STORE) ? "st" : "ld", - !(type & SLJIT_I32_OP) ? ".f64" : ".f32", + !(type & SLJIT_32) ? ".f64" : ".f32", (type & SLJIT_MEM_PRE) ? ".pre" : ".post"); sljit_verbose_freg(compiler, freg); fprintf(compiler->verbose, ", "); @@ -1950,7 +1968,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_co SLJIT_UNUSED_ARG(offset); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1967,7 +1985,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil SLJIT_UNUSED_ARG(init_value); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1982,7 +2000,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined 
SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -2023,10 +2041,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - /* Return if don't need to do anything. */ - if (op == SLJIT_UNUSED) - return SLJIT_SUCCESS; - #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. */ if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) @@ -2043,6 +2057,24 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); } +#if !(defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->skip_checks = 1; +#endif + return sljit_emit_return_void(compiler); +} + +#endif + #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ @@ -2054,7 +2086,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *com { struct sljit_label *label; struct sljit_jump *jump; - sljit_s32 op = (dst_reg & SLJIT_I32_OP) ? SLJIT_MOV32 : SLJIT_MOV; + sljit_s32 op = (dst_reg & SLJIT_32) ? 
SLJIT_MOV32 : SLJIT_MOV; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2067,7 +2099,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *com || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_I32_OP, 0, src, srcw)); + FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_32, 0, src, srcw)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2183,7 +2215,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler break; } - type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP)); + type = condition | (type & (SLJIT_32 | SLJIT_REWRITABLE_JUMP)); tmp_src = src1; src1 = src2; src2 = tmp_src; @@ -2201,13 +2233,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_I32_OP), - SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); + PTR_FAIL_IF(sljit_emit_op2u(compiler, + SLJIT_SUB | flags | (type & SLJIT_32), src1, src1w, src2, src2w)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_32))); } #endif @@ -2223,7 +2255,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_I32_OP), src1, src1w, src2, src2w); + 
sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2404,6 +2436,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp return SLJIT_ERR_UNSUPPORTED; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { SLJIT_UNUSED_ARG(compiler); @@ -2452,6 +2491,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_ERR_UNSUPPORTED; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -2470,7 +2523,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(instruction); diff --git a/thirdparty/pcre2/src/sljit/sljitLir.h b/thirdparty/pcre2/src/sljit/sljitLir.h index 0eb62fc21b..1162658156 100644 --- a/thirdparty/pcre2/src/sljit/sljitLir.h +++ b/thirdparty/pcre2/src/sljit/sljitLir.h @@ -163,13 +163,6 @@ extern "C" { is not available at all. 
*/ -/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 - or sljit_emit_op2 operations the result is discarded. Some status - flags must be set when the destination is SLJIT_UNUSED, because the - operation would have no effect otherwise. Other SLJIT operations do - not support SLJIT_UNUSED as a destination operand. */ -#define SLJIT_UNUSED 0 - /* Scratch registers. */ #define SLJIT_R0 1 #define SLJIT_R1 2 @@ -231,9 +224,6 @@ extern "C" { value. The FR and FS register sets are overlap in the same way as R and S register sets. See above. */ -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ - /* Floating point scratch registers. */ #define SLJIT_FR0 1 #define SLJIT_FR1 2 @@ -263,39 +253,38 @@ extern "C" { /* Argument type definitions */ /* --------------------------------------------------------------------- */ -/* Argument type definitions. - Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF]_RET macros. */ - -#define SLJIT_ARG_TYPE_VOID 0 -#define SLJIT_ARG_TYPE_SW 1 -#define SLJIT_ARG_TYPE_UW 2 -#define SLJIT_ARG_TYPE_S32 3 -#define SLJIT_ARG_TYPE_U32 4 -#define SLJIT_ARG_TYPE_F32 5 -#define SLJIT_ARG_TYPE_F64 6 - /* The following argument type definitions are used by sljit_emit_enter, sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. - The following return type definitions are used by sljit_emit_call - and sljit_emit_icall functions. - When a function is called, the first integer argument must be placed - in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first - floating point argument must be placed in SLJIT_FR0, the second in - SLJIT_FR1, and so on. + As for sljit_emit_call and sljit_emit_icall, the first integer argument + must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. + Similarly the first floating point argument must be placed into SLJIT_FR0, + the second one into SLJIT_FR1, and so on. 
+ + As for sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. The first integer argument without _R postfix is + stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer + arguments with _R postfix are placed into scratch registers. The index + of the scratch register is the count of the previous integer arguments + starting from SLJIT_R0. The floating point arguments are always placed + into SLJIT_FR0, SLJIT_FR1, and so on. + + Note: if a function is called by sljit_emit_call/sljit_emit_icall and + an argument is stored in a scratch register by sljit_emit_enter, + that argument uses the same scratch register index for both + integer and floating point arguments. Example function definition: - sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a, + sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); Argument type definition: - SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32) - | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64) - | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F32) + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) Short form of argument type definition: - SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64) - | SLJIT_ARG3(S32) | SLJIT_ARG4(F32) + SLJIT_ARGS4(F32, P, F64, 32, F32) Argument passing: arg_a must be placed in SLJIT_R0 @@ -303,34 +292,73 @@ extern "C" { arg_b must be placed in SLJIT_FR0 arg_d must be placed in SLJIT_FR1 -Note: - The SLJIT_ARG_TYPE_VOID type is only supported by - SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the - default value when SLJIT_DEF_RET is not specified.
*/ -#define SLJIT_DEF_SHIFT 4 -#define SLJIT_DEF_RET(type) (type) -#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT) -#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT)) -#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT)) -#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT)) + Examples for argument processing by sljit_emit_enter: + SLJIT_ARGS4(VOID, P, 32_R, F32, W) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 + + SLJIT_ARGS4(VOID, W, W_R, W, W_R) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 -/* Short form of the macros above. + SLJIT_ARGS4(VOID, F64, W, F32, W_R) + Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 - For example the following definition: - SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32) + Note: it is recommended to pass the scratch arguments first + followed by the saved arguments: + + SLJIT_ARGS4(VOID, W_R, W_R, W, W) + Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 +*/ + +/* The following flag is only allowed for the integer arguments of + sljit_emit_enter. When the flag is set, the integer argument is + stored in a scratch register instead of a saved register. */ +#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 + +/* Void result, can only be used by SLJIT_ARG_RETURN. */ +#define SLJIT_ARG_TYPE_VOID 0 +/* Machine word sized integer argument or result. */ +#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 32 bit integer argument or result. */ +#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) +/* Pointer sized integer argument or result. */ +#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 64 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F64 4 +/* 32 bit floating point argument or result. 
*/ +#define SLJIT_ARG_TYPE_F32 5 + +#define SLJIT_ARG_SHIFT 4 +#define SLJIT_ARG_RETURN(type) (type) +#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) + +/* Simplified argument list definitions. + + The following definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) can be shortened to: - SLJIT_RET(SW) | SLJIT_ARG1(F32) - -Note: - The VOID type is only supported by SLJIT_RET, and - VOID is also the default value when SLJIT_RET is - not specified. */ -#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type) + SLJIT_ARGS1(W, F32) +*/ + +#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type + +#define SLJIT_ARGS0(ret) \ + SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) + +#define SLJIT_ARGS1(ret, arg1) \ + (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) + +#define SLJIT_ARGS2(ret, arg1, arg2) \ + (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) + +#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ + (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) + +#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ + (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) /* --------------------------------------------------------------------- */ /* Main structures and functions */ @@ -408,7 +436,7 @@ struct sljit_compiler { /* Code size. */ sljit_uw size; /* Relative offset of the executable mapping from the writable mapping. */ - sljit_uw executable_offset; + sljit_sw executable_offset; /* Executable size for statistical purposes. 
*/ sljit_uw executable_size; @@ -417,17 +445,13 @@ struct sljit_compiler { #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_s32 args; + sljit_s32 args_size; sljit_s32 locals_offset; - sljit_s32 saveds_offset; - sljit_s32 stack_tmp_size; + sljit_s32 scratches_offset; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 mode32; -#ifdef _WIN64 - sljit_s32 locals_offset; -#endif #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) @@ -444,10 +468,14 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Temporary fields. */ sljit_uw shift_imm; +#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) + sljit_uw args_size; #endif #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) - sljit_sw imm; + sljit_u32 imm; #endif #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -456,6 +484,10 @@ struct sljit_compiler { sljit_sw cache_argw; #endif +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif + #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) sljit_s32 delay_slot; sljit_s32 cache_arg; @@ -476,7 +508,9 @@ struct sljit_compiler { /* Flags specified by the last arithmetic instruction. It contains the type of the variable flag. */ sljit_s32 last_flags; - /* Local size passed to the functions. */ + /* Return value type set by entry functions. */ + sljit_s32 last_return; + /* Local size passed to entry functions. */ sljit_s32 logical_local_size; #endif @@ -615,38 +649,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) available options are listed before sljit_emit_enter. The function argument list is the combination of SLJIT_ARGx - (SLJIT_DEF_ARG1) macros. Currently maximum 3 SW / UW - (SLJIT_ARG_TYPE_SW / LJIT_ARG_TYPE_UW) arguments are supported. 
- The first argument goes to SLJIT_S0, the second goes to SLJIT_S1 - and so on. The register set used by the function must be declared - as well. The number of scratch and saved registers used by the - function must be passed to sljit_emit_enter. Only R registers - between R0 and "scratches" argument can be used later. E.g. if - "scratches" is set to 2, the scratch register set will be limited - to SLJIT_R0 and SLJIT_R1. The S registers and the floating point - registers ("fscratches" and "fsaveds") are specified in a similar - manner. The sljit_emit_enter is also capable of allocating a stack - space for local variables. The "local_size" argument contains the - size in bytes of this local area and its staring address is stored + (SLJIT_DEF_ARG1) macros. Currently maximum 4 arguments are + supported. The first integer argument is loaded into SLJIT_S0, + the second one is loaded into SLJIT_S1, and so on. Similarly, + the first floating point argument is loaded into SLJIT_FR0, + the second one is loaded into SLJIT_FR1, and so on. Furthermore + the register set used by the function must be declared as well. + The number of scratch and saved registers used by the function + must be passed to sljit_emit_enter. Only R registers between R0 + and "scratches" argument can be used later. E.g. if "scratches" + is set to 2, the scratch register set will be limited to SLJIT_R0 + and SLJIT_R1. The S registers and the floating point registers + ("fscratches" and "fsaveds") are specified in a similar manner. + The sljit_emit_enter is also capable of allocating a stack space + for local variables. The "local_size" argument contains the size + in bytes of this local area and its staring address is stored in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and SLJIT_SP + local_size (exclusive) can be modified freely until the function returns. The stack space is not initialized. 
Note: the following conditions must met: 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS - 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS - 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + Note: the compiler can use saved registers as scratch registers, + but the opposite is not supported + Note: every call of sljit_emit_enter and sljit_set_context overwrites the previous context. */ -/* The absolute address returned by sljit_get_local_base with -offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ -#define SLJIT_F64_ALIGNMENT 0x00000001 +/* The compiled function uses cdecl calling + * convention instead of SLJIT_FUNC. */ +#define SLJIT_ENTER_CDECL 0x00000001 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 @@ -657,7 +696,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi /* The machine code has a context (which contains the local stack space size, number of used registers, etc.) which initialized by sljit_emit_enter. Several - functions (like sljit_emit_return) requres this context to be able to generate + functions (such as sljit_emit_return) require this context to be able to generate the appropriate code. However, some code fragments (like inline cache) may have no normal entry point so their context is unknown for the compiler. Their context can be provided to the compiler by the sljit_set_context function. @@ -669,11 +708,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); -/* Return from machine code.
The op argument can be SLJIT_UNUSED which means the - function does not return with anything or any opcode between SLJIT_MOV and - SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op - is SLJIT_UNUSED, otherwise see below the description about source and - destination arguments. */ +/* Return from machine code. The sljit_emit_return_void function does not return with + any value. The sljit_emit_return function returns with a single value which stores + the result of a data move instruction. The instruction is specified by the op + argument, and must be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw); @@ -766,7 +806,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) #define SLJIT_IMM 0x40 -/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on +/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on 32 bit CPUs. When this option is set for an arithmetic operation, only the lower 32 bit of the input registers are used, and the CPU status flags are set according to the 32 bit result. Although the higher 32 bit @@ -774,12 +814,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU requirements all source registers must be the result of those operations where this option was also set. Memory loads read 32 bit values rather - than 64 bit ones. In other words 32 bit and 64 bit operations cannot - be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source - register can hold any 32 or 64 bit value, and it is converted to a 32 bit - compatible format first. 
This conversion is free (no instructions are - emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit - value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension). + than 64 bit ones. In other words 32 bit and 64 bit operations cannot be + mixed. The only exception is SLJIT_MOV32 whose source register can hold + any 32 or 64 bit value, and it is converted to a 32 bit compatible format + first. This conversion is free (no instructions are emitted) on most CPUs. + A 32 bit value can also be converted to a 64 bit value by SLJIT_MOV_S32 + (sign extension) or SLJIT_MOV_U32 (zero extension). + + As for floating-point operations, this option sets 32 bit single + precision mode. Similar to the integer operations, all register arguments + must be the result of those operations where this option was also set. Note: memory addressing always uses 64 bit values on 64 bit systems so the result of a 32 bit operation must not be used with SLJIT_MEMx @@ -788,22 +832,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * This option is part of the instruction name, so there is no need to manually set it. E.g: - SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */ -#define SLJIT_I32_OP 0x100 - -/* Set F32 (single) precision mode for floating-point computation. This - option is similar to SLJIT_I32_OP, it just applies to floating point - registers. When this option is passed, the CPU performs 32 bit floating - point operations, rather than 64 bit one. Similar to SLJIT_I32_OP, all - register arguments must be the result of those operations where this - option was also set. - - This option is part of the instruction name, so there is no need to - manually set it. E.g: - - SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP) - */ -#define SLJIT_F32_OP SLJIT_I32_OP + SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */ +#define SLJIT_32 0x100 /* Many CPUs (x86, ARM, PPC) have status flags which can be set according to the result of an operation. 
Other CPUs (MIPS) do not have status @@ -887,7 +917,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ #define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) -#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_I32_OP) +#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32) /* Flags: - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. @@ -895,13 +925,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), the behaviour is undefined. */ #define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) -#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_I32_OP) +#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32) /* Flags: - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ #define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) -#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_I32_OP) +#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32) /* Flags: - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. @@ -909,7 +939,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), the behaviour is undefined. */ #define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) -#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_I32_OP) +#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) /* Flags: - (does not modify flags) ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 when Intel Control-flow Enforcement Technology (CET) is enabled. 
@@ -941,16 +971,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #define SLJIT_MOV (SLJIT_OP1_BASE + 0) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) -#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_I32_OP) +#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) -#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_I32_OP) +#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) -#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_I32_OP) +#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) -#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_I32_OP) +#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32) /* Flags: - (does not modify flags) Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */ #define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) @@ -958,25 +988,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ #define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) /* Flags: - (does not modify flags) */ -#define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP) +#define SLJIT_MOV32 (SLJIT_OP1_BASE + 7) /* Flags: - (does not modify flags) Note: load a pointer sized data, useful on x32 (a 32 bit mode on x86-64 where all x64 features are available, e.g. 
16 register) or similar compiling modes */ -#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7) +#define SLJIT_MOV_P (SLJIT_OP1_BASE + 8) /* Flags: Z Note: immediate source argument is not supported */ -#define SLJIT_NOT (SLJIT_OP1_BASE + 8) -#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP) -/* Flags: Z | OVERFLOW - Note: immediate source argument is not supported */ -#define SLJIT_NEG (SLJIT_OP1_BASE + 9) -#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP) +#define SLJIT_NOT (SLJIT_OP1_BASE + 9) +#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32) /* Count leading zeroes Flags: - (may destroy flags) Note: immediate source argument is not supported */ #define SLJIT_CLZ (SLJIT_OP1_BASE + 10) -#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP) +#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -987,58 +1013,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile /* Flags: Z | OVERFLOW | CARRY */ #define SLJIT_ADD (SLJIT_OP2_BASE + 0) -#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_I32_OP) +#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32) /* Flags: CARRY */ #define SLJIT_ADDC (SLJIT_OP2_BASE + 1) -#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_I32_OP) +#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32) /* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER SIG_LESS_EQUAL | CARRY */ #define SLJIT_SUB (SLJIT_OP2_BASE + 2) -#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_I32_OP) +#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32) /* Flags: CARRY */ #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) -#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP) +#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32) /* Note: integer mul Flags: OVERFLOW */ #define SLJIT_MUL (SLJIT_OP2_BASE + 4) -#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP) +#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32) /* Flags: Z */ #define SLJIT_AND (SLJIT_OP2_BASE + 5) -#define SLJIT_AND32 (SLJIT_AND | 
SLJIT_I32_OP) +#define SLJIT_AND32 (SLJIT_AND | SLJIT_32) /* Flags: Z */ #define SLJIT_OR (SLJIT_OP2_BASE + 6) -#define SLJIT_OR32 (SLJIT_OR | SLJIT_I32_OP) +#define SLJIT_OR32 (SLJIT_OR | SLJIT_32) /* Flags: Z */ #define SLJIT_XOR (SLJIT_OP2_BASE + 7) -#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_I32_OP) +#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32) /* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_SHL (SLJIT_OP2_BASE + 8) -#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_I32_OP) +#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32) /* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_LSHR (SLJIT_OP2_BASE + 9) -#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_I32_OP) +#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32) /* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_ASHR (SLJIT_OP2_BASE + 10) -#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_I32_OP) +#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); +/* The sljit_emit_op2u function is the same as sljit_emit_op2 except the result is discarded. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + /* Starting index of opcodes for sljit_emit_op2. 
*/ #define SLJIT_OP_SRC_BASE 128 @@ -1082,35 +1114,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp /* Flags: - (does not modify flags) */ #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) -#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_F32_OP) +#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32) /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int Rounding mode when the destination is W or I: round towards zero. */ -/* Flags: - (does not modify flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) -#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) -#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) -#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) -#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) -#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP) +#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) /* Note: dst is the left and src is the right operand for SLJIT_CMPD. 
Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ #define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) -#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) -#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) -#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_F32_OP) +#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -1119,18 +1151,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* Starting index of opcodes for sljit_emit_fop2. */ #define SLJIT_FOP2_BASE 192 -/* Flags: - (does not modify flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) -#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) -#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) -#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) -#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_F32_OP) +#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw 
dstw, @@ -1170,33 +1202,35 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_NOT_OVERFLOW 11 -/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ -#define SLJIT_SET_CARRY SLJIT_SET(12) +/* Unlike other flags, sljit_emit_jump may destroy this flag. */ +#define SLJIT_CARRY 12 +#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) +#define SLJIT_NOT_CARRY 13 /* Floating point comparison types. */ #define SLJIT_EQUAL_F64 14 -#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64) #define SLJIT_NOT_EQUAL_F64 15 -#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64) #define SLJIT_LESS_F64 16 -#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP) +#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_32) #define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64) #define SLJIT_GREATER_EQUAL_F64 17 -#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64) #define SLJIT_GREATER_F64 18 -#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP) +#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_32) #define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64) #define SLJIT_LESS_EQUAL_F64 19 -#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64) #define SLJIT_UNORDERED_F64 20 -#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP) +#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_32) #define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64) #define 
SLJIT_ORDERED_F64 21 -#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP) +#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_32) #define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64) /* Unconditional jump types. */ @@ -1211,6 +1245,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 +/* When this flag is passed, the execution of the current function ends and + the called function returns to the caller of the current function. The + stack usage is reduced before the call, but it is not necessarily reduced + to zero. In the latter case the compiler needs to allocate space for some + arguments and the return register must be kept as well. + + This feature is highly experimental and not supported on SPARC platform + at the moment. */ +#define SLJIT_CALL_RETURN 0x2000 /* Emit a jump instruction. The destination is not set, only the type of the jump. type must be between SLJIT_EQUAL and SLJIT_FAST_CALL @@ -1221,15 +1264,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile /* Emit a C compiler (ABI) compatible function call. type must be SLJIT_CALL or SLJIT_CALL_CDECL - type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and SLJIT_CALL_RETURN arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros Flags: destroy all flags. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types); /* Basic arithmetic comparison. In most architectures it is implemented as - an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting - appropriate flags) followed by a sljit_emit_jump. However some + an compare operation followed by a sljit_emit_jump. 
However some architectures (i.e: ARM64 or MIPS) may employ special optimizations here. It is suggested to use this comparison form when appropriate. type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL @@ -1271,6 +1313,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi Direct form: set src to SLJIT_IMM() and srcw to the address Indirect form: any other valid addressing mode type must be SLJIT_CALL or SLJIT_CALL_CDECL + type can be combined (or'ed) with SLJIT_CALL_RETURN arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros Flags: destroy all flags. */ @@ -1298,7 +1341,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64 dst_reg must be a valid register and it can be combined - with SLJIT_I32_OP to perform a 32 bit arithmetic operation + with SLJIT_32 to perform a 32 bit arithmetic operation src must be register or immediate (SLJIT_IMM) Flags: - (does not modify flags) */ @@ -1454,26 +1497,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st #if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -/* Get the entry address of a given function. */ -#define SLJIT_FUNC_OFFSET(func_name) ((sljit_sw)func_name) +/* Get the entry address of a given function (signed, unsigned result). */ +#define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)func_name) +#define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)func_name) #else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ /* All JIT related code should be placed in the same context (library, binary, etc.). */ -#define SLJIT_FUNC_OFFSET(func_name) (*(sljit_sw*)(void*)func_name) +/* Get the entry address of a given function (signed, unsigned result). 
*/ +#define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)func_name) +#define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)func_name) /* For powerpc64, the function pointers point to a context descriptor. */ struct sljit_function_context { - sljit_sw addr; - sljit_sw r2; - sljit_sw r11; + sljit_uw addr; + sljit_uw r2; + sljit_uw r11; }; /* Fill the context arguments using the addr and the function. If func_ptr is NULL, it will not be set to the address of context If addr is NULL, the function address also comes from the func pointer. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func); #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ @@ -1516,17 +1562,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size); + void *instruction, sljit_u32 size); /* Flags were set by a 32 bit operation. */ -#define SLJIT_CURRENT_FLAGS_I32_OP SLJIT_I32_OP +#define SLJIT_CURRENT_FLAGS_32 SLJIT_32 -/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ -#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 +/* Flags were set by an ADD or ADDC operations. */ +#define SLJIT_CURRENT_FLAGS_ADD 0x01 +/* Flags were set by a SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_SUB 0x02 -/* Flags were set by a SUB with unused destination. - Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ -#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 +/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode. + Must be combined with SLJIT_CURRENT_FLAGS_SUB. 
*/ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x04 /* Define the currently available CPU status flags. It is usually used after an sljit_emit_label or sljit_emit_op_custom operations to define which CPU diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c index 74cf55fcd2..7b87f5907a 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c @@ -65,12 +65,17 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; -#define RM(rm) (reg_map[rm]) -#define RD(rd) (reg_map[rd] << 12) -#define RN(rn) (reg_map[rn] << 16) +#define RM(rm) ((sljit_uw)reg_map[rm]) +#define RM8(rm) ((sljit_uw)reg_map[rm] << 8) +#define RD(rd) ((sljit_uw)reg_map[rd] << 12) +#define RN(rn) ((sljit_uw)reg_map[rn] << 16) + +#define VM(rm) ((sljit_uw)freg_map[rm]) +#define VD(rd) ((sljit_uw)freg_map[rd] << 12) +#define VN(rn) ((sljit_uw)freg_map[rn] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -107,6 +112,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SBC 0xe0c00000 #define SMULL 0xe0c00090 #define SUB 0xe0400000 +#define TST 0xe1000000 #define UMULL 0xe0800090 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 @@ -115,12 +121,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +213,7 @@ static sljit_s32 
push_inst_with_literal(struct sljit_compiler *compiler, sljit_u cpool_unique_ptr = compiler->cpool_unique; do { if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { - cpool_index = cpool_ptr - compiler->cpool; + cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool); break; } cpool_ptr++; @@ -293,7 +302,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. */ if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { - diff = const_pool - last_pc_patch; + diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; /* Must be a load instruction with immediate offset. */ @@ -308,12 +317,12 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ SLJIT_ASSERT(diff >= 1); if (diff >= 2 || ind > 0) { - diff = (diff + ind - 2) << 2; + diff = (diff + (sljit_uw)ind - 2) << 2; SLJIT_ASSERT(diff <= 0xfff); - *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff; } else - *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004; } last_pc_patch++; } @@ -329,24 +338,24 @@ struct future_patch { static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) { - sljit_s32 value; + sljit_u32 value; struct future_patch *curr_patch, *prev_patch; SLJIT_UNUSED_ARG(compiler); /* Using the values generated by patch_pc_relative_loads. 
*/ if (!*first_patch) - value = (sljit_s32)cpool_start_address[cpool_current_index]; + value = cpool_start_address[cpool_current_index]; else { curr_patch = *first_patch; prev_patch = NULL; while (1) { if (!curr_patch) { - value = (sljit_s32)cpool_start_address[cpool_current_index]; + value = cpool_start_address[cpool_current_index]; break; } if ((sljit_uw)curr_patch->index == cpool_current_index) { - value = curr_patch->value; + value = (sljit_uw)curr_patch->value; if (prev_patch) prev_patch->next = curr_patch->next; else @@ -359,8 +368,8 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc } } - if (value >= 0) { - if ((sljit_uw)value > cpool_current_index) { + if ((sljit_sw)value >= 0) { + if (value > cpool_current_index) { curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); if (!curr_patch) { while (*first_patch) { @@ -371,8 +380,8 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc return SLJIT_ERR_ALLOC_FAILED; } curr_patch->next = *first_patch; - curr_patch->index = value; - curr_patch->value = cpool_start_address[value]; + curr_patch->index = (sljit_sw)value; + curr_patch->value = (sljit_sw)cpool_start_address[value]; *first_patch = curr_patch; } cpool_start_address[value] = *buf_ptr; @@ -395,8 +404,8 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { - FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); - return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff)); } #endif @@ -554,8 +563,9 @@ static SLJIT_INLINE void 
inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut } static sljit_uw get_imm(sljit_uw imm); +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw *ptr = (sljit_uw*)addr; @@ -658,7 +668,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_sw addr; + sljit_uw addr; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -737,7 +747,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Points after the current instruction. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; next_addr = compute_next_addr(label, jump, const_, put_label); @@ -770,7 +780,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* code_ptr can be affected above. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); - label->size = (code_ptr + 1) - code; + label->size = (sljit_uw)((code_ptr + 1) - code); label = label->next; } if (const_ && const_->addr == word_count) { @@ -799,8 +809,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); if (cpool_current_index > 0) { /* Unconditional branch. 
*/ - *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); - code_ptr = cpool_start_address + cpool_current_index; + *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); } cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; cpool_current_index = 0; @@ -822,7 +832,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_start_address = ALIGN_INSTRUCTION(code_ptr); cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); if (cpool_current_index > 0) - code_ptr = cpool_start_address + cpool_current_index; + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); buf_ptr = compiler->cpool; buf_end = buf_ptr + compiler->cpool_fill; @@ -845,15 +855,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr = (sljit_uw *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); if (!(jump->flags & JUMP_ADDR)) { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff; + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff; } else { - SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff; + SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000); + *buf_ptr |= 
((jump->u.target - addr) >> 2) & 0x00ffffff; } } else if (jump->flags & SLJIT_REWRITABLE_JUMP) { @@ -923,7 +933,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw); code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -972,6 +982,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define ALLOW_IMM 0x10 #define ALLOW_INV_IMM 0x20 #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ALLOW_NEG_IMM 0x40 /* s/l - store/load (1 bit) u/s - signed/unsigned (1 bit) @@ -999,7 +1010,7 @@ static const sljit_uw data_transfer_insts[16] = { }; #define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \ - (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg)) + (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg)) /* Normal ldr/str instruction. Type2: ldrsb, ldrh, ldrsh */ @@ -1008,6 +1019,26 @@ static const sljit_uw data_transfer_insts[16] = { #define TYPE2_TRANSFER_IMM(imm) \ (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) +#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ + ((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2)) + +/* Flags for emit_op: */ + /* Arguments are swapped. */ +#define ARGS_SWAPPED 0x01 + /* Inverted immediate. */ +#define INV_IMM 0x02 + /* Source and destination is register. */ +#define MOVE_REG_CONV 0x04 + /* Unused return value. */ +#define UNUSED_RETURN 0x08 +/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). 
*/ +#define SET_FLAGS (1 << 20) +/* dst: reg + src1: reg + src2: reg or imm (if allowed) + SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ +#define SRC2_IMM (1 << 25) + static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, @@ -1017,41 +1048,161 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, size, i, tmp; - sljit_uw push; + sljit_uw imm, offset; + sljit_s32 i, tmp, size, word_arg_count, saved_arg_count; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* Push saved registers, temporary registers - stmdb sp!, {..., lr} */ - push = PUSH | (1 << 14); + imm = 0; - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - push |= 1 << reg_map[i]; + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - push |= 1 << reg_map[i]; + imm |= (sljit_uw)1 << reg_map[i]; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); - FAIL_IF(push_inst(compiler, push)); + /* Push saved and temporary registers + multiple registers: stmdb sp!, {..., lr} + single register: str reg, [sp, #-4]! 
*/ + if (imm != 0) + FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm)); + else + FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2))); /* Stack must be aligned to 8 bytes: */ size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - local_size = ((size + local_size + 7) & ~7) - size; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; compiler->local_size = local_size; - if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - args = get_arg_count(arg_types); + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case 
SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); + else + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 + | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } - if (args >= 1) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, MOV | 
RD(SLJIT_S1) | RM(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2))); + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst(compiler, *(--remap_ptr))); +#endif + + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); return SLJIT_SUCCESS; } @@ -1067,58 +1218,129 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((size + local_size + 7) & ~7) - size; + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) { - sljit_s32 i, tmp; - sljit_uw pop; + sljit_uw imm2 = get_imm(imm); - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + if (imm2 == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + imm2 = RM(TMP_REG2); + } - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list; - if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); - /* Push saved registers, temporary registers - ldmia sp!, {..., 
pc} */ - pop = POP | (1 << 15); + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - pop |= 1 << reg_map[i]; + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) + lr_dst = 0; + + reg_list = 0; + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - pop |= 1 << reg_map[i]; + reg_list |= (sljit_uw)1 << reg_map[i]; + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. 
*/ + local_size += SSIZE_OF(sw); + + if (reg_list != 0) + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (reg_list == 0) + return SLJIT_SUCCESS; + + if (compiler->saveds > 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); + lr_dst = SLJIT_S0; + } else { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); + lr_dst = SLJIT_FIRST_SAVED_REG; + } + + return push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 + | RN(SLJIT_SP) | RD(lr_dst) | (sljit_uw)(frame_size - 2 * SSIZE_OF(sw))); + } - return push_inst(compiler, pop); + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + /* Pop saved and temporary registers + multiple registers: ldmia sp!, {...} + single register: ldr reg, [sp], #4 */ + if ((reg_list & (reg_list - 1)) == 0) { + SLJIT_ASSERT(lr_dst != 0); + SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]); + + return push_inst(compiler, 0xe49d0004 | RD(lr_dst)); + } + + FAIL_IF(push_inst(compiler, POP | reg_list)); + if (frame_size > 0) + return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw))); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); } /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ -/* flags: */ - /* Arguments are swapped. */ -#define ARGS_SWAPPED 0x01 - /* Inverted immediate. */ -#define INV_IMM 0x02 - /* Source and destination is register. 
*/ -#define MOVE_REG_CONV 0x04 - /* Unused return value. */ -#define UNUSED_RETURN 0x08 -/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ -#define SET_FLAGS (1 << 20) -/* dst: reg - src1: reg - src2: reg or imm (if allowed) - SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ -#define SRC2_IMM (1 << 25) - #define EMIT_SHIFT_INS_AND_RETURN(opcode) \ SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ if (compiler->shift_imm != 0x20) { \ @@ -1130,11 +1352,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \ return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \ } \ - return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \ - (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) \ + | RM8((flags & ARGS_SWAPPED) ? src1 : src2) | (sljit_uw)(opcode << 5) \ + | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, - sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) + sljit_uw dst, sljit_uw src1, sljit_uw src2) { switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1184,9 +1407,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_NOT: - if (src2 & SRC2_IMM) { + if (src2 & SRC2_IMM) return push_inst(compiler, ((flags & INV_IMM) ? 
MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2); - } + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2)); case SLJIT_CLZ: @@ -1197,9 +1420,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1209,10 +1431,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1227,14 +1449,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl compiler->status_flags_state = 0; if (!HAS_FLAGS(op)) - return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); + return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); - FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); + FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); /* cmp TMP_REG1, dst asr #31. 
*/ return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); case SLJIT_AND: + if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) + return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1266,7 +1490,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl Returns with 0 if not possible. */ static sljit_uw get_imm(sljit_uw imm) { - sljit_s32 rol; + sljit_u32 rol; if (imm <= 0xff) return SRC2_IMM | imm; @@ -1307,7 +1531,7 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl sljit_uw mask; sljit_uw imm1; sljit_uw imm2; - sljit_s32 rol; + sljit_uw rol; /* Step1: Search a zero byte (8 continous zero bit). */ mask = 0xff000000; @@ -1418,7 +1642,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw tmp; #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - if (!(imm & ~0xffff)) + if (!(imm & ~(sljit_uw)0xffff)) return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); #endif @@ -1455,13 +1679,13 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit SLJIT_ASSERT (arg & SLJIT_MEM); SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); - if ((arg & REG_MASK) == SLJIT_UNUSED) { + if (!(arg & REG_MASK)) { if (is_type1_transfer) { - FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xfff)); argw &= 0xfff; } else { - FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xff)); argw &= 0xff; } @@ -1475,20 +1699,20 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit argw &= 0x3; if (argw != 0 && !is_type1_transfer) { - 
FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7))); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); } /* Bit 25: RM is offset. */ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, - RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7))); + RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | ((sljit_uw)argw << 7))); } arg &= REG_MASK; if (is_type1_transfer) { if (argw > 0xfff) { - imm = get_imm(argw & ~0xfff); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); argw = argw & 0xfff; @@ -1496,7 +1720,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xfff) { - imm = get_imm(-argw & ~0xfff); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xfff); if (imm) { FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); argw = -(-argw & 0xfff); @@ -1512,7 +1736,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } else { if (argw > 0xff) { - imm = get_imm(argw & ~0xff); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xff); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); argw = argw & 0xff; @@ -1520,7 +1744,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xff) { - imm = get_imm(-argw & ~0xff); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); if (imm) { FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); argw = -(-argw & 0xff); @@ -1537,7 +1761,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); return push_inst(compiler, 
EMIT_DATA_TRANSFER(flags, 1, reg, arg, RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); } @@ -1554,50 +1778,62 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 /* We prefers register and simple consts. */ sljit_s32 dst_reg; sljit_s32 src1_reg; - sljit_s32 src2_reg; + sljit_s32 src2_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 neg_op = 0; - /* Destination check. */ - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) + if (dst == TMP_REG2) flags |= UNUSED_RETURN; SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); - src2_reg = 0; + if (inp_flags & ALLOW_NEG_IMM) { + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUB; + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUBC; + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADD; + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADDC; + break; + } + } do { if (!(inp_flags & ALLOW_IMM)) break; if (src2 & SLJIT_IMM) { - src2_reg = get_imm(src2w); + src2_reg = (sljit_s32)get_imm((sljit_uw)src2w); if (src2_reg) break; if (inp_flags & ALLOW_INV_IMM) { - src2_reg = get_imm(~src2w); + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w); if (src2_reg) { flags |= INV_IMM; break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_reg = get_imm(-src2w); + if (neg_op != 0) { + src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); if (src2_reg) { - op = SLJIT_SUB | GET_ALL_FLAGS(op); - break; - } - } - if (GET_OPCODE(op) == SLJIT_SUB) { - src2_reg = get_imm(-src2w); - if (src2_reg) { - op = SLJIT_ADD | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } } if (src1 & SLJIT_IMM) { - src2_reg = get_imm(src1w); + src2_reg = (sljit_s32)get_imm((sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED; src1 = src2; @@ -1605,7 +1841,7 @@ 
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } if (inp_flags & ALLOW_INV_IMM) { - src2_reg = get_imm(~src1w); + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED | INV_IMM; src1 = src2; @@ -1613,13 +1849,13 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_reg = get_imm(-src1w); + if (neg_op >= SLJIT_SUB) { + /* Note: additive operation (commutative). */ + src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); if (src2_reg) { - /* Note: add is commutative operation. */ src1 = src2; src1w = src2w; - op = SLJIT_SUB | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } @@ -1634,12 +1870,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1_reg = TMP_REG1; } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); src1_reg = TMP_REG1; } /* Destination. */ - dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_reg = FAST_IS_REG(dst) ? 
dst : TMP_REG2; if (op <= SLJIT_MOV_P) { if (dst & SLJIT_MEM) { @@ -1663,10 +1899,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (src2 & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); else - FAIL_IF(load_immediate(compiler, src2_reg, src2w)); + FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); } - FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg)); + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; @@ -1691,7 +1927,7 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_sw saved_reg_list[3]; + sljit_uw saved_reg_list[3]; sljit_sw saved_reg_count; CHECK_ERROR(); @@ -1708,10 +1944,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) - | (reg_map[SLJIT_R1] << 16) - | (reg_map[SLJIT_R0] << 12) - | (reg_map[SLJIT_R0] << 8) - | reg_map[SLJIT_R1]); + | RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1)); case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: @@ -1742,7 +1975,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? 
SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif @@ -1756,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile SLJIT_ASSERT(saved_reg_list[1] < 8); FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); } - return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8) + return push_inst(compiler, 0xe49d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8) | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; @@ -1781,6 +2014,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); @@ -1799,13 +2033,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); } @@ -1819,19 +2046,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: case 
SLJIT_SUB: case SLJIT_SUBC: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: case SLJIT_XOR: return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); @@ -1858,6 +2084,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1905,8 +2145,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1917,23 +2158,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ - #define FPU_LOAD (1 << 20) #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ - ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs)) -#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ - ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16)) + ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs)) static 
sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7))); arg = TMP_REG2; argw = 0; } @@ -1945,12 +2183,12 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, if (!(-argw & ~0x3fc)) return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); - imm = get_imm(argw & ~0x3fc); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } - imm = get_imm(-argw & ~0x3fc); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm) { argw = -argw; FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); @@ -1959,11 +2197,11 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, } if (arg) { - FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); } else - FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); } @@ -1972,17 +2210,17 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - op 
^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0))); if (FAST_IS_REG(dst)) - return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16)); + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1)); /* Store the integer value from a VFP register. */ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); @@ -1994,23 +2232,23 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16))); + FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1))); else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. 
*/ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16))); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1))); } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -2018,19 +2256,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0))); return push_inst(compiler, VMRS); } @@ -2042,16 +2280,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); src = dst_r; } @@ -2059,25 +2297,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0))); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0))); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0))); + op ^= SLJIT_32; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -2094,40 +2332,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1))); break; } if (dst_r == TMP_FREG1) - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; } @@ -2169,10 +2407,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x10000000; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x20000000; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x30000000; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 
0x30000000; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x20000000; @@ -2198,15 +2446,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd0000000; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x10000000; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x60000000; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x00000000; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x70000000; @@ -2277,111 +2527,124 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #ifdef __SOFTFP__ -static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; - sljit_s32 word_arg_offset = 0; - sljit_s32 float_arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 float_arg_count = 0; sljit_s32 types = 0; - sljit_s32 src_offset = 4 * sizeof(sljit_sw); sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; if (src && FAST_IS_REG(*src)) - src_offset = reg_map[*src] * sizeof(sljit_sw); + src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case 
SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f32); - arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f64); - arg_count++; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); word_arg_offset += sizeof(sljit_sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } - if (stack_offset > 16) - FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7))); + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset)); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } /* Process arguments in reversed direction. 
*/ while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: float_arg_count--; - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } - FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); } else - FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); break; - case SLJIT_ARG_TYPE_F64: - arg_count--; + case SLJIT_ARG_TYPE_F32: float_arg_count--; - stack_offset = offsets[arg_count]; - - SLJIT_ASSERT((stack_offset & 0x7) == 0); + offset = *(--offset_ptr); - if (stack_offset < 16) { - if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } - FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); } else - FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 
2))); break; default: - arg_count--; word_arg_offset -= sizeof(sljit_sw); - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT(stack_offset >= word_arg_offset); + SLJIT_ASSERT(offset >= word_arg_offset); - if (stack_offset != word_arg_offset) { - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } else if (src_offset == word_arg_offset) { - *src = 1 + (stack_offset >> 2); - src_offset = stack_offset; + *src = (sljit_s32)(SLJIT_R0 + (offset >> 2)); + src_offset = offset; } - FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2))); + FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2))); } else - FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16))); + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); } break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2389,83 +2652,51 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_s32 stack_size = 0; - - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) - FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & 
SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_size & 0x7) - stack_size += sizeof(sljit_sw); - stack_size += sizeof(sljit_f64); - break; - default: - stack_size += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - if (stack_size <= 16) - return SLJIT_SUCCESS; - - return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7)); + return SLJIT_SUCCESS; } #else /* !__SOFTFP__ */ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_u32 remap = 0; - sljit_u32 offset = 0; - sljit_u32 new_offset, mask; + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; /* Remove return value. */ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { - new_offset = 0; - mask = 1; - - while (remap & mask) { - new_offset++; - mask <<= 1; - } - remap |= mask; - + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: if (offset != new_offset) FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, - 0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 
0x400000 : 0))); + SLJIT_32, new_offset, offset, 0))); - offset += 2; - } - else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { - new_offset = 0; - mask = 3; - - while (remap & mask) { - new_offset += 2; - mask <<= 2; + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0x400000, f32_offset, offset, 0))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, new_offset, offset, 0))); + f32_offset = new_offset; + new_offset++; } - remap |= mask; - - if (offset != new_offset) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0))); - - offset += 2; + offset++; + break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2480,13 +2711,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile { #ifdef __SOFTFP__ struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; #endif CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2496,9 +2732,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + 
PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); return jump; #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2535,7 +2790,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (type >= SLJIT_FAST_CALL) @@ -2555,16 +2810,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#ifdef __SOFTFP__ if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || 
(defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2573,8 +2841,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) + return push_inst(compiler, BX | RM(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); return softfloat_post_call_with_args(compiler, arg_types); #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2636,27 +2921,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; cc = get_cc(compiler, type & 0xff); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - tmp = get_imm(srcw); + tmp = get_imm((sljit_uw)srcw); if (tmp) return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); - tmp = get_imm(~srcw); + tmp = get_imm(~(sljit_uw)srcw); if (tmp) return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - tmp = (sljit_uw) srcw; + tmp = (sljit_uw)srcw; FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); if (tmp <= 0xffff) return SLJIT_SUCCESS; return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); #else - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(load_immediate(compiler, 
TMP_REG1, (sljit_uw)srcw)); src = TMP_REG1; #endif } @@ -2680,6 +2965,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; break; @@ -2731,7 +3017,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { memw &= 0x3; - inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7)); + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7)); if (is_type1_transfer) inst |= (1 << 25); @@ -2757,7 +3043,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst(compiler, inst | memw); + return push_inst(compiler, inst | (sljit_uw)memw); } if (memw >= 0) @@ -2765,7 +3051,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw)); + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw)); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2777,10 +3063,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, + EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value)); compiler->patches++; #else PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); @@ -2804,7 +3091,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); @@ -2829,5 +3116,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, executable_offset, new_constant, 1); + inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c index 3f0f5fcc30..96453b4abe 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -48,19 +48,20 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31 }; -#define W_OP (1u << 31) -#define RD(rd) (reg_map[rd]) -#define RT(rt) (reg_map[rt]) -#define RN(rn) (reg_map[rn] << 5) -#define RT2(rt2) (reg_map[rt2] << 10) -#define RM(rm) (reg_map[rm] << 
16) -#define VD(vd) (freg_map[vd]) -#define VT(vt) (freg_map[vt]) -#define VN(vn) (freg_map[vn] << 5) -#define VM(vm) (freg_map[vm] << 16) +#define W_OP ((sljit_ins)1 << 31) +#define RD(rd) ((sljit_ins)reg_map[rd]) +#define RT(rt) ((sljit_ins)reg_map[rt]) +#define RN(rn) ((sljit_ins)reg_map[rn] << 5) +#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10) +#define RM(rm) ((sljit_ins)reg_map[rm] << 16) +#define VD(vd) ((sljit_ins)freg_map[vd]) +#define VT(vt) ((sljit_ins)freg_map[vt]) +#define VT2(vt) ((sljit_ins)freg_map[vt] << 10) +#define VN(vn) ((sljit_ins)freg_map[vn] << 5) +#define VM(vm) ((sljit_ins)freg_map[vm] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -96,8 +97,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FNEG 0x1e614000 #define FSUB 0x1e603800 #define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 #define LDP 0xa9400000 -#define LDP_PRE 0xa9c00000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 #define LDR_PRE 0xf8400c00 #define LSLV 0x9ac02000 #define LSRV 0x9ac02400 @@ -117,10 +120,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SMADDL 0x9b200000 #define SMULH 0x9b403c00 #define STP 0xa9000000 +#define STP_F64 0x6d000000 #define STP_PRE 0xa9800000 #define STRB 0x38206800 #define STRBI 0x39000000 #define STRI 0xf9000000 +#define STRI_F64 0xfd000000 #define STR_FI 0x3d000000 #define STR_FR 0x3c206800 #define STUR_FI 0x3c000000 @@ -145,10 +150,10 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); - return push_inst(compiler, 
MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); } static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) @@ -171,14 +176,14 @@ static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; if (jump->flags & IS_COND) { - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= 0xfffff && diff >= -0x100000) { code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; jump->addr -= sizeof(sljit_ins); jump->flags |= PATCH_COND; return 5; } - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x7ffffff && diff >= -0x8000000) { @@ -231,8 +236,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; - sljit_s32 dst; + sljit_sw addr; + sljit_u32 dst; struct sljit_label *label; struct sljit_jump *jump; @@ -271,7 +276,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. 
*/ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -300,7 +305,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -313,58 +318,58 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); - buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? 
BL : B) | (sljit_ins)(addr & 0x3ffffff); if (jump->flags & IS_COND) buf_ptr[-1] -= (4 << 5); break; } if (jump->flags & PATCH_COND) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); - buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); break; } - SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); - SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); dst = buf_ptr[0] & 0x1f; - buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); - buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); + buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); + buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) - buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); + buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); if (jump->flags & PATCH_ABS64) - buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); + buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); } while (0); jump = jump->next; } put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; + addr = (sljit_sw)put_label->label->addr; + buf_ptr = (sljit_ins*)put_label->addr; - buf_ptr[0] |= (addr & 0xffff) << 5; - buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5; + buf_ptr[0] |= ((sljit_ins)addr & 0xffff) 
<< 5; + buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5; if (put_label->flags >= 1) - buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5; + buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; if (put_label->flags >= 2) - buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5; + buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5; put_label = put_label->next; } compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -426,11 +431,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) value >>= 1; \ } -#define LOGICAL_IMM_CHECK 0x100 +#define LOGICAL_IMM_CHECK (sljit_ins)0x100 -static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) +static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len) { - sljit_s32 negated, ones, right; + sljit_s32 negated; + sljit_u32 ones, right; sljit_uw mask, uimm; sljit_ins ins; @@ -497,30 +503,30 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) { sljit_uw imm = (sljit_uw)simm; - sljit_s32 i, zeros, ones, first; + sljit_u32 i, zeros, ones, first; sljit_ins bitmask; /* Handling simple immediates first. 
*/ if (imm <= 0xffff) - return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)imm << 5)); if (simm < 0 && simm >= -0x10000) - return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); if (imm <= 0xffffffffl) { if ((imm & 0xffff) == 0) - return push_inst(compiler, MOVZ | RD(dst) | ((imm >> 16) << 5) | (1 << 21)); + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm >> 16) << 5) | (1 << 21)); if ((imm & 0xffff0000l) == 0xffff0000) - return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); if ((imm & 0xffff) == 0xffff) - return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); bitmask = logical_imm(simm, 16); if (bitmask != 0) return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); } bitmask = logical_imm(simm, 32); @@ -529,10 +535,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, if (simm < 0 && simm >= -0x100000000l) { if ((imm & 0xffff) == 0xffff) - return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); - FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); - return 
push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); } /* A large amount of number can be constructed from ORR and MOVx, but computing them is costly. */ @@ -558,10 +564,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } if (first) { first = 0; - FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); } else - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)~simm & 0xffff) << 5) | (i << 21))); simm >>= 16; } return SLJIT_SUCCESS; @@ -574,10 +580,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } if (first) { first = 0; - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); } else - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); simm >>= 16; } return SLJIT_SUCCESS; @@ -619,12 +625,11 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } if (flags & (ARG1_IMM | ARG2_IMM)) { - reg = (flags & ARG2_IMM) ? arg1 : arg2; + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); imm = (flags & ARG2_IMM) ? arg2 : arg1; switch (op) { case SLJIT_MUL: - case SLJIT_NEG: case SLJIT_CLZ: case SLJIT_ADDC: case SLJIT_SUBC: @@ -639,40 +644,43 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? 
(~imm & 0xffffffff) : ~imm)); goto set_flags; case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = -imm; /* Fall through. */ case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (op != SLJIT_SUB) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (imm == 0) { CHECK_FLAGS(1 << 29); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); } if (imm > 0 && imm <= 0xfff) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10)); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10)); } nimm = -imm; if (nimm > 0 && nimm <= 0xfff) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10)); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10)); } if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)); } if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)); } if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { - FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22))); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10)); + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | 
(((sljit_ins)imm & 0xfff) << 10)); } if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { - FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22))); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10)); + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10)); } break; case SLJIT_AND: @@ -697,11 +705,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10))); } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) + | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10))); } goto set_flags; case SLJIT_LSHR: @@ -712,11 +722,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s inv_bits |= 1 << 30; if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | ((sljit_ins)imm << 16) | (31 << 10))); } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10))); } goto set_flags; default: @@ -766,41 +778,38 @@ static sljit_s32 emit_op_imm(struct 
sljit_compiler *compiler, sljit_s32 flags, s if (!(flags & INT_OP)) inv_bits |= 1 << 22; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); - case SLJIT_MOV_U32: + case SLJIT_MOV32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if ((flags & INT_OP) && dst == arg2) + if (dst == arg2) return SLJIT_SUCCESS; + /* fallthrough */ + case SLJIT_MOV_U32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_MOV_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if ((flags & INT_OP) && dst == arg2) - return SLJIT_SUCCESS; return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); case SLJIT_NOT: SLJIT_ASSERT(arg1 == TMP_REG1); FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); break; /* Set flags. */ - case SLJIT_NEG: - SLJIT_ASSERT(arg1 == TMP_REG1); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if (flags & SET_FLAGS) - inv_bits |= 1 << 29; - return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 
<< 29); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_MUL: @@ -852,7 +861,7 @@ set_flags: #define INT_SIZE 0x2 #define WORD_SIZE 0x3 -#define MEM_SIZE_SHIFT(flags) ((flags) & 0x3) +#define MEM_SIZE_SHIFT(flags) ((sljit_ins)(flags) & 0x3) static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) @@ -872,35 +881,34 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, STRB | type | RT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg)); } arg &= REG_MASK; - if (arg == SLJIT_UNUSED) { + if (!arg) { FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10)); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { - if ((argw >> shift) <= 0xfff) { - return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | (argw << (10 - shift))); - } + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10)); + return push_inst(compiler, STRBI | type | RT(reg) | 
RN(tmp_reg) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) - return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); FAIL_IF(load_immediate(compiler, tmp_reg, argw)); @@ -915,39 +923,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, i, tmp, offs, prev, saved_regs_size; + sljit_s32 prev, fprev, saved_regs_size, i, tmp; + sljit_s32 word_arg_count = 0; + sljit_ins offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size + saved_regs_size; + local_size = (local_size + saved_regs_size + 0xf) & ~0xf; + compiler->local_size = local_size; - FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15))); + if (local_size <= 512) { + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size = 0; + } else { + saved_regs_size = ((saved_regs_size - 2 * SSIZE_OF(sw)) + 0xf) & ~0xf; -#ifdef _WIN32 - if (local_size >= 4096) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); - else if (local_size > 256) - 
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10))); -#endif + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)saved_regs_size << 10))); + offs = (sljit_ins)(saved_regs_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size -= saved_regs_size; + SLJIT_ASSERT(local_size > 0); + } - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; prev = -1; - offs = 2 << 15; - for (i = SLJIT_S0; i >= tmp; i--) { + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { if (prev == -1) { prev = i; continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } @@ -957,84 +970,124 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } - if (prev != -1) - FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); + fprev = -1; + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10))); + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } - args = get_arg_count(arg_types); + if (fprev != -1) + FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); - if (args >= 1) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, ORR | 
RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + arg_types >>= SLJIT_ARG_SHIFT; + +#ifdef _WIN32 + if (local_size > 4096) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); +#endif /* _WIN32 */ + + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + tmp++; + } + word_arg_count++; + } + arg_types >>= SLJIT_ARG_SHIFT; + } #ifdef _WIN32 - if (local_size >= 4096) { + if (local_size > 4096) { if (local_size < 4 * 4096) { /* No need for a loop. */ - if (local_size >= 2 * 4096) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - local_size -= 4096; - } if (local_size >= 2 * 4096) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - local_size -= 4096; - } + if (local_size >= 3 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - local_size -= 4096; + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } } else { - FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5))); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI 
| RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10))); + FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10))); FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */)); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - - local_size &= 0xfff; } - if (local_size > 256) { - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10))); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - } - else if (local_size > 0) - FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12))); + local_size &= 0xfff; - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + if (local_size > 0) + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + else + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); } - else if (local_size > 256) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + + if (local_size > 0) { + if (local_size <= 512) + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + else { + if (local_size >= 4096) + local_size = (1 << (22 - 10)); + + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } } - else if (local_size > 0) - FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | 
RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12))); #else /* !_WIN32 */ /* The local_size does not include saved registers size. */ - if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); - local_size &= 0xfff; + if (local_size != 0) { + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + + if (local_size > 512 || local_size == 0) { + if (local_size != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); } - if (local_size != 0) - FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); #endif /* _WIN32 */ - return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, @@ -1048,57 +1101,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); - compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf); + compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler 
*compiler) { - sljit_s32 local_size; - sljit_s32 i, tmp, offs, prev, saved_regs_size; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + sljit_s32 local_size, prev, fprev, i, tmp; + sljit_ins offs; - local_size = compiler->local_size - saved_regs_size; + local_size = compiler->local_size; - /* Load LR as early as possible. */ - if (local_size == 0) + if (local_size > 512 && local_size <= 512 + 496) { + FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); + local_size = 512; + } else FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); - else if (local_size < 63 * sizeof(sljit_sw)) { - FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | (local_size << (15 - 3)))); - } - else { + + if (local_size > 512) { + local_size -= 512; if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) + | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); local_size &= 0xfff; } - if (local_size) - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); - FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + local_size = 512; } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); prev = -1; - offs = 2 << 15; - for (i = SLJIT_S0; i >= tmp; i--) { + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) { if (prev == -1) { prev = i; continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } @@ -1108,15 +1153,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } + fprev = -1; + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + if (prev != -1) - FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + /* This and the next call/jump instruction can be executed parallelly. */ + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(local_size << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); - /* These two can be executed in parallel. 
*/ - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10))); return push_inst(compiler, RET | RN(TMP_LR)); } @@ -1126,7 +1206,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_ins inv_bits = (op & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? W_OP : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1171,13 +1251,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { /* Both operands are registers. */ if (dst_r != TMP_REG1 && FAST_IS_REG(src)) - return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); + return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src); switch (op) { case SLJIT_MOV: @@ -1210,6 +1290,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_u32)srcw; break; case SLJIT_MOV_S32: + case SLJIT_MOV32: mem_flags = INT_SIZE | SIGNED; if (src & SLJIT_IMM) srcw = (sljit_s32)srcw; @@ -1235,14 +1316,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile flags = HAS_FLAGS(op_flags) ? 
SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op_flags & SLJIT_I32_OP) { + if (op_flags & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (dst == SLJIT_UNUSED) - flags |= UNUSED_RETURN; - if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2)); src = TMP_REG2; @@ -1263,24 +1341,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 dst_r, flags, mem_flags; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (dst == SLJIT_UNUSED) + if (dst == TMP_REG1) flags |= UNUSED_RETURN; if (src1 & SLJIT_MEM) { @@ -1310,6 +1385,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1363,8 +1452,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 
reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1391,34 +1481,34 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1)); } arg &= REG_MASK; - if (arg == SLJIT_UNUSED) { + if (!arg) { FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { if ((argw >> shift) <= 0xfff) - return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | (argw << (10 - shift))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | ((argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) - return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + return 
push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1)); @@ -1429,13 +1519,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); src = TMP_FREG1; } @@ -1451,7 +1541,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) inv_bits |= W_OP; @@ -1471,7 +1561,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -1479,8 +1569,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_s32 mem_flags = (op & SLJIT_32) ? 
INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); @@ -1499,7 +1589,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; sljit_ins inv_bits; CHECK_ERROR(); @@ -1507,7 +1597,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { @@ -1531,7 +1621,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); break; } @@ -1545,8 +1635,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? 
(1 << 22) : 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1605,7 +1695,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: @@ -1616,10 +1706,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x0; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x2; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x3; @@ -1645,15 +1745,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xc; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x7; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x6; @@ -1709,9 +1811,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + 
PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -1724,7 +1832,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; - sljit_ins inv_bits = (type & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1775,7 +1883,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; @@ -1786,8 +1894,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1825,7 +1950,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } @@ -1849,14 +1974,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 dst_reg, sljit_s32 src, sljit_sw srcw) { - sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0; sljit_ins cc; CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - if (dst_reg & SLJIT_I32_OP) + if (dst_reg & SLJIT_32) srcw = (sljit_s32)srcw; FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; @@ -1864,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil } cc = get_cc(compiler, type & 0xff); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); } @@ -1891,17 +2016,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile break; case SLJIT_MOV_S8: sign = 1; + /* fallthrough */ case SLJIT_MOV_U8: inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400; break; case SLJIT_MOV_S16: sign = 1; + /* fallthrough */ case SLJIT_MOV_U16: inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400; break; case SLJIT_MOV_S32: sign = 1; + /* fallthrough */ case SLJIT_MOV_U32: + case SLJIT_MOV32: inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400; break; default: @@ -1916,7 +2045,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (type & SLJIT_MEM_PRE) inst |= 0x800; - return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); + return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, @@ -1936,7 +2065,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil inst = STUR_FI | 0x80000400; - if (!(type & SLJIT_F32_OP)) + if (!(type & SLJIT_32)) inst |= 0x40000000; if (!(type & SLJIT_MEM_STORE)) @@ -1945,7 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil if (type & SLJIT_MEM_PRE) inst |= 0x800; - return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); + return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) @@ -1955,11 +2084,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c CHECK_ERROR(); CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); - - SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + /* Not all instruction forms support accessing SP register. 
*/ if (offset <= 0xffffff && offset >= -0xffffff) { ins = ADDI; if (offset < 0) { @@ -1968,13 +2097,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c } if (offset <= 0xfff) - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10))); else { - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); offset &= 0xfff; if (offset != 0) - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10))); } } else { @@ -2002,7 +2131,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi set_const(const_, compiler); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; - PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, (sljit_uw)init_value)); if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); @@ -2034,17 +2163,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; - sljit_s32 dst; + sljit_u32 dst; SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); dst = inst[0] & 0x1f; SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); - inst[0] = MOVZ | dst | ((new_target & 0xffff) << 5); - inst[1] = MOVK | dst | (((new_target >> 16) & 0xffff) << 5) | (1 << 21); - inst[2] = MOVK | dst | (((new_target >> 32) & 0xffff) << 5) | (2 << 21); - inst[3] = MOVK | dst | ((new_target >> 48) << 5) | (3 << 21); + inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5); + inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21); + inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21); + inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21); SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); @@ -2053,5 +2182,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c index e35dbe99b3..ed21ea7daa 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c +++ 
b/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -50,40 +50,42 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; #define COPY_BITS(src, from, to, bits) \ - ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) + ((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to)) + +#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm)) /* Thumb16 encodings. */ -#define RD3(rd) (reg_map[rd]) -#define RN3(rn) (reg_map[rn] << 3) -#define RM3(rm) (reg_map[rm] << 6) -#define RDN3(rdn) (reg_map[rdn] << 8) -#define IMM3(imm) (imm << 6) -#define IMM8(imm) (imm) +#define RD3(rd) ((sljit_ins)reg_map[rd]) +#define RN3(rn) ((sljit_ins)reg_map[rn] << 3) +#define RM3(rm) ((sljit_ins)reg_map[rm] << 6) +#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8) +#define IMM3(imm) ((sljit_ins)imm << 6) +#define IMM8(imm) ((sljit_ins)imm) /* Thumb16 helpers. */ #define SET_REGS44(rd, rn) \ - ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) + (((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4)) #define IS_2_LO_REGS(reg1, reg2) \ (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) #define IS_3_LO_REGS(reg1, reg2, reg3) \ (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) /* Thumb32 encodings. 
*/ -#define RD4(rd) (reg_map[rd] << 8) -#define RN4(rn) (reg_map[rn] << 16) -#define RM4(rm) (reg_map[rm]) -#define RT4(rt) (reg_map[rt] << 12) -#define DD4(dd) (freg_map[dd] << 12) -#define DN4(dn) (freg_map[dn] << 16) -#define DM4(dm) (freg_map[dm]) +#define RD4(rd) ((sljit_ins)reg_map[rd] << 8) +#define RN4(rn) ((sljit_ins)reg_map[rn] << 16) +#define RM4(rm) ((sljit_ins)reg_map[rm]) +#define RT4(rt) ((sljit_ins)reg_map[rt] << 12) +#define DD4(dd) ((sljit_ins)freg_map[dd] << 12) +#define DN4(dn) ((sljit_ins)freg_map[dn] << 16) +#define DM4(dm) ((sljit_ins)freg_map[dm]) #define IMM5(imm) \ - (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) + (COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6)) #define IMM12(imm) \ - (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff)) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -100,7 +102,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ADDSI8 0x3000 #define ADD_W 0xeb000000 #define ADDWI 0xf2000000 -#define ADD_SP 0xb000 +#define ADD_SP 0x4485 +#define ADD_SP_I 0xb000 #define ADD_W 0xeb000000 #define ADD_WI 0xf1000000 #define ANDI 0xf0000000 @@ -126,6 +129,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define EORS 0x4040 #define EOR_W 0xea800000 #define IT 0xbf00 +#define LDR_SP 0x9800 +#define LDR 0xf8d00000 #define LDRI 0xf8500800 #define LSLS 0x4080 #define LSLSI 0x0000 @@ -168,13 +173,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SUBSI8 0x3800 #define SUB_W 0xeba00000 #define SUBWI 0xf2a00000 -#define SUB_SP 0xb080 +#define SUB_SP_I 0xb080 #define SUB_WI 0xf1a00000 #define SXTB 0xb240 #define SXTB_W 0xfa4ff080 #define SXTH 0xb200 #define SXTH_W 0xfa0ff080 #define TST 0x4200 +#define TSTI 0xf0000f00 +#define TST_W 0xea000f00 #define UDIV 0xfbb0f0f0 #define 
UMULL 0xfba00000 #define UXTB 0xb2c0 @@ -188,12 +195,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +214,7 @@ static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); FAIL_IF(!ptr); - *ptr = inst; + *ptr = (sljit_u16)(inst); compiler->size++; return SLJIT_SUCCESS; } @@ -213,8 +223,8 @@ static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) { sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); - *ptr++ = inst >> 16; - *ptr = inst; + *ptr++ = (sljit_u16)(inst >> 16); + *ptr = (sljit_u16)(inst); compiler->size += 2; return SLJIT_SUCCESS; } @@ -229,12 +239,12 @@ static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) { - sljit_s32 dst = inst[1] & 0x0f00; + sljit_ins dst = inst[1] & 0x0f00; SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); - inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); - inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); - inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); - inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); + inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1)); + inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 
0xff)); + inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1)); + inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16)); } static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) @@ -318,24 +328,24 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw case 1: /* Encoding T1 of 'B' instruction */ SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); - jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); + jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); return; case 2: /* Encoding T3 of 'B' instruction */ SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); - jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); - jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); + jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); + jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); return; case 3: SLJIT_ASSERT(jump->flags & IS_COND); - *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; + *jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8); diff--; type = 5; break; case 4: /* Encoding T2 of 'B' instruction */ SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); - jump_inst[0] = 0xe000 | (diff & 0x7ff); + jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff)); return; } @@ -345,8 +355,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw s = (diff >> 23) & 0x1; j1 = (~(diff >> 22) ^ s) & 0x1; j2 = (~(diff >> 21) ^ s) & 0x1; - jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 
11, 0, 10); - jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); + jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10)); + jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff)); /* The others have a common form. */ if (type == 5) /* Encoding T4 of 'B' instruction */ @@ -405,7 +415,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == half_count) { label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == half_count) { @@ -433,7 +443,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == half_count) { label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -457,7 +467,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16); code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -592,7 +602,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } if (flags & (ARG1_IMM | ARG2_IMM)) { - reg = (flags & ARG2_IMM) ? arg1 : arg2; + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); imm = (flags & ARG2_IMM) ? 
arg2 : arg1; switch (flags & 0xffff) { @@ -610,8 +620,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ break; case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - nimm = -(sljit_sw)imm; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + nimm = NEGATE(imm); if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); @@ -633,18 +643,18 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - nimm = get_imm(-(sljit_sw)imm); + nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; imm = get_imm(imm); if (imm != INVALID_IMM) return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; case SLJIT_SUB: - /* SUB operation can be replaced by ADD because of the negative carry flag. 
*/ - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) { if (imm == 0 && IS_2_LO_REGS(reg, dst)) return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); @@ -659,11 +669,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, CMPI_W | RN4(reg) | nimm); - nimm = get_imm(-(sljit_sw)imm); + nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, CMNI_W | RN4(reg) | nimm); + break; } - nimm = -(sljit_sw)imm; + nimm = NEGATE(imm); if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); @@ -685,11 +696,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - nimm = get_imm(-(sljit_sw)imm); + nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = get_imm(imm); @@ -699,8 +711,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_AND: nimm = get_imm(imm); if (nimm != INVALID_IMM) - return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - imm = get_imm(imm); + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? 
TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + imm = get_imm(~imm); if (imm != INVALID_IMM) return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; @@ -708,7 +720,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - imm = get_imm(imm); + imm = get_imm(~imm); if (imm != INVALID_IMM) return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; @@ -752,12 +764,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (flags & ARG2_IMM) { imm = arg2; arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; - FAIL_IF(load_immediate(compiler, arg2, imm)); + FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm)); } else { imm = arg1; arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1; - FAIL_IF(load_immediate(compiler, arg1, imm)); + FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm)); } SLJIT_ASSERT(arg1 != arg2); @@ -768,9 +780,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); - if (dst == arg2) + if (dst == (sljit_s32)arg2) return SLJIT_SUCCESS; return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); case SLJIT_MOV_U8: @@ -803,18 +816,19 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); return SLJIT_SUCCESS; case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); - if (dst == arg1 && !(flags & SET_FLAGS)) + if (dst == 
(sljit_s32)arg1 && !(flags & SET_FLAGS)) return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ADDC: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & UNUSED_RETURN) { if (IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); @@ -824,7 +838,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUBC: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MUL: @@ -836,29 +851,29 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* cmp TMP_REG2, dst asr #31. 
*/ return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); case SLJIT_AND: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); - return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_OR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_XOR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SHL: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_LSHR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ASHR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); } @@ -951,20 +966,22 @@ static const 
sljit_ins sljit_mem32[13] = { /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) { + sljit_uw imm; + if (value >= 0) { if (value <= 0xfff) return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); - value = get_imm(value); - if (value != INVALID_IMM) - return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm); } else { value = -value; if (value <= 0xfff) return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); - value = get_imm(value); - if (value != INVALID_IMM) - return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm); } return SLJIT_ERR_UNSUPPORTED; } @@ -980,13 +997,13 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { - tmp = get_imm(argw & ~0xfff); + tmp = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (tmp != INVALID_IMM) { FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | tmp)); return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff)); } - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags]) return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg)); return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg)); @@ -999,11 +1016,11 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit if (!argw && IS_3_LO_REGS(reg, arg, other_r)) return push_inst16(compiler, 
sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)); - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4)); } if (argw > 0xfff) { - tmp = get_imm(argw & ~0xfff); + tmp = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (tmp != INVALID_IMM) { push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp); arg = tmp_reg; @@ -1011,7 +1028,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xff) { - tmp = get_imm(-argw & ~0xff); + tmp = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); if (tmp != INVALID_IMM) { push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | tmp); arg = tmp_reg; @@ -1037,21 +1054,21 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } if (tmp < 3) - return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp))); + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp))); } else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) { /* SP based immediate. */ - return push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)); + return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 
0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2)); } if (argw >= 0 && argw <= 0xfff) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw); else if (argw < 0 && argw >= -0xff) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw); SLJIT_ASSERT(arg != tmp_reg); - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); if (IS_3_LO_REGS(reg, arg, tmp_reg)) return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)); return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)); @@ -1065,114 +1082,203 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, size, i, tmp; - sljit_ins push = 0; -#ifdef _WIN32 - sljit_uw imm; + sljit_s32 size, i, tmp, word_arg_count, saved_arg_count; + sljit_uw offset; + sljit_uw imm = 0; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; #endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - push |= 1 << reg_map[i]; + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - push |= 1 << reg_map[i]; + imm |= (sljit_uw)1 << reg_map[i]; - FAIL_IF((push & 0xff00) - ? push_inst32(compiler, PUSH_W | (1 << 14) | push) - : push_inst16(compiler, PUSH | (1 << 8) | push)); + /* At least two registers must be set for PUSH_W and one for PUSH instruction. */ + FAIL_IF((imm & 0xff00) + ? push_inst32(compiler, PUSH_W | (1 << 14) | imm) + : push_inst16(compiler, PUSH | (1 << 8) | imm)); /* Stack must be aligned to 8 bytes: (LR, R4) */ size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - local_size = ((size + local_size + 7) & ~7) - size; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPUSH | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; compiler->local_size = local_size; -#ifdef _WIN32 - if (local_size >= 256) { - if (local_size > 4096) - imm = get_imm(4096); - else - imm = get_imm(local_size & ~0xff); + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); - SLJIT_ASSERT(imm != INVALID_IMM); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | 
RN4(SLJIT_SP) | imm)); + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + SLJIT_ASSERT(reg_map[tmp] <= 7); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst16(compiler, MOV | RD3(tmp) | (offset << 1))); + else + FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; } + + compiler->args_size = offset; #else - if (local_size > 0) { - if (local_size <= (127 << 2)) - FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); - else - FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | SLJIT_32 
| DD4(offset) | DM4(old_offset); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = VMOV_F32 | 0x20 | DD4(offset) | DM4(f32_offset); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | DD4(offset) | DM4(old_offset); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; } -#endif - args = get_arg_count(arg_types); + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); - if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); + while (remap_ptr > remap) + FAIL_IF(push_inst32(compiler, *(--remap_ptr))); +#endif #ifdef _WIN32 - if (local_size >= 256) { - if (local_size > 4096) { - imm = get_imm(4096); - SLJIT_ASSERT(imm != INVALID_IMM); - - if (local_size < 4 * 4096) { - if (local_size > 2 * 4096) { - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - local_size -= 4096; - } + if (local_size >= 4096) { + imm = get_imm(4096); + SLJIT_ASSERT(imm != INVALID_IMM); - if (local_size > 2 * 4096) { - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - local_size -= 4096; - } + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - local_size -= 4096; + if (local_size < 4 * 4096) { + if 
(local_size > 2 * 4096) { + if (local_size > 3 * 4096) { + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } - SLJIT_ASSERT(local_size > 0); - } - else { - FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1)); - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - SLJIT_ASSERT(reg_map[SLJIT_R3] < 7); - FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1)); - FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff))); - - local_size &= 0xfff; - - if (local_size != 0) - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1)); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1)); + FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff))); + } - if (local_size >= 256) { - imm = get_imm(local_size & ~0xff); - SLJIT_ASSERT(imm != INVALID_IMM); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + local_size &= 0xfff; + } - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - } - } + if (local_size >= 256) { + SLJIT_ASSERT(local_size < 4096); - local_size &= 0xff; - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 
0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size)); + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1))); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + } else if (local_size > 0) + FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size)); +#else /* !_WIN32 */ + if (local_size > 0) { + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); } - else if (local_size > 0) - FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size)); -#endif +#endif /* _WIN32 */ return SLJIT_SUCCESS; } @@ -1188,37 +1294,143 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((size + local_size + 7) & ~7) - size; + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) { - sljit_s32 i, tmp; - sljit_ins pop = 0; + sljit_uw imm2; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + /* The TMP_REG1 register must keep its value. 
*/ + if (imm <= (127u << 2)) + return push_inst16(compiler, ADD_SP_I | (imm >> 2)); - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm)); - if (compiler->local_size > 0) { - if (compiler->local_size <= (127 << 2)) - FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); - else - FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); + imm2 = get_imm(imm); + + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2); + + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + return push_inst16(compiler, ADD_SP | RN3(TMP_REG2)); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPOP | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - pop |= 1 << reg_map[i]; + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) + lr_dst = 0; + + reg_list = 0; + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - pop |= 1 << reg_map[i]; + reg_list |= (sljit_uw)1 << reg_map[i]; + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + local_size += SSIZE_OF(sw); + + if (reg_list != 0) + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)(frame_size - local_size) >> 2))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (reg_list == 0) + return SLJIT_SUCCESS; + + if (compiler->saveds > 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); + lr_dst = SLJIT_S0; + } else { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); + lr_dst = SLJIT_FIRST_SAVED_REG; + } + + frame_size -= 2 * SSIZE_OF(sw); + + if (reg_map[lr_dst] <= 7) + return push_inst16(compiler, STR_SP | 0x800 | RDN3(lr_dst) | (sljit_uw)(frame_size >> 2)); + + return push_inst32(compiler, LDR | RT4(lr_dst) | RN4(SLJIT_SP) | (sljit_uw)frame_size); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) { + if (lr_dst == TMP_PC) + reg_list |= 1u << 8; + + /* At least one register must be set for POP instruction. */ + SLJIT_ASSERT(reg_list != 0); - return (pop & 0xff00) - ? 
push_inst32(compiler, POP_W | (1 << 15) | pop) - : push_inst16(compiler, POP | (1 << 8) | pop); + FAIL_IF(push_inst16(compiler, POP | reg_list)); + } else { + if (lr_dst != 0) { + if (reg_list == 0) + return push_inst32(compiler, 0xf85d0b04 | RT4(lr_dst)); + + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + } + + /* At least two registers must be set for POP_W instruction. */ + SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0); + + FAIL_IF(push_inst32(compiler, POP_W | reg_list)); + } + + if (frame_size > 0) + return push_inst16(compiler, SUB_SP_I | (((sljit_uw)frame_size - sizeof(sljit_sw)) >> 2)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); } /* --------------------------------------------------------------------- */ @@ -1250,8 +1462,8 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) - sljit_sw saved_reg_list[3]; - sljit_sw saved_reg_count; + sljit_uw saved_reg_list[3]; + sljit_uw saved_reg_count; #endif CHECK_ERROR(); @@ -1266,10 +1478,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: return push_inst32(compiler, (op == SLJIT_LMUL_UW ? 
UMULL : SMULL) - | (reg_map[SLJIT_R1] << 8) - | (reg_map[SLJIT_R0] << 12) - | (reg_map[SLJIT_R0] << 16) - | reg_map[SLJIT_R1]); + | RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); #if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__) case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: @@ -1314,10 +1523,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1))); FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1))); FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv)))); #elif defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif @@ -1356,7 +1565,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { @@ -1364,6 +1573,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; break; @@ -1394,12 +1604,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } if (src & SLJIT_IMM) - FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw)); + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); else if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); } else { if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op, dst_r, TMP_REG2, src); + return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); dst_r = src; } @@ -1409,14 +1619,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); } - if (op == SLJIT_NEG) { -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); - } - flags = HAS_FLAGS(op_flags) ? 
SET_FLAGS : 0; if (src & SLJIT_MEM) { @@ -1424,7 +1626,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile src = TMP_REG1; } - emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, src); + emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src); if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); @@ -1439,17 +1641,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 dst_reg, flags, src2_reg; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; + if (dst == TMP_REG1) + flags |= UNUSED_RETURN; + if (src1 & SLJIT_IMM) flags |= ARG1_IMM; else if (src1 & SLJIT_MEM) { @@ -1469,16 +1671,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile else src2w = src2; - if (dst == SLJIT_UNUSED) - flags |= UNUSED_RETURN; - - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1521,7 +1734,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1540,22 +1753,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); /* Fast 
loads and stores. */ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6))); + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6))); arg = SLJIT_MEM | TMP_REG1; argw = 0; } if ((arg & REG_MASK) && (argw & 0x3) == 0) { if (!(argw & ~0x3fc)) - return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2)); + return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)argw >> 2)); if (!(-argw & ~0x3fc)) - return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2)); + return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)-argw >> 2)); } if (arg & REG_MASK) { @@ -1563,20 +1776,22 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, FAIL_IF(compiler->error); return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); } - imm = get_imm(argw & ~0x3fc); + + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm != INVALID_IMM) { FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); } - imm = get_imm(-argw & ~0x3fc); + + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm != INVALID_IMM) { argw = -argw; FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); - return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); + return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); } } - FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); if (arg & 
REG_MASK) FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK)))); return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); @@ -1586,14 +1801,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; } - FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src))); + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | DD4(TMP_FREG1) | DM4(src))); if (FAST_IS_REG(dst)) return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); @@ -1608,7 +1823,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (FAST_IS_REG(src)) FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); @@ -1617,14 +1832,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); } - FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_32) | DD4(dst_r) | DM4(TMP_FREG1))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -1632,19 +1847,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); src2 = TMP_FREG2; } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_F32_OP) | DD4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | DD4(src1) | DM4(src2))); return push_inst32(compiler, VMRS); } @@ -1656,16 +1871,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), 
float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw); src = dst_r; } @@ -1673,25 +1888,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_32; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -1708,36 +1923,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); src2 = TMP_FREG2; } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; } if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); } #undef FPU_LOAD @@ -1776,10 +1991,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x1; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x3; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough 
*/ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x2; @@ -1805,15 +2030,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x6; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x7; @@ -1874,113 +2101,126 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #ifdef __SOFTFP__ -static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; - sljit_s32 word_arg_offset = 0; - sljit_s32 float_arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 float_arg_count = 0; sljit_s32 types = 0; - sljit_s32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 src_offset = 4 * sizeof(sljit_sw); sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; if (src && FAST_IS_REG(*src)) - src_offset = reg_map[*src] * sizeof(sljit_sw); + src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; - 
stack_offset += sizeof(sljit_f32); - arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f64); - arg_count++; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); word_arg_offset += sizeof(sljit_sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } - if (stack_offset > 16) - FAIL_IF(push_inst16(compiler, SUB_SP | (((stack_offset - 16) + 0x7) & ~0x7) >> 2)); + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2))); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } SLJIT_ASSERT(reg_map[TMP_REG1] == 12); /* Process arguments in reversed direction. 
*/ while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: float_arg_count--; - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); + + SLJIT_ASSERT((offset & 0x7) == 0); - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); *src = TMP_REG1; } - FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); } else - FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); break; - case SLJIT_ARG_TYPE_F64: - arg_count--; + case SLJIT_ARG_TYPE_F32: float_arg_count--; - stack_offset = offsets[arg_count]; - - SLJIT_ASSERT((stack_offset & 0x7) == 0); + offset = *(--offset_ptr); - if (stack_offset < 16) { - if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); *src = TMP_REG1; } - FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); } else - FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 
* sizeof(sljit_sw)) >> 2))); break; default: - arg_count--; word_arg_offset -= sizeof(sljit_sw); - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT(stack_offset >= word_arg_offset); + SLJIT_ASSERT(offset >= word_arg_offset); - if (stack_offset != word_arg_offset) { - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); *src = TMP_REG1; } else if (src_offset == word_arg_offset) { - *src = 1 + (stack_offset >> 2); - src_offset = stack_offset; + *src = (sljit_s32)(1 + (offset >> 2)); + src_offset = offset; } - FAIL_IF(push_inst16(compiler, MOV | (stack_offset >> 2) | (word_arg_offset << 1))); + FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1))); } else - FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); } break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -1988,83 +2228,48 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_s32 stack_size = 0; - - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) - FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += 
sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_size & 0x7) - stack_size += sizeof(sljit_sw); - stack_size += sizeof(sljit_f64); - break; - default: - stack_size += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - if (stack_size <= 16) - return SLJIT_SUCCESS; - - return push_inst16(compiler, ADD_SP | ((((stack_size - 16) + 0x7) & ~0x7) >> 2)); + return SLJIT_SUCCESS; } #else static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_u32 remap = 0; - sljit_u32 offset = 0; - sljit_u32 new_offset, mask; + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; /* Remove return value. */ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { - new_offset = 0; - mask = 1; - - while (remap & mask) { - new_offset++; - mask <<= 1; - } - remap |= mask; - + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: if (offset != new_offset) - FAIL_IF(push_inst32(compiler, VMOV_F32 | DD4((new_offset >> 1) + 1) - | ((new_offset & 0x1) ? 
0x400000 : 0) | DM4((offset >> 1) + 1))); - - offset += 2; - } - else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { - new_offset = 0; - mask = 3; + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | DD4(new_offset) | DM4(offset))); - while (remap & mask) { - new_offset += 2; - mask <<= 2; + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(f32_offset) | DM4(offset))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(new_offset) | DM4(offset))); + f32_offset = new_offset; + new_offset++; } - remap |= mask; - - if (offset != new_offset) - FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_F32_OP | DD4((new_offset >> 1) + 1) | DM4((offset >> 1) + 1))); - - offset += 2; + offset++; + break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2077,13 +2282,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile { #ifdef __SOFTFP__ struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; #endif CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2093,9 +2303,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - 
sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); return jump; #else + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2132,7 +2362,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; @@ -2143,16 +2373,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#ifdef __SOFTFP__ if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || 
(defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2161,8 +2404,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) + return push_inst16(compiler, BX | RN3(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); return softfloat_post_call_with_args(compiler, arg_types); #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2236,7 +2497,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; cc = get_cc(compiler, type & 0xff); @@ -2254,13 +2515,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); } - tmp = get_imm(srcw); + tmp = get_imm((sljit_uw)srcw); if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); } - tmp = get_imm(~srcw); + tmp = get_imm(~(sljit_uw)srcw); if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); @@ -2295,6 +2556,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; 
break; @@ -2329,7 +2591,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | memw); + return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2346,7 +2608,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi set_const(const_, compiler); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value)); if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); @@ -2388,5 +2650,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c index a90345f1f8..1a06b17d12 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -73,50 +73,49 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return 
push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S8) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); #endif /* SLJIT_MIPS_REV >= 1 */ - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S16) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); #endif /* 
SLJIT_MIPS_REV >= 1 */ - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_NOT: @@ -438,131 +437,120 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; sljit_s32 float_arg_count = 0; sljit_s32 word_arg_count = 0; sljit_s32 types = 0; - sljit_s32 arg_count_save, types_save; sljit_ins prev_ins = NOP; sljit_ins ins = NOP; sljit_u8 offsets[4]; + sljit_u8 *offsets_ptr = offsets; SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; + + /* See ABI description in sljit_emit_enter. 
*/ while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + *offsets_ptr = (sljit_u8)offset; - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) { + offset += sizeof(sljit_sw); + *offsets_ptr = (sljit_u8)offset; + } - if (word_arg_count == 0 && arg_count <= 1) - offsets[arg_count] = 254 + arg_count; + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); - stack_offset += sizeof(sljit_f32); - arg_count++; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; - - if (word_arg_count == 0 && arg_count <= 1) - offsets[arg_count] = 254 + arg_count; + case SLJIT_ARG_TYPE_F32: + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); - stack_offset += sizeof(sljit_f64); - arg_count++; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + offset += sizeof(sljit_sw); word_arg_count++; break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; + offsets_ptr++; } - /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ - if (stack_offset > 16) - FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + /* Stack is aligned to 16 bytes. 
*/ + SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw)); - types_save = types; - arg_count_save = arg_count; + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + if (is_tail_call) { + offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf; + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins)); + *extra_space = offset; + } else { + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + *extra_space = 16; + } + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins)); + *extra_space = 0; + } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; - if (offsets[arg_count] < 254) - ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); - float_arg_count--; - break; - case SLJIT_ARG_TYPE_F64: - arg_count--; - if (offsets[arg_count] < 254) - ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); - float_arg_count--; - break; - default: - if (offsets[arg_count - 1] >= 16) - ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]); - else if (arg_count != word_arg_count) - ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2)); - else if (arg_count == 1) - ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); + --offsets_ptr; - arg_count--; - word_arg_count--; - break; - } - - if (ins != NOP) { - if (prev_ins != NOP) - FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); - prev_ins = ins; - ins = NOP; - } + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (*offsets_ptr < 4 * sizeof (sljit_sw)) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); - types >>= SLJIT_DEF_SHIFT; - } + /* Must be preceded by at least one other argument, + * and its starting offset must be 8 because of alignment. 
*/ + SLJIT_ASSERT((*offsets_ptr >> 2) == 2); - types = types_save; - arg_count = arg_count_save; + prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11); + ins = MFC1 | TA(7) | FS(float_arg_count); + } else if (*offsets_ptr < 254) + ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); - while (types) { - switch (types & SLJIT_DEF_MASK) { + float_arg_count--; + break; case SLJIT_ARG_TYPE_F32: - arg_count--; - if (offsets[arg_count] == 254) + if (*offsets_ptr < 4 * sizeof (sljit_sw)) + ins = MFC1 | TA(4 + (*offsets_ptr >> 2)) | FS(float_arg_count); + else if (*offsets_ptr < 254) + ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); - else if (offsets[arg_count] < 16) - ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); - break; - case SLJIT_ARG_TYPE_F64: - arg_count--; - if (offsets[arg_count] == 254) - ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); - else if (offsets[arg_count] < 16) { - if (prev_ins != NOP) - FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); - prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); - ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw)); - } + + float_arg_count--; break; default: - arg_count--; + if (*offsets_ptr >= 4 * sizeof (sljit_sw)) + ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(*offsets_ptr); + else if ((*offsets_ptr >> 2) != word_arg_count - 1) + ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (*offsets_ptr >> 2)); + else if (*offsets_ptr == 0) + ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); + + word_arg_count--; break; } @@ -573,7 +561,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t ins = NOP; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } 
*ins_ptr = prev_ins; @@ -581,41 +569,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t return SLJIT_SUCCESS; } -static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) -{ - sljit_s32 stack_offset = 0; - - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_offset += sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - stack_offset += sizeof(sljit_f64); - break; - default: - stack_offset += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ - if (stack_offset > 16) - return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP)); - - return SLJIT_SUCCESS; -} - SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; sljit_ins ins; CHECK_ERROR_PTR(); @@ -624,21 +582,34 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); - jump->flags |= IS_JAL | IS_CALL; - PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) { + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + 
PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); - PTR_FAIL_IF(post_call_with_args(compiler, arg_types)); + if (extra_space == 0) + return jump; + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP))); return jump; } @@ -646,6 +617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + sljit_u32 extra_space = (sljit_u32)type; sljit_ins ins; CHECK_ERROR(); @@ -662,10 +634,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); } - FAIL_IF(call_with_args(compiler, arg_types, &ins)); + FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); /* Register input. 
*/ - FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); - return post_call_with_args(compiler, arg_types); + + if (extra_space == 0) + return SLJIT_SUCCESS; + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP)); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c index 1f22e49ed9..c2b3d839c2 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -46,9 +46,9 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a } /* Zero extended number. */ - uimm = imm; + uimm = (sljit_uw)imm; if (imm < 0) { - uimm = ~imm; + uimm = ~(sljit_uw)imm; inv = 1; } @@ -119,7 +119,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a } #define SELECT_OP(a, b) \ - (!(op & SLJIT_I32_OP) ? a : b) + (!(op & SLJIT_32) ? 
a : b) #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ @@ -138,19 +138,19 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a #define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ if (flags & SRC2_IMM) { \ if (src2 >= 32) { \ - SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \ + SLJIT_ASSERT(!(op & SLJIT_32)); \ ins = op_dimm32; \ src2 -= 32; \ } \ else \ - ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \ + ins = (op & SLJIT_32) ? op_imm : op_dimm; \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ } \ else { \ - ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \ + ins = (op & SLJIT_32) ? op_v : op_dv; \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ if (!(flags & UNUSED_DEST)) \ @@ -165,50 +165,71 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S8) { - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), 
DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S16) { - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U32: - SLJIT_ASSERT(!(op & SLJIT_I32_OP)); - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); - return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (dst == src2) + return 
push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); @@ -234,7 +255,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); /* Check zero. */ FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); /* Loop for searching the highest bit. 
*/ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); @@ -462,7 +483,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); return push_inst(compiler, MFLO | D(dst), DR(dst)); @@ -528,10 +549,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); - inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); - inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | ((sljit_ins)new_target & 0xffff); SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); @@ -539,7 +560,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) @@ -548,19 
+569,19 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_s32 word_arg_count = 0; sljit_s32 float_arg_count = 0; sljit_s32 types = 0; - sljit_ins prev_ins = NOP; + sljit_ins prev_ins = *ins_ptr; sljit_ins ins = NOP; SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; float_arg_count++; break; @@ -570,24 +591,24 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: if (arg_count != float_arg_count) - ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); + ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); else if (arg_count == 1) - ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); arg_count--; float_arg_count--; break; - case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: if (arg_count != float_arg_count) - ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); + ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); else if (arg_count == 1) - ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); arg_count--; float_arg_count--; break; @@ -608,7 +629,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t ins = NOP; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } *ins_ptr = prev_ins; @@ -620,7 +641,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile sljit_s32 arg_types) { struct sljit_jump *jump; - sljit_ins ins; + sljit_ins ins = NOP; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); @@ -628,7 +649,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); @@ -636,8 +659,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); - jump->flags |= IS_JAL | IS_CALL; - PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN)) { + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); @@ -648,7 +675,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { - sljit_ins ins; + sljit_ins ins = NOP; CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); @@ -664,9 +691,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); } + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + FAIL_IF(call_with_args(compiler, arg_types, &ins)); /* Register input. 
*/ - FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN)) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); return push_inst(compiler, ins, UNMOVABLE_INS); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c index fd747695a7..be5cb22a23 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -86,13 +86,13 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { - 0, 0, 14, 2, 4, 6, 8, 12, 10, 16 + 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16 }; #else static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { - 0, 0, 13, 14, 15, 16, 17, 12, 18, 10 + 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10 }; #endif @@ -101,23 +101,23 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define S(s) (reg_map[s] << 21) -#define T(t) (reg_map[t] << 16) -#define D(d) (reg_map[d] << 11) -#define FT(t) (freg_map[t] << 16) -#define FS(s) (freg_map[s] << 11) -#define FD(d) (freg_map[d] << 6) +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define T(t) ((sljit_ins)reg_map[t] << 16) +#define D(d) ((sljit_ins)reg_map[d] << 11) +#define FT(t) ((sljit_ins)freg_map[t] << 16) +#define FS(s) ((sljit_ins)freg_map[s] << 11) +#define FD(d) ((sljit_ins)freg_map[d] << 6) /* Absolute registers. 
*/ -#define SA(s) ((s) << 21) -#define TA(t) ((t) << 16) -#define DA(d) ((d) << 11) -#define IMM(imm) ((imm) & 0xffff) -#define SH_IMM(imm) ((imm) << 6) +#define SA(s) ((sljit_ins)(s) << 21) +#define TA(t) ((sljit_ins)(t) << 16) +#define DA(d) ((sljit_ins)(d) << 11) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define SH_IMM(imm) ((sljit_ins)(imm) << 6) #define DR(dr) (reg_map[dr]) #define FR(dr) (freg_map[dr]) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) (opcode) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) /* CMP.cond.fmt */ /* S = (20 << 21) D = (21 << 21) */ @@ -186,6 +186,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define DMULTU (HI(0) | LO(29)) #endif /* SLJIT_MIPS_REV >= 6 */ #define DIV_S (HI(17) | FMT_S | LO(3)) +#define DINSU (HI(31) | LO(6)) #define DSLL (HI(0) | LO(56)) #define DSLL32 (HI(0) | LO(60)) #define DSLLV (HI(0) | LO(20)) @@ -205,8 +206,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define JR (HI(0) | LO(8)) #endif /* SLJIT_MIPS_REV >= 6 */ #define LD (HI(55)) +#define LDC1 (HI(53)) #define LUI (HI(15)) #define LW (HI(35)) +#define LWC1 (HI(49)) #define MFC1 (HI(17)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) #define MOD (HI(0) | (3 << 6) | LO(26)) @@ -292,7 +295,8 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS - || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); + || (sljit_ins)delay_slot == ((ins >> 11) & 0x1f) + || (sljit_ins)delay_slot == ((ins >> 16) & 0x1f)); FAIL_IF(!ptr); *ptr = ins; compiler->size++; @@ -300,7 +304,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit return SLJIT_SUCCESS; } -static SLJIT_INLINE 
sljit_ins invert_branch(sljit_s32 flags) +static SLJIT_INLINE sljit_ins invert_branch(sljit_uw flags) { if (flags & IS_BIT26_COND) return (1 << 26); @@ -371,7 +375,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i inst[1] = NOP; return inst + 1; } - inst[0] = inst[0] ^ invert_branch(jump->flags); + inst[0] ^= invert_branch(jump->flags); inst[1] = NOP; jump->addr -= sizeof(sljit_ins); return inst + 1; @@ -379,7 +383,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } if (jump->flags & IS_COND) { - if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; saved_inst = inst[0]; inst[0] = inst[-1]; @@ -388,7 +392,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i inst[2] = NOP; return inst + 2; } - else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) { + else if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = (inst[0] & 0xffff0000) | 3; inst[1] = NOP; @@ -400,7 +404,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } else { /* J instuctions. */ - if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == (jump->addr & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = inst[-1]; inst[-1] = (jump->flags & IS_JAL) ? 
JAL : J; @@ -408,7 +412,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i return inst; } - if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) { + if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = (jump->flags & IS_JAL) ? JAL : J; inst[1] = NOP; @@ -472,7 +476,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) { sljit_uw addr = put_label->label->addr; sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_s32 reg = *inst; + sljit_u32 reg = *inst; if (put_label->flags == 0) { SLJIT_ASSERT(addr < 0x80000000l); @@ -548,7 +552,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -584,7 +588,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -601,39 +605,46 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2; + addr = (sljit_uw)((sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) - sizeof(sljit_ins)) >> 2); SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((sljit_ins)addr & 
0xffff); break; } if (jump->flags & PATCH_J) { - SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff)); - buf_ptr[0] |= (addr >> 2) & 0x03ffffff; + SLJIT_ASSERT((addr & ~(sljit_uw)0xfffffff) + == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)); + buf_ptr[0] |= (sljit_ins)(addr >> 2) & 0x03ffffff; break; } /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; #else if (jump->flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= 0x7fffffff); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; + break; } - else if (jump->flags & PATCH_ABS48) { + + if (jump->flags & PATCH_ABS48) { SLJIT_ASSERT(addr <= 0x7fffffffffffl); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); - } - else { - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[5] = (buf_ptr[5] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[3] |= (sljit_ins)addr & 0xffff; + 
break; } + + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[5]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[5] |= (sljit_ins)addr & 0xffff; #endif } while (0); jump = jump->next; @@ -656,7 +667,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -673,7 +684,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { +#if defined(__GNUC__) && !defined(SLJIT_IS_FPU_AVAILABLE) sljit_sw fir = 0; +#endif /* __GNUC__ && !SLJIT_IS_FPU_AVAILABLE */ switch (feature_type) { case SLJIT_HAS_FPU: @@ -696,7 +709,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #endif /* SLJIT_MIPS_REV >= 1 */ default: - return fir; + return 0; } } @@ -723,15 +736,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define CUMULATIVE_OP 0x00080 #define LOGICAL_OP 0x00100 #define IMM_OP 0x00200 -#define SRC2_IMM 0x00400 +#define MOVE_OP 0x00400 +#define SRC2_IMM 0x00800 -#define UNUSED_DEST 0x00800 -#define REG_DEST 0x01000 -#define REG1_SOURCE 0x02000 -#define REG2_SOURCE 0x04000 -#define SLOW_SRC1 0x08000 -#define SLOW_SRC2 0x10000 -#define SLOW_DEST 0x20000 +#define UNUSED_DEST 0x01000 +#define REG_DEST 0x02000 +#define REG1_SOURCE 0x04000 +#define REG2_SOURCE 0x08000 +#define SLOW_SRC1 0x10000 +#define SLOW_SRC2 0x20000 +#define SLOW_DEST 0x40000 #if (defined 
SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #define STACK_STORE SW @@ -741,7 +755,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define STACK_LOAD LD #endif -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #include "sljitNativeMIPS_32.c" @@ -754,56 +769,195 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_ins base; - sljit_s32 args, i, tmp, offs; + sljit_s32 i, tmp, offset; + sljit_s32 arg_count, word_arg_count, saved_arg_count, float_arg_count; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - local_size = (local_size + 15) & ~0xf; + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; #else - local_size = (local_size + 31) & ~0x1f; + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; #endif compiler->local_size = 
local_size; - if (local_size <= SIMM_MAX) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + tmp = arg_types >> SLJIT_ARG_SHIFT; + arg_count = 0; + offset = 0; + + while (tmp) { + offset = arg_count; + if ((tmp & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) { + if ((arg_count & 0x1) != 0) + arg_count++; + arg_count++; + } + + arg_count++; + tmp >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = (sljit_uw)arg_count << 2; + offset = (offset >= 4) ? (offset << 2) : 0; +#else /* !SLJIT_CONFIG_MIPS_32 */ + offset = 0; +#endif /* SLJIT_CONFIG_MIPS_32 */ + + if (local_size + offset <= -SIMM_MIN) { /* Frequent case. */ FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); base = S(SLJIT_SP); - offs = local_size - (sljit_sw)sizeof(sljit_sw); - } - else { + offset = local_size - SSIZE_OF(sw); + } else { FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size)); FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); + offset = -SSIZE_OF(sw); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = 0; - offs = -(sljit_sw)sizeof(sljit_sw); +#endif } - FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offset), MOVABLE_INS)); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offset), MOVABLE_INS)); } for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offset), MOVABLE_INS)); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + arg_types >>= SLJIT_ARG_SHIFT; + arg_count = 0; + word_arg_count = 0; + saved_arg_count = 0; + float_arg_count = 0; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* The first maximum two floating point arguments are passed in floating point + registers if no integer argument precedes them. The first 16 byte data is + passed in four integer registers, the rest is placed onto the stack. + The floating point registers are also part of the first 16 byte data, so + their corresponding integer registers are not used when they are present. 
*/ + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if ((arg_count & 0x1) != 0) + arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) { + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count + 1 || arg_count == 0) + tmp = word_arg_count; + else + break; + + if (arg_count < 4) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(tmp), DR(tmp))); + else + FAIL_IF(push_inst(compiler, LW | base | T(tmp) | IMM(local_size + (arg_count << 2)), DR(tmp))); + break; + } + arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; } - args = get_arg_count(arg_types); + SLJIT_ASSERT(compiler->args_size == (sljit_uw)arg_count << 2); +#else /* !SLJIT_CONFIG_MIPS_32 */ + while (arg_types) { + arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if (arg_count != 
float_arg_count) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count || word_arg_count <= 1) + tmp = word_arg_count; + else + break; - if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(tmp), DR(tmp))); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ return SLJIT_SUCCESS; } @@ -816,57 +970,110 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - compiler->local_size = (local_size + 15) & ~0xf; + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += 
SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; #else - compiler->local_size = (local_size + 31) & ~0x1f; + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr) { - sljit_s32 local_size, i, tmp, offs; - sljit_ins base; + sljit_s32 local_size, i, tmp, offset; + sljit_s32 scratches = compiler->scratches; + sljit_s32 saveds = compiler->saveds; + sljit_s32 fsaveds = compiler->fsaveds; + sljit_s32 fscratches = compiler->fscratches; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + local_size = compiler->local_size; - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + tmp = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((tmp & SSIZE_OF(sw)) != 0) + tmp += SSIZE_OF(sw); + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif - local_size = compiler->local_size; - if (local_size <= SIMM_MAX) - base = S(SLJIT_SP); - else { - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); - base = S(TMP_REG1); - local_size = 0; + if (local_size <= SIMM_MAX) { + if (local_size < frame_size) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - 
frame_size), DR(SLJIT_SP))); + local_size = frame_size; + } + } else { + if (tmp < frame_size) + tmp = frame_size; + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size - tmp)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + local_size = tmp; } - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_s32)sizeof(sljit_sw)), RETURN_ADDR_REG)); - offs = local_size - (sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + SLJIT_ASSERT(local_size >= frame_size); + + offset = local_size - SSIZE_OF(sw); + if (frame_size == 0) + FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | TA(RETURN_ADDR_REG) | IMM(offset), RETURN_ADDR_REG)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); + } - tmp = compiler->scratches; - for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); - offs += (sljit_s32)(sizeof(sljit_sw)); + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = tmp; i <= SLJIT_S0; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); - offs += (sljit_s32)(sizeof(sljit_sw)); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); } - SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } - FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); - if (compiler->local_size <= SIMM_MAX) - return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); + if (local_size > frame_size) + *ins_ptr = ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size); else - return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); + *ins_ptr = NOP; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + emit_stack_frame_release(compiler, 0, &ins); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); } #undef STACK_STORE @@ -1041,7 +1248,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { sljit_s32 tmp_ar, base, delay_slot; @@ -1104,14 +1311,14 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 
compiler->cache_argw = 0; } - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (dst == TMP_REG2) { SLJIT_ASSERT(HAS_FLAGS(op)); flags |= UNUSED_DEST; } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + if (flags & MOVE_OP) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) @@ -1165,8 +1372,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) - dst_r = src2_r; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; } else if (src2 & SLJIT_IMM) { if (!(flags & SRC2_IMM)) { @@ -1176,8 +1383,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM)) - dst_r = 0; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } } } } @@ -1221,7 +1432,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - sljit_s32 int_op = op & SLJIT_I32_OP; + sljit_s32 int_op = op & SLJIT_32; #endif CHECK_ERROR(); @@ -1326,11 +1537,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 -#else sljit_s32 flags = 0; -#endif CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -1338,58 +1545,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src, srcw); #if (defined SLJIT_CONFIG_MIPS_64 && 
SLJIT_CONFIG_MIPS_64) - if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) - flags |= INT_DATA | SIGNED_DATA; + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; #endif switch (GET_OPCODE(op)) { case SLJIT_MOV: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) case SLJIT_MOV_U32: -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); -#else - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); -#endif + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); -#else - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif case SLJIT_MOV_U8: - return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_NOT: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); } SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags -#endif } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, @@ -1397,23 +1596,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 -#else sljit_s32 flags = 0; -#endif CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - #if (defined 
SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) src1w = (sljit_s32)src1w; @@ -1425,12 +1617,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: case SLJIT_SUBC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: @@ -1450,7 +1642,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile src2w &= 0x1f; #else if (src2 & SLJIT_IMM) { - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) src2w &= 0x1f; else src2w &= 0x3f; @@ -1461,10 +1653,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; +} -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; #endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, @@ -1512,7 +1714,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1524,17 +1726,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) -#define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8)) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) << (21 - 8)) static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 +# define flags (sljit_u32)0 #else - sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) << 21; + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; #endif if (src & SLJIT_MEM) { @@ -1560,9 +1762,9 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 +# define flags (sljit_u32)0 #else - sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; #endif sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; @@ -1582,7 +1784,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); @@ -1640,11 +1842,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1669,8 +1871,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_F32_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst(compiler, CVT_S_S | (sljit_ins)((op & SLJIT_32) ? 
1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_32; break; } @@ -1841,7 +2043,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile { struct sljit_jump *jump; sljit_ins inst; - sljit_s32 flags = 0; + sljit_u32 flags = 0; sljit_s32 delay_check = UNMOVABLE_INS; CHECK_ERROR_PTR(); @@ -1864,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: + case SLJIT_CARRY: BR_Z(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -1871,6 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: BR_NZ(OTHER_FLAG); break; case SLJIT_NOT_EQUAL_F64: @@ -1947,7 +2151,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler compiler->cache_arg = 0; compiler->cache_argw = 0; - flags = ((type & SLJIT_I32_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; + flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; if (src1 & SLJIT_MEM) { PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); src1 = TMP_REG1; @@ -2074,7 +2278,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; if (compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -2103,7 +2307,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) sljit_s32 mem_type = WORD_DATA; #else - sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? 
(INT_DATA | SIGNED_DATA) : WORD_DATA; #endif CHECK_ERROR(); @@ -2111,10 +2315,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); -#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op == SLJIT_MOV_S32) - mem_type = INT_DATA | SIGNED_DATA; -#endif dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); compiler->cache_arg = 0; @@ -2131,7 +2331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_OVERFLOW: case SLJIT_NOT_OVERFLOW: - if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { src_ar = OTHER_FLAG; break; } @@ -2142,6 +2342,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_GREATER_F64: case SLJIT_LESS_EQUAL_F64: type ^= 0x1; /* Flip type bit for the XORI below. */ + /* fallthrough */ case SLJIT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64: case SLJIT_LESS_F64: @@ -2203,7 +2404,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (dst_reg & SLJIT_I32_OP) + if (dst_reg & SLJIT_32) srcw = (sljit_s32)srcw; #endif FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); @@ -2211,7 +2412,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil srcw = 0; } - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; switch (type & 0xff) { case SLJIT_EQUAL: @@ -2298,7 +2499,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); #else - PTR_FAIL_IF(push_inst(compiler, dst_r, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); compiler->size += 5; 
#endif diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c index 6ddb5508ec..95fe6bbe0e 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c @@ -86,11 +86,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, CNTLZW | S(src2) | A(dst)); @@ -158,7 +153,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & ALT_FORM3) { /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); } if (flags & ALT_FORM4) { @@ -167,11 +164,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return 
push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); @@ -277,5 +280,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c index cbdf2dd8a2..d104f6d75f 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c @@ -57,20 +57,20 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, } /* Count leading zeroes. */ - tmp = (imm >= 0) ? imm : ~imm; + tmp = (sljit_uw)((imm >= 0) ? imm : ~imm); ASM_SLJIT_CLZ(tmp, shift); SLJIT_ASSERT(shift > 0); shift--; - tmp = (imm << shift); + tmp = ((sljit_uw)imm << shift); if ((tmp & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); shift += 15; return PUSH_RLDICR(reg, shift); } if ((tmp & ~0xffffffff00000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); shift += 31; return PUSH_RLDICR(reg, shift); @@ -78,18 +78,18 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, /* Cut out the 16 bit from immediate. 
*/ shift += 15; - tmp2 = imm & ((1ul << (63 - shift)) - 1); + tmp2 = (sljit_uw)imm & (((sljit_uw)1 << (63 - shift)) - 1); if (tmp2 <= 0xffff) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); FAIL_IF(PUSH_RLDICR(reg, shift)); - return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); + return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2); } if (tmp2 <= 0xffffffff) { FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16))); return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; } @@ -97,16 +97,16 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, tmp2 <<= shift2; if ((tmp2 & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); shift2 += 15; shift += (63 - shift2); FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48))); return PUSH_RLDICR(reg, shift2); } /* The general version. 
*/ - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); FAIL_IF(PUSH_RLDICR(reg, 31)); FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); @@ -199,19 +199,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl UN_EXTS(); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - - if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2))); - return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); - } - - UN_EXTS(); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); if (flags & ALT_FORM1) @@ -299,13 +286,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & ALT_FORM3) { if (flags & ALT_SIGN_EXT) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); - src1 = TMP_REG1; - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - src2 = TMP_REG2; + if (src1 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + } + if (src2 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } } + /* Setting XER SO is not enough, CR SO is also needed. 
*/ - FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + if (src1 != TMP_ZERO) + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + else + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2))); + if (flags & ALT_SIGN_EXT) return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); return SLJIT_SUCCESS; @@ -317,12 +313,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + BIN_EXTS(); if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: BIN_EXTS(); @@ -432,14 +434,14 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t if (src) reg = *src & REG_MASK; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; break; default: @@ -453,13 +455,13 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } while (types) { - switch (types & SLJIT_DEF_MASK) { - case 
SLJIT_ARG_TYPE_F32: + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count--; break; default: @@ -471,7 +473,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -492,10 +494,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); - inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); - inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff); + inst[0] = (inst[0] & 0xffff0000u) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000u) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000u) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000u) | ((sljit_ins)new_target & 0xffff); SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); @@ -503,5 +505,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c index 2174dbb07b..8bfdc69522 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c @@ -109,32 +109,32 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { }; static const sljit_u8 
freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 1, 2, 3, 4, 5, 6, 0, 7 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 0, 13 }; /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define D(d) (reg_map[d] << 21) -#define S(s) (reg_map[s] << 21) -#define A(a) (reg_map[a] << 16) -#define B(b) (reg_map[b] << 11) -#define C(c) (reg_map[c] << 6) -#define FD(fd) (freg_map[fd] << 21) -#define FS(fs) (freg_map[fs] << 21) -#define FA(fa) (freg_map[fa] << 16) -#define FB(fb) (freg_map[fb] << 11) -#define FC(fc) (freg_map[fc] << 6) -#define IMM(imm) ((imm) & 0xffff) -#define CRD(d) ((d) << 21) +#define D(d) ((sljit_ins)reg_map[d] << 21) +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define A(a) ((sljit_ins)reg_map[a] << 16) +#define B(b) ((sljit_ins)reg_map[b] << 11) +#define C(c) ((sljit_ins)reg_map[c] << 6) +#define FD(fd) ((sljit_ins)freg_map[fd] << 21) +#define FS(fs) ((sljit_ins)freg_map[fs] << 21) +#define FA(fa) ((sljit_ins)freg_map[fa] << 16) +#define FB(fb) ((sljit_ins)freg_map[fb] << 11) +#define FC(fc) ((sljit_ins)freg_map[fc] << 6) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define CRD(d) ((sljit_ins)(d) << 21) /* Instruction bit sections. OE and Rc flag (see ALT_SET_FLAGS). */ #define OE(flags) ((flags) & ALT_SET_FLAGS) /* Rc flag (see ALT_SET_FLAGS). 
*/ #define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) ((opcode) << 1) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode) << 1) #define ADD (HI(31) | LO(266)) #define ADDC (HI(31) | LO(10)) @@ -182,6 +182,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FSUB (HI(63) | LO(20)) #define FSUBS (HI(59) | LO(20)) #define LD (HI(58) | 0) +#define LFD (HI(50)) #define LWZ (HI(32)) #define MFCR (HI(31) | LO(19)) #define MFLR (HI(31) | LO(339) | 0x80000) @@ -215,6 +216,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define STD (HI(62) | 0) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) +#define STFD (HI(54)) #define STFIWX (HI(31) | LO(983)) #define STW (HI(36)) #define STWU (HI(37)) @@ -232,15 +234,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define UIMM_MAX (0xffff) #define RLDI(dst, src, sh, mb, type) \ - (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) + (HI(30) | S(src) | A(dst) | ((sljit_ins)(type) << 2) | (((sljit_ins)(sh) & 0x1f) << 11) \ + | (((sljit_ins)(sh) & 0x20) >> 4) | (((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20)) #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func) { - sljit_sw* ptrs; + sljit_uw* ptrs; + if (func_ptr) *func_ptr = (void*)context; - ptrs = (sljit_sw*)func; + + ptrs = (sljit_uw*)func; context->addr = addr ? 
addr : ptrs[0]; context->r2 = ptrs[1]; context->r11 = ptrs[2]; @@ -260,7 +265,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in { sljit_sw diff; sljit_uw target_addr; - sljit_sw extra_jump_flags; + sljit_uw extra_jump_flags; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) @@ -296,7 +301,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in } extra_jump_flags = REMOVE_COND; - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x01ffffff && diff >= -0x02000000) { @@ -349,7 +354,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) { sljit_uw addr = put_label->label->addr; sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_s32 reg = *inst; + sljit_u32 reg = *inst; if (put_label->flags == 0) { SLJIT_ASSERT(addr < 0x100000000l); @@ -433,7 +438,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Just recording the address. 
*/ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -501,7 +506,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -511,7 +516,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!put_label); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins))); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); #else SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); #endif @@ -527,22 +532,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (!(jump->flags & PATCH_ABS_B)) { addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); } else { SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); } } else { if (!(jump->flags & PATCH_ABS_B)) { addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | ((*buf_ptr) & 0x1); } else { 
SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); } } break; @@ -550,26 +555,32 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; #else if (jump->flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= 0x7fffffff); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; break; } + if (jump->flags & PATCH_ABS48) { SLJIT_ASSERT(addr <= 0x7fffffffffff); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[3] |= (sljit_ins)addr & 0xffff; break; } - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); + + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[4]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[3] |= 
(sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[4] |= (sljit_ins)addr & 0xffff; #endif } while (0); jump = jump->next; @@ -592,7 +603,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); @@ -601,7 +612,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (((sljit_sw)code_ptr) & 0x4) code_ptr++; #endif - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_uw)code, (void*)sljit_generate_code); #endif code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -696,69 +707,116 @@ ALT_FORM5 0x010000 */ #define STACK_LOAD LD #endif +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) +#define LR_SAVE_OFFSET 2 * SSIZE_OF(sw) +#else +#define LR_SAVE_OFFSET SSIZE_OF(sw) +#endif + +#define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET) + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, i, tmp, offs; + sljit_s32 i, tmp, base, offset; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = 0; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 arg_count = 0; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + local_size 
+= GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; + FAIL_IF(push_inst(compiler, MFLR | D(0))); - offs = -(sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + base = SLJIT_SP; + offset = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); +#else + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); +#endif + } else { + base = TMP_REG1; + FAIL_IF(push_inst(compiler, OR | S(SLJIT_SP) | A(TMP_REG1) | B(SLJIT_SP))); + FAIL_IF(load_immediate(compiler, TMP_REG2, -local_size)); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2))); +#else + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2))); +#endif + local_size = 0; + offset = 0; } - for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset))); } - SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, STFD | FS(i) 
| A(base) | IMM(offset))); + } -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); -#else - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); -#endif + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset))); - FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); + } - args = get_arg_count(arg_types); + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); + } - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET))); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size; + arg_types >>= SLJIT_ARG_SHIFT; -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, -local_size)); - FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); - } + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + do { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp 
= SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (arg_count != word_arg_count) + tmp = SLJIT_R0 + word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(tmp) | B(SLJIT_R0 + arg_count))); + } while (0); #else - if (local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, -local_size)); - FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); - } + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - saved_arg_count) | B(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } #endif + word_arg_count++; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + arg_count++; +#endif + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -771,54 +829,74 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; - compiler->local_size = (local_size + 15) & ~0xf; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 i, tmp, offs; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) +{ + sljit_s32 i, tmp, base, offset; + sljit_s32 local_size = compiler->local_size; + + base = 
SLJIT_SP; + if (local_size > STACK_MAX_DISTANCE) { + base = TMP_REG1; + if (local_size > 2 * STACK_MAX_DISTANCE + LR_SAVE_OFFSET) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(base) | A(SLJIT_SP) | IMM(0))); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); + local_size = STACK_MAX_DISTANCE; + } + } - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + offset = local_size; + FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET))); - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); - FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); } -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); -#else - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); -#endif + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } - offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset))); - tmp = compiler->scratches; - for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); - offs += (sljit_s32)(sizeof(sljit_sw)); + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | 
A(base) | IMM(offset))); } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = tmp; i <= SLJIT_S0; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); - offs += (sljit_s32)(sizeof(sljit_sw)); + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); } - FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); - SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); + push_inst(compiler, MTLR | S(0)); - FAIL_IF(push_inst(compiler, MTLR | S(0))); - FAIL_IF(push_inst(compiler, BLR)); + if (local_size > 0) + return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size)); - return SLJIT_SUCCESS; + SLJIT_ASSERT(base == TMP_REG1); + return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); + return push_inst(compiler, BLR); } #undef STACK_STORE @@ -843,11 +921,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) #define ARCH_32_64(a, b) a #define INST_CODE_AND_DST(inst, flags, reg) \ - ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) + ((sljit_ins)(inst) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #else #define ARCH_32_64(a, b) b #define INST_CODE_AND_DST(inst, flags, reg) \ - (((inst) & ~INT_ALIGNED) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) + (((sljit_ins)(inst) & ~(sljit_ins)INT_ALIGNED) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? 
D(reg) : FD(reg))) #endif static const sljit_ins data_transfer_insts[64 + 16] = { @@ -1000,7 +1078,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag if (argw != 0) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | (argw << 11) | ((31 - argw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | ((sljit_ins)argw << 11) | ((31 - (sljit_ins)argw) << 1))); #else FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1))); #endif @@ -1073,8 +1151,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); /* Destination check. */ - if (SLOW_IS_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = dst; + /* The REG_DEST is only used by SLJIT_MOV operations, although + * it is set for op2 operations with unset destination. 
*/ flags |= REG_DEST; if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) @@ -1087,8 +1167,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG1_SOURCE; } else if (src1 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } } else { FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); @@ -1104,8 +1187,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = src2_r; } else if (src2 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; + src2_r = TMP_ZERO; + if (src2w != 0) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } } else { FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); @@ -1123,7 +1209,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_s32 int_op = op & SLJIT_I32_OP; + sljit_s32 int_op = op & SLJIT_32; #endif CHECK_ERROR(); @@ -1174,7 +1260,7 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { if (!(src & OFFS_REG_MASK)) { - if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED) + if (srcw == 0 && (src & REG_MASK)) return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK)); FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); @@ -1188,7 +1274,7 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src))); #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) 
<< 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | ((sljit_ins)srcw << 11) | ((31 - (sljit_ins)srcw) << 1))); #else FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1))); #endif @@ -1211,8 +1297,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src, srcw); op = GET_OPCODE(op); - if ((src & SLJIT_IMM) && srcw == 0) - src = TMP_ZERO; if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); @@ -1223,7 +1307,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op_flags & SLJIT_I32_OP) { + if (op_flags & SLJIT_32) { if (op < SLJIT_NOT) { if (src & SLJIT_MEM) { if (op == SLJIT_MOV_S32) @@ -1245,11 +1329,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile switch (op) { case SLJIT_MOV: - case SLJIT_MOV_P: #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif + case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -1257,6 +1342,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32)); case SLJIT_MOV_S32: + case SLJIT_MOV32: return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32)); #endif @@ -1275,12 +1361,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: - return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? 
ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_CLZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); #else return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); #endif @@ -1306,7 +1389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #endif #define TEST_UH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) + (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000)) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_ADD_IMM(src, srcw) \ @@ -1327,13 +1410,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_ADD_FORM1(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ - || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z | SLJIT_SET_CARRY)) + || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z | SLJIT_SET_CARRY)) #define TEST_SUB_FORM2(op) \ ((GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) \ - || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z)) + || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z)) #define TEST_SUB_FORM3(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ - || (op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) + || (op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) #else #define TEST_ADD_FORM1(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) @@ -1351,21 +1434,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 flags = HAS_FLAGS(op) ? 
ALT_SET_FLAGS : 0; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - if ((src1 & SLJIT_IMM) && src1w == 0) - src1 = TMP_ZERO; - if ((src2 & SLJIT_IMM) && src2w == 0) - src2 = TMP_ZERO; - #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { /* Most operations expect sign extended arguments. */ flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) @@ -1381,45 +1456,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (TEST_ADD_FORM1(op)) return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_SH_IMM(src2, src2w)) { - compiler->imm = (src2w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SH_IMM(src1, src1w)) { - compiler->imm = (src1w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } /* Range between -1 and -32768 is 
covered above. */ if (TEST_ADD_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffffffff; + compiler->imm = (sljit_ins)src2w & 0xffffffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_ADD_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffffffff; + compiler->imm = (sljit_ins)src1w & 0xffffffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if ((op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) { + if ((op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); @@ -1427,39 +1504,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile #endif if (HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { - if (dst == SLJIT_UNUSED) { + if (dst == TMP_REG2) { if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); } if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); } - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (dst == TMP_REG2 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); @@ -1467,7 +1547,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (TEST_SUB_FORM2(op)) { if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | 
ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); @@ -1477,45 +1557,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; + compiler->imm = (sljit_ins)(-src2w) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } if (!HAS_FLAGS(op)) { if (TEST_SH_IMM(src2, -src2w)) { - compiler->imm = ((-src2w) >> 16) & 0xffff; + compiler->imm = (sljit_ins)((-src2w) >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, -src2w)) { - compiler->imm = -src2w & 0xffffffff; + compiler->imm = (sljit_ins)-src2w; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } } - /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ + /* We know ALT_SIGN_EXT is set if it is an SLJIT_32 on 64 bit systems. */ return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) flags |= ALT_FORM2; #endif if (!HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } } @@ -1529,30 +1610,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile /* Commutative unsigned operations. */ if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UL_IMM(src1, src1w)) { - compiler->imm = src1w; + compiler->imm = (sljit_ins)src1w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_UH_IMM(src2, src2w)) { - compiler->imm = (src2w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UH_IMM(src1, src1w)) { - compiler->imm = (src1w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } } - if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) { - /* Unlike or and xor, and resets unwanted bits as well. 
*/ + if (GET_OPCODE(op) != SLJIT_AND) { + /* Unlike or and xor, the and resets unwanted bits as well. */ if (TEST_UI_IMM(src2, src2w)) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UI_IMM(src1, src1w)) { - compiler->imm = src1w; + compiler->imm = (sljit_ins)src1w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } @@ -1562,11 +1643,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_LSHR: case SLJIT_ASHR: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) flags |= ALT_FORM2; #endif if (src2 & SLJIT_IMM) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); @@ -1575,6 +1656,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + #undef TEST_ADD_FORM1 #undef TEST_SUB_FORM2 #undef TEST_SUB_FORM3 @@ -1621,7 +1716,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { 
CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1633,8 +1728,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6)) +#define SELECT_FOP(op, single, double) ((sljit_ins)((op & SLJIT_32) ? single : double)) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) @@ -1688,7 +1783,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp dstw &= 0x3; if (dstw) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | ((sljit_ins)dstw << 11) | ((31 - (sljit_ins)dstw) << 1))); #else FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1))); #endif @@ -1745,7 +1840,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp if (dst & SLJIT_MEM) return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); - if (op & SLJIT_F32_OP) + if (op & SLJIT_32) return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); return SLJIT_SUCCESS; @@ -1755,7 +1850,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 invert_sign = 1; if (src & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000)); src = TMP_REG1; invert_sign = 0; } @@ -1783,7 +1878,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp if (dst & SLJIT_MEM) 
return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); - if (op & SLJIT_F32_OP) + if (op & SLJIT_32) return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); return SLJIT_SUCCESS; @@ -1815,11 +1910,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1830,8 +1925,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_CONV_F64_FROM_F32: - op ^= SLJIT_F32_OP; - if (op & SLJIT_F32_OP) { + op ^= SLJIT_32; + if (op & SLJIT_32) { FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); break; } @@ -1946,12 +2041,22 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_bo_bi_flags(sljit_s32 type) +static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (4 << 21) | (2 << 16); + /* fallthrough */ + case SLJIT_EQUAL: return (12 << 21) | (2 << 16); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (12 << 21) | (2 << 16); + /* fallthrough */ + case SLJIT_NOT_EQUAL: return (4 << 21) | (2 << 16); @@ -2015,15 +2120,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); - bo_bi_flags = get_bo_bi_flags(type & 0xff); + bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); if (!bo_bi_flags) return NULL; jump = 
(struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); type &= 0xff; + if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY) + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + /* In PPC, we don't need to touch the arguments. */ if (type < SLJIT_JUMP) jump->flags |= IS_COND; @@ -2049,6 +2157,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); #endif + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2068,25 +2181,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (FAST_IS_REG(src)) { #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL) { + if (type >= SLJIT_CALL && src != TMP_CALL_REG) { FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); src_r = TMP_CALL_REG; } else src_r = src; -#else +#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ src_r = src; -#endif +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ } else if (src & SLJIT_IMM) { /* These jumps are converted to jump/call instructions when possible. 
*/ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; + #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) if (type >= SLJIT_CALL) jump->flags |= IS_CALL; -#endif +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); src_r = TMP_CALL_REG; } @@ -2108,13 +2223,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); src = TMP_CALL_REG; } + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src = TMP_CALL_REG; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) FAIL_IF(call_with_args(compiler, arg_types, &src)); #endif @@ -2130,20 +2255,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_s32 reg, input_flags, cr_bit, invert; + sljit_s32 reg, invert; + sljit_u32 bit, from_xer; sljit_s32 saved_op = op; sljit_sw saved_dstw = dstw; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 input_flags = ((op & SLJIT_32) || op == SLJIT_MOV32) ? INT_DATA : WORD_DATA; +#else + sljit_s32 input_flags = WORD_DATA; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - input_flags = (op & SLJIT_I32_OP) ? 
INT_DATA : WORD_DATA; -#else - input_flags = WORD_DATA; -#endif - op = GET_OPCODE(op); reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; @@ -2151,7 +2276,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); invert = 0; - cr_bit = 0; + bit = 0; + from_xer = 0; switch (type & 0xff) { case SLJIT_LESS: @@ -2165,66 +2291,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_GREATER: case SLJIT_SIG_GREATER: - cr_bit = 1; + bit = 1; break; case SLJIT_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL: - cr_bit = 1; + bit = 1; invert = 1; break; case SLJIT_EQUAL: - cr_bit = 2; + bit = 2; break; case SLJIT_NOT_EQUAL: - cr_bit = 2; + bit = 2; invert = 1; break; case SLJIT_OVERFLOW: - cr_bit = 3; + from_xer = 1; + bit = 1; break; case SLJIT_NOT_OVERFLOW: - cr_bit = 3; + from_xer = 1; + bit = 1; invert = 1; break; + case SLJIT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) != 0; + break; + + case SLJIT_NOT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0; + break; + case SLJIT_LESS_F64: - cr_bit = 4 + 0; + bit = 4 + 0; break; case SLJIT_GREATER_EQUAL_F64: - cr_bit = 4 + 0; + bit = 4 + 0; invert = 1; break; case SLJIT_GREATER_F64: - cr_bit = 4 + 1; + bit = 4 + 1; break; case SLJIT_LESS_EQUAL_F64: - cr_bit = 4 + 1; + bit = 4 + 1; invert = 1; break; case SLJIT_EQUAL_F64: - cr_bit = 4 + 2; + bit = 4 + 2; break; case SLJIT_NOT_EQUAL_F64: - cr_bit = 4 + 2; + bit = 4 + 2; invert = 1; break; case SLJIT_UNORDERED_F64: - cr_bit = 4 + 3; + bit = 4 + 3; break; case SLJIT_ORDERED_F64: - cr_bit = 4 + 3; + bit = 4 + 3; invert = 1; break; @@ -2233,8 +2373,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; } - FAIL_IF(push_inst(compiler, MFCR | D(reg))); - 
FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1))); + FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg))); + FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + bit) << 11) | (31 << 6) | (31 << 1))); if (invert) FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); @@ -2283,19 +2423,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif mem_flags = WORD_DATA; break; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) case SLJIT_MOV_U32: + case SLJIT_MOV32: mem_flags = INT_DATA; break; case SLJIT_MOV_S32: mem_flags = INT_DATA; - if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_I32_OP)) { + if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_32)) { if (mem & OFFS_REG_MASK) mem_flags |= SIGNED_DATA; else @@ -2436,7 +2578,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); #else - PTR_FAIL_IF(push_inst(compiler, dst_r)); + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); compiler->size += 4; #endif diff --git a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c index 716491ec72..8eef910c42 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c @@ -44,6 +44,9 @@ typedef sljit_uw sljit_ins; /* Instruction tags (most significant halfword). 
*/ static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 }; @@ -97,20 +100,37 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac * link register doesn't need to change */ +/* When reg cannot be unused. */ +#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) + /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ static const sljit_gpr link_r = 14; /* r14 */ static const sljit_gpr fast_link_r = 0; /* r0 */ -/* Flag register layout: +#define TMP_FREG1 (0) + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, +}; + +#define R0A(r) (r) +#define R4A(r) ((r) << 4) +#define R8A(r) ((r) << 8) +#define R12A(r) ((r) << 12) +#define R16A(r) ((r) << 16) +#define R20A(r) ((r) << 20) +#define R28A(r) ((r) << 28) +#define R32A(r) ((r) << 32) +#define R36A(r) ((r) << 36) - 0 32 33 34 36 64 - +---------------+---+---+-------+-------+ - | ZERO | 0 | 0 | C C |///////| - +---------------+---+---+-------+-------+ -*/ -static const sljit_gpr flag_r = 14; /* r14 */ +#define R0(r) ((sljit_ins)reg_map[r]) + +#define F0(r) ((sljit_ins)freg_map[r]) +#define F4(r) (R4A((sljit_ins)freg_map[r])) +#define F20(r) (R20A((sljit_ins)freg_map[r])) +#define F36(r) (R36A((sljit_ins)freg_map[r])) struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -124,19 +144,25 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) return reg_map[r]; } +static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0]))); + return freg_map[r]; +} + /* Size of instruction in bytes. 
Tags must already be cleared. */ static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) { /* keep faulting instructions */ if (ins == 0) - return 2; + return 2; if ((ins & 0x00000000ffffL) == ins) - return 2; + return 2; if ((ins & 0x0000ffffffffL) == ins) - return 4; + return 4; if ((ins & 0xffffffffffffL) == ins) - return 6; + return 6; SLJIT_UNREACHABLE(); return (sljit_uw)-1; @@ -172,7 +198,8 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins) } #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ - (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \ + && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE)) /* Map the given type to a 4-bit condition code mask. */ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { @@ -191,6 +218,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc0 | cc3); return (cc0 | cc2); } + /* fallthrough */ case SLJIT_EQUAL_F64: return cc0; @@ -204,6 +232,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc1 | cc2); return (cc1 | cc3); } + /* fallthrough */ case SLJIT_NOT_EQUAL_F64: return (cc1 | cc2 | cc3); @@ -228,10 +257,20 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_LESS_F64: return cc1; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: return (cc0 | cc1); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc0 | cc1); + /* fallthrough */ + case SLJIT_SIG_GREATER: /* Overflow is considered greater, see SLJIT_SUB. 
*/ return cc2 | cc3; @@ -242,6 +281,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc2 | cc3); + /* fallthrough */ case SLJIT_UNORDERED_F64: return cc3; @@ -249,6 +289,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_NOT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc0 | cc1); + /* fallthrough */ case SLJIT_ORDERED_F64: return (cc0 | cc1 | cc2); @@ -444,7 +485,7 @@ SLJIT_S390X_RR(or, 0x1600) #define SLJIT_S390X_RRE(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ { \ - return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ + return (pattern) | R4A(dst) | R0A(src); \ } /* AND */ @@ -504,7 +545,7 @@ SLJIT_S390X_RRE(sgr, 0xb9090000) #define SLJIT_S390X_RIA(name, pattern, imm_type) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ - return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \ + return (pattern) | R20A(reg) | (imm & 0xffff); \ } /* ADD HALFWORD IMMEDIATE */ @@ -534,7 +575,7 @@ SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ SLJIT_ASSERT(have_eimm()); \ - return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \ + return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \ } /* ADD IMMEDIATE */ @@ -567,17 +608,11 @@ SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) /* RX-a form instructions */ #define SLJIT_S390X_RXA(name, pattern) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT((d & 0xfff) == d); \ - ri = (sljit_ins)(r & 0xf) << 20; \ - xi = (sljit_ins)(x & 0xf) << 16; \ - bi = (sljit_ins)(b & 0xf) << 12; \ - di = (sljit_ins)(d & 0xfff); \ \ - return (pattern) | ri | xi | bi 
| di; \ + return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \ } /* LOAD */ @@ -607,15 +642,9 @@ SLJIT_S390X_RXA(sth, 0x40000000) #define SLJIT_S390X_RXYA(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT(cond); \ - ri = (sljit_ins)(r & 0xf) << 36; \ - xi = (sljit_ins)(x & 0xf) << 32; \ - bi = (sljit_ins)(b & 0xf) << 28; \ - di = disp_s20(d); \ \ - return (pattern) | ri | xi | bi | di; \ + return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \ } /* LOAD */ @@ -660,17 +689,11 @@ SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) /* RSY-a instructions */ #define SLJIT_S390X_RSYA(name, pattern, cond) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \ { \ - sljit_ins r1, r3, b2, d2; \ -\ SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(dst & 0xf) << 36; \ - r3 = (sljit_ins)(src & 0xf) << 32; \ - b2 = (sljit_ins)(b & 0xf) << 28; \ - d2 = disp_s20(d); \ \ - return (pattern) | r1 | r3 | b2 | d2; \ + return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \ } /* LOAD MULTIPLE */ @@ -691,16 +714,14 @@ SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) #define SLJIT_S390X_RIEF(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \ { \ - sljit_ins r1, r2, i3, i4, i5; \ + sljit_ins i3, i4, i5; \ \ SLJIT_ASSERT(have_genext()); \ - r1 = (sljit_ins)(dst & 0xf) << 36; \ - r2 = (sljit_ins)(src & 0xf) << 32; \ i3 = (sljit_ins)start << 24; \ i4 = (sljit_ins)end << 16; \ i5 = (sljit_ins)rot << 8; \ \ - return (pattern) | r1 | r2 | i3 | i4 | i5; \ + return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \ } /* ROTATE THEN AND SELECTED BITS */ @@ -728,14 +749,12 @@ SLJIT_S390X_RIEF(risbhg, 0xec000000005d) #define SLJIT_S390X_RRFC(name, pattern) \ 
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ { \ - sljit_ins r1, r2, m3; \ + sljit_ins m3; \ \ SLJIT_ASSERT(have_lscond1()); \ - r1 = (sljit_ins)(dst & 0xf) << 4; \ - r2 = (sljit_ins)(src & 0xf); \ m3 = (sljit_ins)(mask & 0xf) << 12; \ \ - return (pattern) | m3 | r1 | r2; \ + return (pattern) | m3 | R4A(dst) | R0A(src); \ } /* LOAD HALFWORD IMMEDIATE ON CONDITION */ @@ -748,14 +767,13 @@ SLJIT_S390X_RRFC(locgr, 0xb9e20000) #define SLJIT_S390X_RIEG(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \ { \ - sljit_ins r1, m3, i2; \ + sljit_ins m3, i2; \ \ SLJIT_ASSERT(have_lscond2()); \ - r1 = (sljit_ins)(reg & 0xf) << 36; \ m3 = (sljit_ins)(mask & 0xf) << 32; \ i2 = (sljit_ins)(imm & 0xffffL) << 16; \ \ - return (pattern) | r1 | m3 | i2; \ + return (pattern) | R36A(reg) | m3 | i2; \ } /* LOAD HALFWORD IMMEDIATE ON CONDITION */ @@ -767,13 +785,9 @@ SLJIT_S390X_RIEG(locghi, 0xec0000000046) #define SLJIT_S390X_RILB(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \ { \ - sljit_ins r1, ri2; \ -\ SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(reg & 0xf) << 36; \ - ri2 = (sljit_ins)(ri & 0xffffffff); \ \ - return (pattern) | r1 | ri2; \ + return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \ } /* BRANCH RELATIVE AND SAVE LONG */ @@ -808,22 +822,20 @@ SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src) { - sljit_ins r1 = ((sljit_ins)dst & 0xf) << 8; - sljit_ins r2 = ((sljit_ins)src & 0xf); SLJIT_ASSERT(have_eimm()); - return 0xb9830000 | r1 | r2; + return 0xb9830000 | R8A(dst) | R0A(src); } /* INSERT PROGRAM MASK */ SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) { - return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4); + return 0xb2220000 | R4A(dst); } /* SET PROGRAM MASK */ SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) { - return 0x0400 | ((sljit_ins)(dst & 0xf) << 4); + return 0x0400 | R4A(dst); } /* 
ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ @@ -842,12 +854,12 @@ static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 1 (non-zero and no overflow) : unchanged 2 (zero and overflow) : decreased by 1 3 (non-zero and overflow) : decreased by 1 if non-zero */ - FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); - FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000))); - FAIL_IF(push_inst(compiler, spm(flag_r))); + FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(tmp1))); return SLJIT_SUCCESS; } @@ -858,16 +870,16 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t if (is_s16(v)) return push_inst(compiler, lghi(target, (sljit_s16)v)); - if ((sljit_uw)v == (v & 0x000000000000ffffU)) + if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0) return push_inst(compiler, llill(target, (sljit_u16)v)); - if ((sljit_uw)v == (v & 0x00000000ffff0000U)) + if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0) return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16))); - if ((sljit_uw)v == (v & 0x0000ffff00000000U)) + if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0) return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32))); - if ((sljit_uw)v == (v & 0xffff000000000000U)) + if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0) return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); /* 6 byte instructions (requires extended immediate facility) */ @@ -875,15 +887,16 @@ static sljit_s32 push_load_imm_inst(struct 
sljit_compiler *compiler, sljit_gpr t if (is_s32(v)) return push_inst(compiler, lgfi(target, (sljit_s32)v)); - if ((sljit_uw)v == (v & 0x00000000ffffffffU)) + if (((sljit_uw)v >> 32) == 0) return push_inst(compiler, llilf(target, (sljit_u32)v)); - if ((sljit_uw)v == (v & 0xffffffff00000000U)) - return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32))); + if (((sljit_uw)v << 32) == 0) + return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); } + /* TODO(mundaym): instruction sequences that don't use extended immediates */ abort(); } @@ -891,7 +904,7 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t struct addr { sljit_gpr base; sljit_gpr index; - sljit_sw offset; + sljit_s32 offset; }; /* transform memory operand into D(X,B) form with a signed 20-bit offset */ @@ -911,7 +924,7 @@ static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, if (off != 0) { /* shift and put the result into tmp */ SLJIT_ASSERT(0 <= off && off < 64); - FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); index = tmp; off = 0; /* clear offset */ } @@ -923,7 +936,7 @@ static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, } addr->base = base; addr->index = index; - addr->offset = off; + addr->offset = (sljit_s32)off; return SLJIT_SUCCESS; } @@ -944,7 +957,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, if (off != 0) { /* shift and put the result into tmp */ SLJIT_ASSERT(0 <= off && off < 64); - FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); index = tmp; off = 0; /* clear offset */ } @@ -956,7 +969,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, } addr->base = base; addr->index = index; - addr->offset = off; + 
addr->offset = (sljit_s32)off; return SLJIT_SUCCESS; } @@ -1014,16 +1027,16 @@ static sljit_s32 emit_move(struct sljit_compiler *compiler, sljit_gpr dst_r, sljit_s32 src, sljit_sw srcw) { - SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK)); + SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK)); if (src & SLJIT_IMM) return push_load_imm_inst(compiler, dst_r, srcw); if (src & SLJIT_MEM) - return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_I32_OP) != 0); + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0); sljit_gpr src_r = gpr(src & REG_MASK); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); } static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, @@ -1035,8 +1048,8 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr src_r = tmp1; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1050,17 +1063,32 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (FAST_IS_REG(src2)) - src_r = gpr(src2 & REG_MASK); + src_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); - FAIL_IF(push_inst(compiler, ins | (dst_r << 4) | src_r)); + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r))); if (needs_move != 2) return SLJIT_SUCCESS; dst_r = gpr(dst & REG_MASK); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)); } static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, @@ -1068,21 +1096,21 @@ static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr src1_r = tmp0; sljit_gpr src2_r = tmp1; if (FAST_IS_REG(src1)) - src1_r = gpr(src1 & REG_MASK); + src1_r = gpr(src1); else FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (FAST_IS_REG(src2)) - src2_r = gpr(src2 & REG_MASK); + src2_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); - return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12)); + return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r)); } typedef enum { @@ -1099,8 +1127,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1110,8 +1138,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (type == RIL_A) - return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff)); - return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff)); + return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | R20A(dst_r) | (src2w & 
0xffff)); } static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, @@ -1119,15 +1147,15 @@ static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_sw src2w) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; sljit_gpr src_r = tmp0; - if (!SLOW_IS_REG(src1)) + if (!FAST_IS_REG(src1)) FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); else src_r = gpr(src1 & REG_MASK); - return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16); + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16); } typedef enum { @@ -1147,7 +1175,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, SLJIT_ASSERT(src2 & SLJIT_MEM); - if (SLOW_IS_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = gpr(dst); if (dst == src1) @@ -1183,9 +1211,9 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, } if (type == RX_A) - ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w; + ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w; else - ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w); + ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w); FAIL_IF(push_inst(compiler, ins)); @@ -1193,7 +1221,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, return SLJIT_SUCCESS; dst_r = gpr(dst); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0)); } static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, @@ -1226,7 +1254,7 @@ static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, else dst_r = gpr(dst & REG_MASK); - return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw)); + return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw)); } struct ins_forms { @@ -1240,7 +1268,7 @@ struct ins_forms { }; static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -1250,7 +1278,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct if ((src1 | src2) & SLJIT_MEM) { sljit_ins ins12, ins20; - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins12 = forms->op; ins20 = forms->op_y; } @@ -1297,7 +1325,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct } } - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins = forms->op_r; ins_k = forms->op_rk; } @@ -1308,7 +1336,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct SLJIT_ASSERT(ins != 0 || ins_k != 0); - if (ins && SLOW_IS_REG(dst)) { + if (ins && FAST_IS_REG(dst)) { if (dst == src1) return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); @@ -1323,7 +1351,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct } static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -1333,7 +1361,7 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str if (src2 & SLJIT_MEM) { sljit_ins ins12, ins20; - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins12 
= forms->op; ins20 = forms->op_y; } @@ -1354,10 +1382,10 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); } - ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk; + ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk; - if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1)) - return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + if (ins == 0 || (FAST_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); } @@ -1376,9 +1404,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_memory_fragment *buf; void *code, *code_ptr; sljit_uw *pool, *pool_ptr; - - sljit_uw source; - sljit_sw offset; /* TODO(carenas): only need 32 bit */ + sljit_sw source, offset; /* TODO(carenas): only need 32 bit */ CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); @@ -1489,38 +1515,41 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil ins &= ~sljit_ins_const; /* update instruction with relative address of constant */ - source = (sljit_uw)code_ptr; - offset = (sljit_uw)pool_ptr - source; + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + SLJIT_ASSERT(!(offset & 1)); offset >>= 1; /* halfword (not byte) offset */ SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; /* update address */ const_->const_.addr = (sljit_uw)pool_ptr; /* store initial value into pool and update pool address */ - *(pool_ptr++) = const_->init_value; + *(pool_ptr++) = (sljit_uw)const_->init_value; /* move to next constant */ const_ = (struct sljit_s390x_const *)const_->const_.next; } if (jump && jump->addr == j) { - sljit_sw target = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { jump->addr = (sljit_uw)pool_ptr; /* load address into tmp1 */ - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + SLJIT_ASSERT(!(offset & 1)); offset >>= 1; SLJIT_ASSERT(is_s32(offset)); - encode_inst(&code_ptr, - lgrl(tmp1, offset & 0xffffffff)); + + encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff)); /* store jump target into pool and update pool address */ - *(pool_ptr++) = target; + *(pool_ptr++) = (sljit_uw)target; /* branch to tmp1 */ sljit_ins op = (ins >> 32) & 0xf; @@ -1538,7 +1567,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else { jump->addr = (sljit_uw)code_ptr + 2; - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); offset = target - source; /* offset must be halfword aligned */ @@ -1552,14 +1581,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = jump->next; } if (put_label && put_label->addr == j) { - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); SLJIT_ASSERT(put_label->label); put_label->addr = (sljit_uw)code_ptr; /* store target into pool */ *pool_ptr = put_label->label->addr; - offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; pool_ptr++; SLJIT_ASSERT(!(offset & 1)); @@ -1594,7 +1623,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_CMOV: return have_lscond1() ? 1 : 0; case SLJIT_HAS_FPU: - return 0; + return 1; } return 0; } @@ -1607,36 +1636,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args = get_arg_count(arg_types); - sljit_sw frame_size; + sljit_s32 word_arg_count = 0; + sljit_s32 offset, i, tmp; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* saved registers go in callee allocated save area */ - compiler->local_size = (local_size + 0xf) & ~0xf; - frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE; + /* Saved registers are stored in callee allocated save area. 
*/ + SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13); - FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */ - if (frame_size != 0) { - if (is_s16(-frame_size)) - FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size)))); - else if (is_s32(-frame_size)) - FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size)))); - else { - FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size)); - FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ + offset += 9 * SSIZE_OF(sw); + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); } + + if (saveds == 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); } - if (args >= 1) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0)))); - if (args >= 2) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1)))); - if (args >= 3) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), 
gpr(SLJIT_R2)))); - SLJIT_ASSERT(args < 4); + local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + compiler->local_size = local_size; + + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + + arg_types >>= SLJIT_ARG_SHIFT; + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count)))); + tmp++; + } + word_arg_count++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -1649,37 +1709,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* TODO(mundaym): stack space for saved floating point registers */ - compiler->local_size = (local_size + 0xf) & ~0xf; + compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_sw size; - sljit_gpr end; + sljit_s32 offset, i, tmp; + sljit_s32 local_size = compiler->local_size; + sljit_s32 saveds = compiler->saveds; + sljit_s32 scratches = compiler->scratches; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + if (is_u12(local_size)) + FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); + else + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size))); - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + offset = 2 * 
SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ + offset += 9 * SSIZE_OF(sw); + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } - size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw)); - if (!is_s20(size)) { - FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE)); - FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); - size = r6 * sizeof(sljit_sw); - end = r14; /* r15 has been restored already */ + if (saveds == 0) { + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } } - else - end = r15; - FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */ - FAIL_IF(push_inst(compiler, br(r14))); /* return */ + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); + return 
push_inst(compiler, br(r14)); /* return */ +} + /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ @@ -1692,7 +1782,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); - op = GET_OPCODE(op) | (op & SLJIT_I32_OP); + op = GET_OPCODE(op) | (op & SLJIT_32); switch (op) { case SLJIT_BREAKPOINT: /* The following invalid instruction is emitted by gdb. */ @@ -1786,17 +1876,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) { - /* TODO(carenas): implement prefetch? */ - return SLJIT_SUCCESS; - } - if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { /* LOAD REGISTER */ if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { dst_r = gpr(dst); src_r = gpr(src); - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { /* 32-bit */ case SLJIT_MOV32_U8: ins = llcr(dst_r, src_r); @@ -1811,6 +1896,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ins = lhr(dst_r, src_r); break; case SLJIT_MOV32: + if (dst_r == src_r) + return SLJIT_SUCCESS; ins = lr(dst_r, src_r); break; /* 64-bit */ @@ -1834,11 +1921,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; case SLJIT_MOV: case SLJIT_MOV_P: + if (dst_r == src_r) + return SLJIT_SUCCESS; ins = lgr(dst_r, src_r); break; default: ins = 0; SLJIT_UNREACHABLE(); + break; } FAIL_IF(push_inst(compiler, ins)); return SLJIT_SUCCESS; @@ -1862,6 +1952,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_sw)((sljit_u32)(srcw)); break; case SLJIT_MOV_S32: + case SLJIT_MOV32: srcw = (sljit_sw)((sljit_s32)(srcw)); break; } @@ -1875,7 +1966,7 @@ SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); /* TODO(carenas): convert all calls below to LEVAL */ - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { case SLJIT_MOV32_U8: ins = llc(reg, mem.offset, mem.index, mem.base); break; @@ -1914,7 +2005,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ins = lg(reg, mem.offset, mem.index, mem.base); break; default: + ins = 0; SLJIT_UNREACHABLE(); + break; } FAIL_IF(push_inst(compiler, ins)); return SLJIT_SUCCESS; @@ -1940,6 +2033,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile WHEN2(is_u12(mem.offset), sth, sthy)); case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: return push_inst(compiler, WHEN2(is_u12(mem.offset), st, sty)); case SLJIT_MOV_P: @@ -1972,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile EVAL(sthy, tmp0, mem)); case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: FAIL_IF(push_inst(compiler, EVAL(ly, tmp0, mem))); FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); @@ -1994,15 +2089,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */ - dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; + dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; src_r = FAST_IS_REG(src) ? 
gpr(REG_MASK & src) : tmp0; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32)); compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); /* TODO(mundaym): optimize loads and stores */ - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { case SLJIT_NOT: /* emulate ~x with x^-1 */ FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); @@ -2014,7 +2109,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT32: /* emulate ~x with x^-1 */ if (have_eimm()) - FAIL_IF(push_inst(compiler, xilf(dst_r, -1))); + FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff))); else { FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); if (src_r != dst_r) @@ -2023,14 +2118,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); } break; - case SLJIT_NEG: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); - break; - case SLJIT_NEG32: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); - break; case SLJIT_CLZ: if (have_eimm()) { FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */ @@ -2059,8 +2146,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(update_zero_overflow(compiler, op, dst_r)); /* TODO(carenas): doesn't need FAIL_IF */ - if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM)) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + if (dst & SLJIT_MEM) + FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); return SLJIT_SUCCESS; } @@ -2084,20 +2171,6 @@ static SLJIT_INLINE int is_shift(sljit_s32 op) { return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 
1 : 0; } -static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) -{ - switch (GET_FLAG_TYPE(op)) { - case SLJIT_OVERFLOW: - case SLJIT_NOT_OVERFLOW: - case SLJIT_SIG_LESS: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_SIG_GREATER: - case SLJIT_SIG_GREATER_EQUAL: - return 1; - } - return 0; -} - static const struct ins_forms add_forms = { 0x1a00, /* ar */ 0xb9080000, /* agr */ @@ -2131,24 +2204,24 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, if (src2 & SLJIT_IMM) { if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_overflow) - ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; else - ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; return emit_siy(compiler, ins, dst, dstw, src2w); } if (is_s16(src2w)) { if (sets_overflow) - ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; else - ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); goto done; } if (!sets_overflow) { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 
0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } @@ -2157,22 +2230,22 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, goto done; } } - else if ((op & SLJIT_I32_OP) || is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + else if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } } forms = sets_overflow ? &add_forms : &logical_add_forms; - FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); done: if (sets_zero_overflow) - FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); + FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } @@ -2202,78 +2275,85 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - int sets_signed = sets_signed_flag(op); + sljit_s32 flag_type = GET_FLAG_TYPE(op); + int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW); int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); const struct ins_forms *forms; sljit_ins ins; - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { - int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = flag_type >= SLJIT_SIG_LESS; compiler->status_flags_state |= 
SLJIT_CURRENT_FLAGS_COMPARE; if (src2 & SLJIT_IMM) { if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) { - if ((op & SLJIT_I32_OP) || is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } } else { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } if (is_s16(src2w)) - return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w); + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w); } } else if (src2 & SLJIT_MEM) { - if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); } if (compare_signed) - ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; else - ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); } if (compare_signed) - ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; else - ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + ins = (op & SLJIT_32) ? 
0x1500 /* clr */ : 0xb9210000 /* clgr */; return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); } + if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w)); + goto done; + } + if (src2 & SLJIT_IMM) { sljit_sw neg_src2w = -src2w; if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_signed) - ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; else - ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; return emit_siy(compiler, ins, dst, dstw, neg_src2w); } if (is_s16(neg_src2w)) { if (sets_signed) - ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; else - ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); goto done; } } if (!sets_signed) { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 
0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } @@ -2282,19 +2362,19 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, goto done; } } - else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + else if ((op & SLJIT_32) || is_s32(neg_src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); goto done; } } forms = sets_signed ? &sub_forms : &logical_sub_forms; - FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); done: if (sets_signed) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { /* In case of overflow, the sign bit of the two source operands must be different, and @@ -2303,14 +2383,14 @@ done: The -result operation sets the corrent sign, because the result cannot be zero. The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); - FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? 
lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } else if (op & SLJIT_SET_Z) FAIL_IF(update_zero_overflow(compiler, op, dst_r)); } if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } @@ -2336,7 +2416,7 @@ static const struct ins_forms multiply_overflow_forms = { }; static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -2351,29 +2431,29 @@ static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 } FAIL_IF(push_inst(compiler, aih(tmp0, 1))); FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */ + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */ - return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w); } if (src2 & SLJIT_IMM) { if (is_s16(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); } if (is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + ins = (op & SLJIT_32) ? 
0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); } } - return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w); } static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_uw imm, sljit_s32 count16) { @@ -2381,7 +2461,7 @@ static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { + if (IS_GPR_REG(dst)) { dst_r = gpr(dst & REG_MASK); if (dst == src1) needs_move = 0; @@ -2391,38 +2471,38 @@ static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (type == SLJIT_AND) { - if (!(mode & SLJIT_I32_OP)) - FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32))); - return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff)); + if (!(mode & SLJIT_32)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff)); } else if (type == SLJIT_OR) { if (count16 >= 3) { - FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32))); - return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); } if (count16 >= 2) { if ((imm & 0x00000000ffffffffull) == 0) - return push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)); + return push_inst(compiler, 0xc00c00000000 /* oihf */ | 
R36A(dst_r) | (imm >> 32)); if ((imm & 0xffffffff00000000ull) == 0) - return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); } if ((imm & 0xffff000000000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48))); + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48))); if ((imm & 0x0000ffff00000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff))); + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff))); if ((imm & 0x00000000ffff0000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff))); + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff))); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) - return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff)); + return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff)); return SLJIT_SUCCESS; } if ((imm & 0xffffffff00000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32))); + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32))); if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) - return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff)); return SLJIT_SUCCESS; } @@ -2457,18 +2537,18 @@ static const struct ins_forms bitwise_xor_forms = { }; static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_s32 type = GET_OPCODE(op); const struct ins_forms *forms; - if 
((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) { + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { sljit_s32 count16 = 0; sljit_uw imm = (sljit_uw)src2w; - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) imm &= 0xffffffffull; if ((imm & 0x000000000000ffffull) != 0 || imm == 0) @@ -2480,7 +2560,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o if ((imm & 0xffff000000000000ull) != 0) count16++; - if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) { + if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) { sljit_gpr src_r = tmp0; if (FAST_IS_REG(src1)) @@ -2489,16 +2569,16 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) - return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm); + return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm); if ((imm & 0x00000000ffff0000ull) != 0) - return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16)); + return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16)); if ((imm & 0x0000ffff00000000ull) != 0) - return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32)); - return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48)); + return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48)); } if (!(op & SLJIT_SET_Z)) - return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16); + return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16); } if (type == SLJIT_AND) @@ -2508,16 +2588,16 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o else forms = &bitwise_xor_forms; - return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w); + return 
emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w); } static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_s32 type = GET_OPCODE(op); - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr src_r = tmp0; sljit_gpr base_r = tmp0; sljit_ins imm = 0; @@ -2529,7 +2609,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (src2 & SLJIT_IMM) - imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f); + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); else if (FAST_IS_REG(src2)) base_r = gpr(src2 & REG_MASK); else { @@ -2537,7 +2617,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, base_r = tmp1; } - if ((op & SLJIT_I32_OP) && dst_r == src_r) { + if ((op & SLJIT_32) && dst_r == src_r) { if (type == SLJIT_SHL) ins = 0x89000000 /* sll */; else if (type == SLJIT_LSHR) @@ -2545,21 +2625,21 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, else ins = 0x8a000000 /* sra */; - FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm)); + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm)); } else { if (type == SLJIT_SHL) - ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; else if (type == SLJIT_LSHR) - ins = (op & SLJIT_I32_OP) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; else - ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + ins = (op & SLJIT_32) ? 
0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; - FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16))); + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16))); } if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) - return push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); return SLJIT_SUCCESS; } @@ -2590,20 +2670,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - compiler->mode = op & SLJIT_I32_OP; + compiler->mode = op & SLJIT_32; compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); - if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC) - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { src1 ^= src2; src2 ^= src1; @@ -2616,39 +2690,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w)); + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return 
store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_SUB: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w)); + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_MUL: - FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w)); break; case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: - FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w)); break; case SLJIT_SHL: case SLJIT_LSHR: case SLJIT_ASHR: - FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w)); break; } if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( 
struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) @@ -2686,17 +2778,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return gpr(reg); + return (sljit_s32)gpr(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - abort(); + return (sljit_s32)fgpr(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { sljit_ins ins = 0; @@ -2711,21 +2803,254 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ +#define FLOAT_LOAD 0 +#define FLOAT_STORE 1 + +static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */; + else + ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */; + + return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */; + else + ins = (op & SLJIT_32) ? 
0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */; + + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins, + sljit_s32 reg, + sljit_s32 src, sljit_sw srcw) +{ + struct addr addr; + + if (!(src & SLJIT_MEM)) + return push_inst(compiler, ins_r | F4(reg) | F0(src)); + + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_ins ins; + + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + /* M3 is set to 5 */ + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */; + else + ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */; + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src))); + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + sljit_ins ins; + + if (src & SLJIT_IMM) { + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + src = (sljit_s32)tmp0; + } + else if (src & SLJIT_MEM) { + FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32)); + src = (sljit_s32)tmp0; + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */; + else + ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */; + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins_r, ins; + + if (src1 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (op & SLJIT_32) { + ins_r = 0xb3090000 /* cebr */; + ins = 0xed0000000009 /* ceb */; + } else { + ins_r = 0xb3190000 /* cdbr */; + ins = 0xed0000000019 /* cdb */; + } + + return emit_float(compiler, ins_r, ins, src1, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { + sljit_s32 dst_r; + sljit_ins ins; + CHECK_ERROR(); - abort(); + + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (op == SLJIT_CONV_F64_FROM_F32) + FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw)); + else { + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 
0 : (op & SLJIT_32)), dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (FAST_IS_REG(dst)) { + if (dst == src) + return SLJIT_SUCCESS; + + ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + break; + } + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw); + case SLJIT_CONV_F64_FROM_F32: + /* Only SLJIT_CONV_F32_FROM_F64. */ + ins = 0xb3440000 /* ledbr */; + break; + case SLJIT_NEG_F64: + ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64); + ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */; + break; + } + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(dst_r == TMP_FREG1); + + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); } +#define FLOAT_MOV(op, dst_r, src_r) \ + (((op & SLJIT_32) ? 
0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r)) + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + sljit_s32 dst_r = TMP_FREG1; + sljit_ins ins_r, ins; + CHECK_ERROR(); - abort(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + do { + if (FAST_IS_REG(dst)) { + dst_r = dst; + + if (dst == src1) + break; + + if (dst == src2) { + if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) { + src2 = src1; + src2w = src1w; + src1 = dst; + break; + } + + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2))); + src2 = TMP_FREG1; + } + } + + if (src1 & SLJIT_MEM) + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1))); + } while (0); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */; + ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */; + break; + case SLJIT_SUB_F64: + ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */; + ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */; + break; + case SLJIT_MUL_F64: + ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */; + ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64); + ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */; + ins = (op & SLJIT_32) ? 
0xed000000000d /* deb */ : 0xed000000001d /* ddb */; + break; + } + + FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w)); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + SLJIT_ASSERT(dst_r != TMP_FREG1); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ @@ -2795,6 +3120,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2809,14 +3139,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); if (src & SLJIT_IMM) { SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); } - else if (src & SLJIT_MEM) + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); + } /* emit jump instruction */ if (type >= SLJIT_FAST_CALL) @@ -2832,6 +3163,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + SLJIT_ASSERT(gpr(TMP_REG2) == tmp1); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + } + + 
FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2859,11 +3208,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* dst is also source operand */ if (dst & SLJIT_MEM) - FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); break; + case SLJIT_MOV32: + op |= SLJIT_32; + /* fallthrough */ case SLJIT_MOV: - case (SLJIT_MOV32 & ~SLJIT_I32_OP): /* can write straight into destination */ loc_r = dst_r; break; @@ -2876,7 +3227,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (have_lscond2()) { FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lochi, locghi))); + WHEN2(op & SLJIT_32, lochi, locghi))); } else { /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) 
*/ abort(); @@ -2888,22 +3239,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #define LEVAL(i) i(dst_r, loc_r) case SLJIT_AND: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, nr, ngr))); + WHEN2(op & SLJIT_32, nr, ngr))); break; case SLJIT_OR: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, or, ogr))); + WHEN2(op & SLJIT_32, or, ogr))); break; case SLJIT_XOR: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, xr, xgr))); + WHEN2(op & SLJIT_32, xr, xgr))); break; #undef LEVAL } /* store result to memory if required */ if (dst & SLJIT_MEM) - return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32)); return SLJIT_SUCCESS; } @@ -2913,7 +3264,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 src, sljit_sw srcw) { sljit_u8 mask = get_cc(compiler, type & 0xff); - sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP); + sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32); sljit_gpr src_r = FAST_IS_REG(src) ? 
gpr(src) : tmp0; CHECK_ERROR(); @@ -2927,7 +3278,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil #define LEVAL(i) i(dst_r, src_r, mask) if (have_lscond1()) return push_inst(compiler, - WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr)); + WHEN2(dst_reg & SLJIT_32, locr, locgr)); #undef LEVAL @@ -2991,7 +3342,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( diff --git a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c index 28886405af..218992b355 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c @@ -35,16 +35,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, #define ARG2(flags, src2) ((flags & SRC2_IMM) ? 
IMM(src2) : S2(src2)) -static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); switch (op) { case SLJIT_MOV: - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); @@ -59,8 +56,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); } - else if (dst != src2) - SLJIT_UNREACHABLE(); + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: @@ -70,13 +66,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); } - else if (dst != src2) - SLJIT_UNREACHABLE(); + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DRF(dst, flags)); case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); @@ -89,22 +84,24 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl /* Loop. 
*/ FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, BICC | DA(0xe) | ((sljit_ins)-2 & DISP_MASK), UNMOVABLE_INS)); return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_ADDC: - return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUBC: - return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_MUL: compiler->status_flags_state = 0; @@ -116,13 +113,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return 
push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); case SLJIT_AND: - return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_OR: - return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_XOR: - return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SHL: FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); @@ -147,7 +144,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_s32 word_reg_index = 8; sljit_s32 float_arg_index = 1; sljit_s32 double_arg_count = 0; - sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw); + sljit_u32 float_offset = (16 + 6) * sizeof(sljit_sw); sljit_s32 types = 0; sljit_s32 reg = 0; sljit_s32 move_to_tmp2 = 0; @@ -155,18 +152,12 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t if (src) reg = reg_map[*src & REG_MASK]; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_index++; - if (reg_index == reg) - move_to_tmp2 = 1; - reg_index++; - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_index++; double_arg_count++; @@ -174,36 +165,37 @@ 
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t move_to_tmp2 = 1; reg_index += 2; break; + case SLJIT_ARG_TYPE_F32: + float_arg_index++; + if (reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + break; default: - if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg) + if (reg_index != word_reg_index && reg_index == reg) move_to_tmp2 = 1; reg_index++; word_reg_index++; break; } - if (move_to_tmp2) { - move_to_tmp2 = 0; - if (reg < 14) - FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); - *src = TMP_REG1; - } + arg_types >>= SLJIT_ARG_SHIFT; + } - arg_types >>= SLJIT_DEF_SHIFT; + if (move_to_tmp2) { + if (reg < 14) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); + *src = TMP_REG1; } arg_types = types; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_index--; - FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - float_offset -= sizeof(sljit_f64); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_index--; if (float_arg_index == 4 && double_arg_count == 4) { + /* The address is not doubleword aligned, so two instructions are required to store the double. 
*/ FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS)); FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS)); } @@ -211,36 +203,41 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); float_offset -= sizeof(sljit_f64); break; + case SLJIT_ARG_TYPE_F32: + float_arg_index--; + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; default: break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } float_offset = (16 + 6) * sizeof(sljit_sw); while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - reg_index--; - if (reg_index < 14) - FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - float_offset -= sizeof(sljit_f64); - break; + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: reg_index -= 2; if (reg_index < 14) { if ((reg_index & 0x1) != 0) { FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - if (reg_index < 13) + if (reg_index < 8 + 6 - 1) FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1)); } - else + else FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); } float_offset -= sizeof(sljit_f64); break; + case SLJIT_ARG_TYPE_F32: + reg_index--; + if (reg_index < 8 + 6) + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + float_offset -= sizeof(sljit_f64); + break; default: reg_index--; word_reg_index--; @@ -254,7 +251,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, 
sljit_s32 arg_t break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -282,5 +279,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c index e833f09d7a..c8d19e16c6 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c @@ -98,36 +98,37 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { - 0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15 + 0, 8, 9, 10, 11, 23, 22, 21, 20, 19, 18, 17, 16, 29, 28, 27, 26, 25, 24, 14, 1, 12, 13, 15 }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 2, 4, 6, 8, 10, 12, 14 + 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define D(d) (reg_map[d] << 25) -#define FD(d) (freg_map[d] << 25) -#define FDN(d) ((freg_map[d] | 0x1) << 25) -#define DA(d) ((d) << 25) -#define S1(s1) (reg_map[s1] << 14) -#define FS1(s1) (freg_map[s1] << 14) -#define S1A(s1) ((s1) << 14) -#define S2(s2) (reg_map[s2]) -#define FS2(s2) (freg_map[s2]) -#define FS2N(s2) (freg_map[s2] | 0x1) -#define S2A(s2) (s2) +#define D(d) ((sljit_ins)reg_map[d] << 25) +#define FD(d) ((sljit_ins)freg_map[d] << 25) +#define FDN(d) (((sljit_ins)freg_map[d] | 0x1) << 25) +#define DA(d) 
((sljit_ins)(d) << 25) +#define S1(s1) ((sljit_ins)reg_map[s1] << 14) +#define FS1(s1) ((sljit_ins)freg_map[s1] << 14) +#define S1A(s1) ((sljit_ins)(s1) << 14) +#define S2(s2) ((sljit_ins)reg_map[s2]) +#define FS2(s2) ((sljit_ins)freg_map[s2]) +#define FS2N(s2) ((sljit_ins)freg_map[s2] | 0x1) +#define S2A(s2) ((sljit_ins)(s2)) #define IMM_ARG 0x2000 -#define DOP(op) ((op) << 5) -#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) +#define DOP(op) ((sljit_ins)(op) << 5) +#define IMM(imm) (((sljit_ins)(imm) & 0x1fff) | IMM_ARG) #define DR(dr) (reg_map[dr]) -#define OPC1(opcode) ((opcode) << 30) -#define OPC2(opcode) ((opcode) << 22) -#define OPC3(opcode) ((opcode) << 19) +#define DRF(dr, flags) ((sljit_s32)(reg_map[dr] | ((flags) & SET_FLAGS))) +#define OPC1(opcode) ((sljit_ins)(opcode) << 30) +#define OPC2(opcode) ((sljit_ins)(opcode) << 22) +#define OPC3(opcode) ((sljit_ins)(opcode) << 19) #define SET_FLAGS OPC3(0x10) #define ADD (OPC1(0x2) | OPC3(0x00)) @@ -156,6 +157,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) #define JMPL (OPC1(0x2) | OPC3(0x38)) #define LDD (OPC1(0x3) | OPC3(0x03)) +#define LDDF (OPC1(0x3) | OPC3(0x23)) +#define LDF (OPC1(0x3) | OPC3(0x20)) #define LDUW (OPC1(0x3) | OPC3(0x00)) #define NOP (OPC1(0x0) | OPC2(0x04)) #define OR (OPC1(0x2) | OPC3(0x02)) @@ -170,6 +173,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) #define SRL (OPC1(0x2) | OPC3(0x26)) #define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define STD (OPC1(0x3) | OPC3(0x07)) #define STDF (OPC1(0x3) | OPC3(0x27)) #define STF (OPC1(0x3) | OPC3(0x24)) #define STW (OPC1(0x3) | OPC3(0x04)) @@ -183,7 +187,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) #define MAX_DISP (0x1fffff) #define MIN_DISP (-0x200000) -#define DISP_MASK (0x3fffff) +#define 
DISP_MASK ((sljit_ins)0x3fffff) #define BICC (OPC1(0x0) | OPC2(0x2)) #define FBFCC (OPC1(0x0) | OPC2(0x6)) @@ -274,7 +278,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } } - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; @@ -300,7 +304,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; + sljit_sw addr; struct sljit_label *label; struct sljit_jump *jump; @@ -340,7 +344,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Just recording the address. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -373,7 +377,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -386,27 +390,27 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_CALL) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); - buf_ptr[0] = CALL | (addr & 0x3fffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1fffffff && addr >= -0x20000000); + buf_ptr[0] = CALL | ((sljit_ins)addr & 0x3fffffff); break; } if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); - buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= MAX_DISP && addr >= MIN_DISP); + buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | ((sljit_ins)addr & DISP_MASK); break; } /* Set the fields of immediate loads. 
*/ #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000)); - buf_ptr[0] |= (addr >> 10) & 0x3fffff; - buf_ptr[1] |= addr & 0x3ff; + buf_ptr[0] |= (sljit_ins)(addr >> 10) & 0x3fffff; + buf_ptr[1] |= (sljit_ins)addr & 0x3ff; #else #error "Implementation required" #endif @@ -416,7 +420,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; + addr = (sljit_sw)put_label->label->addr; buf_ptr = (sljit_ins *)put_label->addr; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) @@ -431,7 +435,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -487,13 +491,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define ALT_KEEP_CACHE 0x00040 #define CUMULATIVE_OP 0x00080 #define IMM_OP 0x00100 -#define SRC2_IMM 0x00200 +#define MOVE_OP 0x00200 +#define SRC2_IMM 0x00400 -#define REG_DEST 0x00400 -#define REG2_SOURCE 0x00800 -#define SLOW_SRC1 0x01000 -#define SLOW_SRC2 0x02000 -#define SLOW_DEST 0x04000 +#define REG_DEST 0x00800 +#define REG2_SOURCE 0x01000 +#define SLOW_SRC1 0x02000 +#define SLOW_SRC2 0x04000 +#define SLOW_DEST 0x08000 /* SET_FLAGS (0x10 << 19) also belong here! 
*/ @@ -507,6 +512,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { + sljit_s32 reg_index, types, tmp; + sljit_u32 float_offset, args_offset; + sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index; + CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); @@ -514,7 +523,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; compiler->local_size = local_size; - if (local_size <= SIMM_MAX) { + if (local_size <= -SIMM_MIN) { FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); } else { @@ -522,7 +531,88 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); } - /* Arguments are in their appropriate registers. 
*/ + arg_types >>= SLJIT_ARG_SHIFT; + + types = arg_types; + float_offset = 16 * sizeof(sljit_sw); + reg_index = 24; + + while (types && reg_index < 24 + 6) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (reg_index & 0x1) { + FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + if (reg_index >= 24 + 6 - 1) + break; + FAIL_IF(push_inst(compiler, STW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), MOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, STD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + + float_offset += sizeof(sljit_f64); + reg_index++; + break; + case SLJIT_ARG_TYPE_F32: + FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset += sizeof(sljit_f64); + break; + } + + reg_index++; + types >>= SLJIT_ARG_SHIFT; + } + + args_offset = (16 + 1 + 6) * sizeof(sljit_sw); + float_offset = 16 * sizeof(sljit_sw); + reg_index = 24; + saved_arg_index = 24; + scratch_arg_index = 8 - 1; + float_arg_index = 1; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (reg_index < 24 + 6 - 1) { + FAIL_IF(push_inst(compiler, LDDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + } else if (reg_index < 24 + 6) { + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset), MOVABLE_INS)); + } else { + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset + sizeof(sljit_sw)), MOVABLE_INS)); + } + + float_arg_index++; + float_offset += sizeof(sljit_f64); + reg_index++; + break; + case SLJIT_ARG_TYPE_F32: + if (reg_index < 24 + 6) + FAIL_IF(push_inst(compiler, 
LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS)); + float_arg_index++; + float_offset += sizeof(sljit_f64); + break; + default: + scratch_arg_index++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = saved_arg_index++; + if (tmp == reg_index) + break; + } else + tmp = scratch_arg_index; + + if (reg_index < 24 + 6) + FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp)); + else + FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp)); + break; + } + + reg_index++; + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -539,12 +629,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(SLJIT_R0) | S2(0), UNMOVABLE_INS); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - if (op != SLJIT_MOV || !FAST_IS_REG(src)) { + if (TYPE_CAST_NEEDED(op) || !FAST_IS_REG(src)) { FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); src = SLJIT_R0; } @@ -591,7 +690,7 @@ static const sljit_ins data_transfer_insts[16 + 4] = { #undef ARCH_32_64 /* Can perform an operation using at most 1 instruction. 
*/ -static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { SLJIT_ASSERT(arg & SLJIT_MEM); @@ -632,7 +731,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj } /* Emit the necessary instructions. See can_cache above. */ -static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 base, arg2, delay_slot; sljit_ins dest; @@ -660,7 +759,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl arg2 = reg; else /* It must be a mov operation, so tmp1 must be free to use. 
*/ arg2 = TMP_REG1; - FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2))); + FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | (sljit_ins)argw, DR(arg2))); } } else { @@ -692,7 +791,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); } -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg, arg, argw)) return compiler->error; @@ -701,14 +800,14 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -727,11 +826,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 compiler->cache_argw = 0; } - if (dst != SLJIT_UNUSED) { + if (dst != TMP_REG2) { if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV 
&& op <= SLJIT_MOV_P) + if (flags & MOVE_OP) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) @@ -782,7 +881,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = src2_r; } else if (src2 & SLJIT_IMM) { @@ -793,8 +892,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM)) - dst_r = 0; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } } } } @@ -888,7 +991,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -898,33 +1001,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile op = GET_OPCODE(op); switch (op) { case SLJIT_MOV: - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_S32: - return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_U8: - return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_u8)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_NOT: case SLJIT_CLZ: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: - return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); } return SLJIT_SUCCESS; @@ -935,17 +1034,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_u32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - op = GET_OPCODE(op); switch (op) { case SLJIT_ADD: @@ -975,6 +1071,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1015,7 +1125,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1027,8 +1137,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? 
single : double) +#define FLOAT_DATA(op) ((sljit_ins)DOUBLE_DATA | (((sljit_ins)(op) & SLJIT_32) >> 7)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_32) ? single : double) #define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -1108,11 +1218,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1126,7 +1236,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil if (src != dst_r) { if (dst_r != TMP_FREG1) { FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (!(op & SLJIT_F32_OP)) + if (!(op & SLJIT_32)) FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); } else @@ -1135,17 +1245,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil break; case SLJIT_NEG_F64: FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_F32_OP)) + if (dst_r != src && !(op & SLJIT_32)) FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); break; case SLJIT_ABS_F64: FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_F32_OP)) + if (dst_r != src && !(op & SLJIT_32)) FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); break; case SLJIT_CONV_F64_FROM_F32: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), 
MOVABLE_INS)); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; break; } @@ -1288,10 +1398,12 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_LESS: case SLJIT_GREATER_F64: /* Unordered. */ + case SLJIT_CARRY: return DA(0x5); case SLJIT_GREATER_EQUAL: case SLJIT_LESS_EQUAL_F64: + case SLJIT_NOT_CARRY: return DA(0xd); case SLJIT_GREATER: @@ -1315,15 +1427,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return DA(0x2); case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return DA(0x9); + /* fallthrough */ case SLJIT_UNORDERED_F64: return DA(0x7); case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return DA(0x1); + /* fallthrough */ case SLJIT_ORDERED_F64: return DA(0xf); @@ -1412,7 +1526,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -1460,7 +1574,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 reg; + sljit_u32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c index 79a7e8bba5..b9a7b39789 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c @@ -26,6 +26,10 @@ /* x86 32-bit arch dependent functions. */ +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) { sljit_u8 *inst; @@ -38,314 +42,8 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s return SLJIT_SUCCESS; } -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) -{ - sljit_s32 type = jump->flags >> TYPE_SHIFT; - - if (type == SLJIT_JUMP) { - *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { - *code_ptr++ = CALL_i32; - jump->addr++; - } - else { - *code_ptr++ = GROUP_0F; - *code_ptr++ = get_jump_code(type); - jump->addr += 2; - } - - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MW; - else - sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset); - code_ptr += 4; - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 args, size; - sljit_u8 *inst; - - CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - /* Emit ENDBR32 at function entry if 
needed. */ - FAIL_IF(emit_endbranch(compiler)); - - args = get_arg_count(arg_types); - compiler->args = args; - - /* [esp+0] for saving temporaries and function calls. */ - compiler->stack_tmp_size = 2 * sizeof(sljit_sw); - -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (scratches > 3) - compiler->stack_tmp_size = 3 * sizeof(sljit_sw); -#endif - - compiler->saveds_offset = compiler->stack_tmp_size; - if (scratches > 3) - compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); - - compiler->locals_offset = compiler->saveds_offset; - - if (saveds > 3) - compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); - - if (options & SLJIT_F64_ALIGNMENT) - compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); - - size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); -#else - size += (args > 0 ? 
(2 + args * 3) : 0); -#endif - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - PUSH_REG(reg_map[TMP_REG1]); -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; - } -#endif - if (saveds > 2 || scratches > 9) - PUSH_REG(reg_map[SLJIT_S2]); - if (saveds > 1 || scratches > 10) - PUSH_REG(reg_map[SLJIT_S1]); - if (saveds > 0 || scratches > 11) - PUSH_REG(reg_map[SLJIT_S0]); - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args > 0) { - inst[0] = MOV_r_rm; - inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; - inst += 2; - } - if (args > 1) { - inst[0] = MOV_r_rm; - inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; - inst += 2; - } - if (args > 2) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; - inst[2] = 0x24; - inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ - } -#else - if (args > 0) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; - inst[2] = sizeof(sljit_sw) * 2; - inst += 3; - } - if (args > 1) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; - inst[2] = sizeof(sljit_sw) * 3; - inst += 3; - } - if (args > 2) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; - inst[2] = sizeof(sljit_sw) * 4; - } -#endif - - SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); - -#if defined(__APPLE__) - /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ - saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? 
saveds : 3)) * sizeof(sljit_uw); - local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; -#else - if (options & SLJIT_F64_ALIGNMENT) - local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); - else - local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); -#endif - - compiler->local_size = local_size; - -#ifdef _WIN32 - if (local_size > 0) { - if (local_size <= 4 * 4096) { - if (local_size > 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); - if (local_size > 2 * 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); - if (local_size > 3 * 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); - } - else { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); - EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12); - - SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); - - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1)); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - - INC_SIZE(2); - inst[0] = JNE_i8; - inst[1] = (sljit_s8) -16; - } - - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); - } -#endif - - SLJIT_ASSERT(local_size > 0); - -#if !defined(__APPLE__) - if (options & SLJIT_F64_ALIGNMENT) { - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0); - - /* Some space might allocated during sljit_grow_stack() above on WIN32. 
*/ - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw))); - -#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->local_size > 1024) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw))); -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - - INC_SIZE(6); - inst[0] = GROUP_BINARY_81; - inst[1] = MOD_REG | AND | reg_map[SLJIT_SP]; - sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1)); - - /* The real local size must be used. */ - return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0); - } -#endif - return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - compiler->args = get_arg_count(arg_types); - - /* [esp+0] for saving temporaries and function calls. */ - compiler->stack_tmp_size = 2 * sizeof(sljit_sw); - -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (scratches > 3) - compiler->stack_tmp_size = 3 * sizeof(sljit_sw); -#endif - - compiler->saveds_offset = compiler->stack_tmp_size; - if (scratches > 3) - compiler->saveds_offset += ((scratches > (3 + 6)) ? 
6 : (scratches - 3)) * sizeof(sljit_sw); - - compiler->locals_offset = compiler->saveds_offset; - - if (saveds > 3) - compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); - - if (options & SLJIT_F64_ALIGNMENT) - compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); - -#if defined(__APPLE__) - saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; -#else - if (options & SLJIT_F64_ALIGNMENT) - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); - else - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); -#endif - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 size; - sljit_u8 *inst; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - SLJIT_ASSERT(compiler->args >= 0); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - SLJIT_ASSERT(compiler->local_size > 0); - -#if !defined(__APPLE__) - if (compiler->options & SLJIT_F64_ALIGNMENT) - EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) - else - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#else - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#endif - - size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + - (compiler->saveds <= 3 ? 
compiler->saveds : 3); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args > 2) - size += 2; -#endif - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - - if (compiler->saveds > 0 || compiler->scratches > 11) - POP_REG(reg_map[SLJIT_S0]); - if (compiler->saveds > 1 || compiler->scratches > 10) - POP_REG(reg_map[SLJIT_S1]); - if (compiler->saveds > 2 || compiler->scratches > 9) - POP_REG(reg_map[SLJIT_S2]); - POP_REG(reg_map[TMP_REG1]); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args > 2) - RET_I16(sizeof(sljit_sw)); - else - RET(); -#else - RET(); -#endif - - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - /* Size contains the flags as well. */ -static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, /* The register or immediate operand. */ sljit_s32 a, sljit_sw imma, /* The general operand (not immediate). */ @@ -353,8 +51,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 { sljit_u8 *inst; sljit_u8 *buf_ptr; - sljit_s32 flags = size & ~0xf; - sljit_s32 inst_size; + sljit_u8 reg_map_b; + sljit_uw flags = size; + sljit_uw inst_size; /* Both cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); @@ -367,8 +66,6 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); - /* We don't support (%ebp). 
*/ - SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5); size &= 0xf; inst_size = size; @@ -381,7 +78,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 /* Calculate size of b. */ inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { - if ((b & REG_MASK) == SLJIT_UNUSED) + if (!(b & REG_MASK)) inst_size += sizeof(sljit_sw); else if (immb != 0 && !(b & OFFS_REG_MASK)) { /* Immediate operand. */ @@ -390,11 +87,13 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 else inst_size += sizeof(sljit_sw); } + else if (reg_map[b & REG_MASK] == 5) + inst_size += sizeof(sljit_s8); if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) b |= TO_OFFS_REG(SLJIT_SP); - if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) + if (b & OFFS_REG_MASK) inst_size += 1; /* SIB byte. */ } @@ -445,9 +144,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (a & SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) - *buf_ptr = reg_map[a] << 3; + *buf_ptr = U8(reg_map[a] << 3); else - *buf_ptr = a << 3; + *buf_ptr = U8(a << 3); } else { if (a & SLJIT_IMM) { @@ -460,27 +159,30 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 *buf_ptr = 0; } - if (!(b & SLJIT_MEM)) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); - else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { - if (immb != 0) { + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? 
reg_map[b] : b)); + buf_ptr++; + } else if (b & REG_MASK) { + reg_map_b = reg_map[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) { + if (immb != 0 || reg_map_b == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; else *buf_ptr |= 0x80; } - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) - *buf_ptr++ |= reg_map[b & REG_MASK]; + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_map_b; else { *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3); + *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3)); } - if (immb != 0) { + if (immb != 0 || reg_map_b == 5) { if (immb <= 127 && immb >= -128) - *buf_ptr++ = immb; /* 8 bit displacement. */ + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ else { sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_sw); @@ -489,7 +191,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 } else { *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6); + *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); } } else { @@ -500,9 +202,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (a & SLJIT_IMM) { if (flags & EX86_BYTE_ARG) - *buf_ptr = imma; + *buf_ptr = U8(imma); else if (flags & EX86_HALF_ARG) - sljit_unaligned_store_s16(buf_ptr, imma); + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); else if (!(flags & EX86_SHIFT_INS)) sljit_unaligned_store_sw(buf_ptr, imma); } @@ -511,34 +213,449 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 } /* --------------------------------------------------------------------- */ -/* Call / return instructions */ +/* Enter / return */ /* --------------------------------------------------------------------- */ +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw 
executable_offset) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + + if (type == SLJIT_JUMP) { + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + *code_ptr++ = CALL_i32; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MW; + else + sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset)); + code_ptr += 4; + + return code_ptr; +} + +#define ENTER_R2_USED 0x00001 +#define ENTER_R2_TO_S 0x00002 +#define ENTER_R2_TO_R0 0x00004 +#define ENTER_R1_TO_S 0x00008 +#define ENTER_TMP_TO_R4 0x00010 +#define ENTER_TMP_TO_S 0x00020 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 word_arg_count, saved_arg_count, float_arg_count; + sljit_s32 size, locals_offset, args_size, types, status; + sljit_u8 *inst; +#ifdef _WIN32 + sljit_s32 r2_offset = -1; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR32 at function entry if needed. 
*/ + FAIL_IF(emit_endbranch(compiler)); + + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + arg_types >>= SLJIT_ARG_SHIFT; + types = arg_types; + word_arg_count = 0; + saved_arg_count = 0; + float_arg_count = 0; + args_size = SSIZE_OF(sw); + status = 0; + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + + if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + saved_arg_count++; + if (saved_arg_count == 4) + status |= ENTER_TMP_TO_S; + } else { + if (word_arg_count == 4) + status |= ENTER_TMP_TO_R4; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count == 3) + status |= ENTER_R2_USED; +#endif + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL)) + break; +#endif + + args_size += SSIZE_OF(sw); + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + args_size -= SSIZE_OF(sw); + compiler->args_size = args_size; + + /* [esp+0] for saving temporaries and function calls. */ + locals_offset = 2 * SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#else + if (scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#endif + + compiler->scratches_offset = locals_offset; + + if (scratches > 3) + locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); + + if (saveds > 3) + locals_offset += (saveds - 3) * SSIZE_OF(sw); + + compiler->locals_offset = locals_offset; + + size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? 
saveds : 3); + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); + FAIL_IF(!inst); + + INC_SIZE((sljit_uw)size); + PUSH_REG(reg_map[TMP_REG1]); + if (saveds > 2 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 10) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 11) + PUSH_REG(reg_map[SLJIT_S0]); + + size *= SSIZE_OF(sw); + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size); -static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) + size += SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + size += args_size; +#endif + + local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size; + compiler->local_size = local_size; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + size -= args_size; +#endif + + word_arg_count = 0; + saved_arg_count = 0; + args_size = size; + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) { + if (word_arg_count == 1) { + if (status & ENTER_R2_USED) { + EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? 
SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0); + } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + status |= ENTER_R2_TO_S; + saved_arg_count++; + } else + status |= ENTER_R2_TO_R0; + } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + status |= ENTER_R1_TO_S; + saved_arg_count++; + } + break; + } +#endif + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4)); + + if (word_arg_count <= 3) { +#ifdef _WIN32 + if (word_arg_count == 3 && local_size > 4 * 4096) + r2_offset = local_size + args_size; + else +#endif + EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + } + } else { + SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S)); + + if (saved_arg_count <= 3) + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + saved_arg_count++; + } + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) { + if (status & ENTER_R2_TO_R0) + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0); + + saved_arg_count = 0; + if (status & ENTER_R2_TO_S) { + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0); + saved_arg_count++; + } + + if (status & ENTER_R1_TO_S) + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0); + } +#endif + + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); + +#ifdef _WIN32 + SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096); + + if (local_size > 4096) { + if (local_size <= 4 * 4096) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + + if (local_size > 2 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12); + + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 
2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = LOOP_i8; + inst[1] = (sljit_u8)-16; + local_size &= 0xfff; + } + } + + if (local_size > 0) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + } + + if (r2_offset != -1) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + +#else /* !_WIN32 */ + + SLJIT_ASSERT(local_size > 0); + + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#endif /* _WIN32 */ + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) { + size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 stack_size = 0; + sljit_s32 args_size, locals_offset; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) sljit_s32 word_arg_count = 0; +#endif - arg_types >>= SLJIT_DEF_SHIFT; + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + arg_types >>= SLJIT_ARG_SHIFT; + args_size = 0; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); + args_size += SSIZE_OF(f32); + break; + default: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count >= 2) + args_size += SSIZE_OF(sw); + word_arg_count++; +#else + args_size += SSIZE_OF(sw); +#endif break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = args_size; + + 
/* [esp+0] for saving temporaries and function calls. */ + locals_offset = 2 * SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#else + if (scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#endif + + compiler->scratches_offset = locals_offset; + + if (scratches > 3) + locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); + + if (saveds > 3) + locals_offset += (saveds - 3) * SSIZE_OF(sw); + + compiler->locals_offset = locals_offset; + + saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + saveds += args_size; +#endif + + compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) +{ + sljit_uw size; + sljit_u8 *inst; + + size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + + if (compiler->saveds > 0 || compiler->scratches > 11) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 10) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S2]); + POP_REG(reg_map[TMP_REG1]); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_uw size; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + SLJIT_ASSERT(compiler->args_size >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); + + FAIL_IF(emit_stack_frame_release(compiler)); + + size = 1; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) + size = 3; +#endif + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) { + RET_I16(U8(compiler->args_size)); + return SLJIT_SUCCESS; + } +#endif + + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + +static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_sw stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + stack_size += 
SSIZE_OF(f32); break; default: word_arg_count++; if (word_arg_count > 2) - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (word_arg_count_ptr) @@ -548,12 +665,12 @@ static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word } static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, - sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) + sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) { sljit_u8 *inst; sljit_s32 float_arg_count; - if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) { + if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); @@ -561,41 +678,40 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, } else if (stack_size > 0) { if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); stack_size = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; float_arg_count = 0; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_count++; - FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_count++; FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + 
FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; if (word_arg_count == 3) { EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0); - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); } else if (word_arg_count == 4) { EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0); - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); } break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } } @@ -605,7 +721,7 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, FAIL_IF(!inst); INC_SIZE(1); - *inst++ = XCHG_EAX_r | reg_map[SLJIT_R2]; + *inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]); } else { inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); @@ -613,7 +729,7 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, INC_SIZE(2); *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; + *inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]); } } @@ -624,77 +740,73 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) { - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (word_arg_count_ptr) 
*word_arg_count_ptr = word_arg_count; - if (stack_size <= compiler->stack_tmp_size) + if (stack_size <= compiler->scratches_offset) return 0; -#if defined(__APPLE__) - return ((stack_size - compiler->stack_tmp_size + 15) & ~15); -#else - return stack_size - compiler->stack_tmp_size; -#endif + return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf); } static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, - sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count) + sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count) { sljit_s32 float_arg_count = 0; + sljit_u8 *inst; if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); if (stack_size > 0) - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); stack_size = 0; word_arg_count = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_count++; - FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_count++; FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? 
TMP_REG1 : word_arg_count, 0); - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -707,13 +819,12 @@ static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 single; if (stack_size > 0) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0); - if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) return SLJIT_SUCCESS; - single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32); + single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32); inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); FAIL_IF(!inst); @@ -725,16 +836,399 @@ static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); } +static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, + sljit_s32 *extra_space, sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_sw args_size, prev_args_size, saved_regs_size; + sljit_sw types, word_arg_count, float_arg_count; + sljit_sw stack_size, prev_stack_size, min_size, offset; + sljit_sw word_arg4_offset; + sljit_u8 r2_offset = 0; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL; +#endif + sljit_u8* inst; + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3)) * SSIZE_OF(sw); + + word_arg_count = 0; + float_arg_count = 0; + arg_types >>= SLJIT_ARG_SHIFT; + types = 0; + args_size = 0; + + while (arg_types != 0) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + float_arg_count++; + break; + default: + word_arg_count++; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!fast_call || word_arg_count > 2) + args_size += SSIZE_OF(sw); +#else + args_size += SSIZE_OF(sw); +#endif + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (args_size <= compiler->args_size +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call) +#endif /* SLJIT_X86_32_FASTCALL */ + && 1) { +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + *extra_space = fast_call ? 
0 : args_size; + prev_args_size = compiler->args_size; + stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; +#else /* !SLJIT_X86_32_FASTCALL */ + *extra_space = 0; + stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; +#endif /* SLJIT_X86_32_FASTCALL */ + + offset = stack_size + compiler->local_size; + + if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + r2_offset = sizeof(sljit_sw); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(compiler->options & SLJIT_ENTER_CDECL)) { + if (!fast_call) + offset -= SSIZE_OF(sw); + + if (word_arg_count >= 3) { + word_arg4_offset = SSIZE_OF(sw); + + if (word_arg_count + float_arg_count >= 4) { + word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw); + if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64); + } + + /* In cdecl mode, at least one more word value must + * be present on the stack before the return address. 
*/ + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0); + } + + if (fast_call) { + if (args_size < prev_args_size) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0); + } + } else if (prev_args_size > 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + } +#endif /* SLJIT_X86_32_FASTCALL */ + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) { + EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? 
SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0); + break; + } +#endif + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) + break; +#endif + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + break; + case 4: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); + FAIL_IF(emit_stack_frame_release(compiler)); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args_size < prev_args_size) + BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0); +#endif + + return SLJIT_SUCCESS; + } + + stack_size = args_size + SSIZE_OF(sw); + + if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) { + r2_offset = SSIZE_OF(sw); + stack_size += SSIZE_OF(sw); + } + + if (word_arg_count >= 3) + stack_size += SSIZE_OF(sw); + + prev_args_size = 0; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(compiler->options & SLJIT_ENTER_CDECL)) + prev_args_size = compiler->args_size; +#endif + + prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; + min_size = prev_stack_size + compiler->local_size; + + word_arg4_offset = compiler->scratches_offset; + + if (stack_size > min_size) { + BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0); + if (src == SLJIT_MEM1(SLJIT_SP)) + srcw += stack_size - min_size; + word_arg4_offset += stack_size - min_size; + } + else + stack_size = min_size; + + if (word_arg_count >= 3) { + 
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0); + + if (word_arg_count >= 4) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset); + } + + if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1) { + SLJIT_ASSERT(r2_offset == sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + + /* Restore saved registers. */ + offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset); + + if (compiler->saveds > 2 || compiler->scratches > 9) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if (compiler->saveds > 1 || compiler->scratches > 10) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if (compiler->saveds > 0 || compiler->scratches > 11) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + + /* Copy fourth argument and return address. 
*/ +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) { + offset = stack_size; + *extra_space = 0; + + if (word_arg_count >= 4 && prev_args_size == 0) { + offset -= SSIZE_OF(sw); + inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + + SLJIT_ASSERT(args_size != prev_args_size); + } else { + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + + if (args_size != prev_args_size) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw)); + } + + if (args_size != prev_args_size) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0); + } else { +#endif /* SLJIT_X86_32_FASTCALL */ + offset = stack_size - SSIZE_OF(sw); + *extra_space = args_size; + + if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) { + offset -= SSIZE_OF(sw); + inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + + SLJIT_ASSERT(prev_args_size > 0); + } else { + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + + if (prev_args_size > 0) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw)); + } + + /* Copy return address. 
*/ + if (prev_args_size > 0) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + } +#endif /* SLJIT_X86_32_FASTCALL */ + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) { + EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0); + break; + } +#endif + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) + break; +#endif + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + /* Skip return address. 
*/ + if (fast_call) + offset -= SSIZE_OF(sw); +#endif + + SLJIT_ASSERT(offset >= 0); + + if (offset == 0) + return SLJIT_SUCCESS; + + BINARY_IMM32(ADD, offset, SLJIT_SP, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space) +{ + /* Called when stack consumption cannot be reduced to 0. */ + sljit_u8 *inst; + + BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { struct sljit_jump *jump; - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + stack_size = type; + PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + if (stack_size == 0) { + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + return sljit_emit_jump(compiler, type); + } + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size)); + return jump; + } + #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if ((type & 0xff) == SLJIT_CALL) { stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); @@ -772,7 +1266,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) sljit_s32 swap_args; @@ -781,6 +1275,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + if (type & SLJIT_CALL_RETURN) { + stack_size = type; + FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw)); + + if (!(src & SLJIT_IMM)) { + src = SLJIT_R0; + srcw = 0; + } + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + if (stack_size == 0) + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + return emit_tail_call_end(compiler, stack_size); + } + #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3); @@ -800,7 +1315,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); - compiler->saveds_offset += stack_size; + compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -809,7 +1324,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi #endif FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - compiler->saveds_offset -= stack_size; + compiler->scratches_offset -= stack_size; compiler->locals_offset -= stack_size; return post_call_with_args(compiler, arg_types, 0); @@ -819,7 +1334,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); - compiler->saveds_offset += stack_size; + compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -828,7 +1343,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi #endif FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - compiler->saveds_offset -= stack_size; + compiler->scratches_offset -= stack_size; compiler->locals_offset -= stack_size; return post_call_with_args(compiler, arg_types, stack_size); @@ -844,10 +1359,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK_EXTRA_REGS(dst, dstw, (void)0); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { /* Unused dest is possible here. */ inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); @@ -895,34 +1406,18 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) { - sljit_s32 size, saved_size; - sljit_s32 has_f64_aligment; + sljit_sw size; /* Don't adjust shadow stack if it isn't enabled. */ - if (!cpu_has_shadow_stack ()) + if (!cpu_has_shadow_stack()) return SLJIT_SUCCESS; - SLJIT_ASSERT(compiler->args >= 0); + SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->local_size > 0); -#if !defined(__APPLE__) - has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT; -#else - has_f64_aligment = 0; -#endif - size = compiler->local_size; - saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw); - if (has_f64_aligment) { - /* mov TMP_REG1, [esp + local_size]. */ - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size); - /* mov TMP_REG1, [TMP_REG1+ saved_size]. */ - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size); - /* Move return address to [esp]. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0); - size = 0; - } else - size += saved_size; - - return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size); + size += (1 + (compiler->scratches > 9 ? 
(compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); + + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c index e85b56a61a..f37df6e1bf 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c @@ -26,6 +26,10 @@ /* x86 64-bit arch dependent functions. */ +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { sljit_u8 *inst; @@ -34,14 +38,246 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); - *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); + *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7)); sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } +static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) +{ + sljit_u8 *inst; + sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + sljit_unaligned_store_s32(inst, (sljit_s32)imm); + return SLJIT_SUCCESS; +} + +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, + /* The register or immediate operand. */ + sljit_s32 a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 *buf_ptr; + sljit_u8 rex = 0; + sljit_u8 reg_lmap_b; + sljit_uw flags = size; + sljit_uw inst_size; + + /* The immediate operand must be 32 bit. 
*/ + SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK)) { + if (NOT_HALFWORD(immb)) { + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG2); + else + b |= TMP_REG2; + } + else if (reg_lmap[b & REG_MASK] == 4) + b |= TO_OFFS_REG(SLJIT_SP); + } + + if (!(b & REG_MASK)) + inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ + else { + if (reg_map[b & REG_MASK] >= 8) + rex |= REX_B; + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { + /* Immediate operand. 
*/ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_s32); + } + else if (reg_lmap[b & REG_MASK] == 5) + inst_size += sizeof(sljit_s8); + + if (b & OFFS_REG_MASK) { + inst_size += 1; /* SIB byte. */ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } + } + } + else if (!(flags & EX86_SSE2_OP2)) { + if (reg_map[b] >= 8) + rex |= REX_B; + } + else if (freg_map[b] >= 8) + rex |= REX_B; + + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= compiler->mode32 ? 0x1f : 0x3f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_s32); + } + else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ + if (!(flags & EX86_SSE2_OP1)) { + if (reg_map[a] >= 8) + rex |= REX_R; + } + else if (freg_map[a] >= 8) + rex |= REX_R; + } + + if (rex) + inst_size++; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + if (rex) + *inst++ = rex; + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? 
GROUP_BINARY_83 : GROUP_BINARY_81; + + if (a & SLJIT_IMM) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = U8(reg_lmap[a] << 3); + else + *buf_ptr = U8(freg_lmap[a] << 3); + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b])); + buf_ptr++; + } else if (b & REG_MASK) { + reg_lmap_b = reg_lmap[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_lmap_b == 5) { + if (immb != 0 || reg_lmap_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_lmap_b; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + } + + if (immb != 0 || reg_lmap_b == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ + else { + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = 0x25; + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_s32); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = U8(imma); + else if (flags & EX86_HALF_ARG) + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); + else if (!(flags & EX86_SHIFT_INS)) + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma); + } + + return !(flags & EX86_SHIFT_INS) ? 
inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) { - sljit_s32 type = jump->flags >> TYPE_SHIFT; + sljit_uw type = jump->flags >> TYPE_SHIFT; int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); @@ -50,7 +286,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ if (type < SLJIT_JUMP) { /* Invert type. */ - *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; + *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10); *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); } @@ -63,13 +299,13 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ else if (short_addr) sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); else - sljit_unaligned_store_sw(code_ptr, jump->u.target); + sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target); code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); *code_ptr++ = REX_B; *code_ptr++ = GROUP_FF; - *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]; + *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? 
CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); return code_ptr; } @@ -90,7 +326,7 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); if ((code_ptr[0] & 0x07) != 0) { - code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08); + code_ptr[0] = U8(code_ptr[0] & ~0x08); code_ptr += 2 + sizeof(sljit_s32); } else { @@ -114,9 +350,9 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); - code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4); + code_ptr[0] = U8(code_ptr[0] & ~0x4); code_ptr[1] = MOV_rm_i32; - code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3)); + code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); put_label->addr = (sljit_uw)code_ptr; @@ -128,7 +364,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, i, tmp, size, saved_register_size; + sljit_uw size; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = 0; + sljit_s32 saved_regs_size, tmp, i; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; + sljit_s32 saved_float_regs_offset = 0; + sljit_s32 float_arg_count = 0; +#endif /* _WIN64 */ sljit_u8 *inst; CHECK_ERROR(); @@ -140,19 +384,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi compiler->mode32 = 0; -#ifdef _WIN64 - /* Two/four register slots for parameters plus space for xmm6 register if needed. */ - if (fscratches >= 6 || fsaveds >= 1) - compiler->locals_offset = 6 * sizeof(sljit_sw); - else - compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); -#endif - /* Including the return address saved by the call instruction. 
*/ - saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) { + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { size = reg_map[i] >= 8 ? 2 : 1; inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -172,55 +408,75 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi PUSH_REG(reg_lmap[i]); } - args = get_arg_count(arg_types); +#ifdef _WIN64 + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); - if (args > 0) { - size = args * 3; - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); + if (saved_float_regs_size > 0) { + saved_float_regs_offset = ((local_size + 0xf) & ~0xf); + local_size = saved_float_regs_offset + saved_float_regs_size; + } +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ - INC_SIZE(size); + arg_types >>= SLJIT_ARG_SHIFT; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + tmp = 0; #ifndef _WIN64 - if (args > 0) { - inst[0] = REX_W; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; - inst += 3; - } - if (args > 1) { - inst[0] = REX_W | REX_R; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; - inst += 3; - } - if (args > 2) { - inst[0] = REX_W | REX_R; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; - } -#else - if (args > 0) { - inst[0] = REX_W; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; - inst += 3; - } - if (args > 1) { - inst[0] = REX_W; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; - inst += 3; - } - if (args > 2) { - inst[0] = 
REX_W | REX_B; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; + switch (word_arg_count) { + case 0: + tmp = SLJIT_R2; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = TMP_REG1; + break; + default: + tmp = SLJIT_R3; + break; + } +#else /* !_WIN64 */ + switch (word_arg_count + float_arg_count) { + case 0: + tmp = SLJIT_R3; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = SLJIT_R2; + break; + default: + tmp = TMP_REG1; + break; + } +#endif /* _WIN64 */ + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (tmp != SLJIT_R0 + word_arg_count) + EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0); + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0); + saved_arg_count++; + } + word_arg_count++; + } else { +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + float_arg_count++; + if (float_arg_count != float_arg_count + word_arg_count) + FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32, + float_arg_count, float_arg_count + word_arg_count, 0)); +#endif /* _WIN64 */ } -#endif + arg_types >>= SLJIT_ARG_SHIFT; } - local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; compiler->local_size = local_size; #ifdef _WIN64 @@ -234,44 +490,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); } else { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12); - SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); - - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); - 
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1)); + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + BINARY_IMM32(SUB, 1, TMP_REG1, 0); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); inst[0] = JNE_i8; - inst[1] = (sljit_s8) -19; + inst[1] = (sljit_u8)-21; + local_size &= 0xfff; } - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + if (local_size > 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); } -#endif +#endif /* _WIN64 */ - if (local_size > 0) { - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - } + if (local_size > 0) + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); #ifdef _WIN64 - /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ - if (fscratches >= 6 || fsaveds >= 1) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - sljit_unaligned_store_s32(inst, 0x20247429); + if (saved_float_regs_size > 0) { + compiler->mode32 = 1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_xm_x; + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_xm_x; + saved_float_regs_offset += 16; + } } -#endif +#endif /* _WIN64 */ return SLJIT_SUCCESS; } @@ -280,46 +541,65 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 saved_register_size; + 
sljit_s32 saved_regs_size; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; +#endif /* _WIN64 */ CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); #ifdef _WIN64 - /* Two/four register slots for parameters plus space for xmm6 register if needed. */ - if (fscratches >= 6 || fsaveds >= 1) - compiler->locals_offset = 6 * sizeof(sljit_sw); - else - compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); -#endif + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_size > 0) + local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size; +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ /* Including the return address saved by the call instruction. */ - saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_s32 i, tmp, size; + sljit_uw size; + sljit_s32 i, tmp; sljit_u8 *inst; +#ifdef _WIN64 + sljit_s32 saved_float_regs_offset; + sljit_s32 fscratches = compiler->fscratches; + sljit_s32 fsaveds = compiler->fsaveds; +#endif /* _WIN64 */ - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); +#ifdef _WIN64 + saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); - 
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + if (saved_float_regs_offset > 0) { + compiler->mode32 = 1; + saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_x_xm; + saved_float_regs_offset += 16; + } -#ifdef _WIN64 - /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ - if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - sljit_unaligned_store_s32(inst, 0x20247428); + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_x_xm; + saved_float_regs_offset += 16; + } } -#endif +#endif /* _WIN64 */ if (compiler->local_size > 0) { if (compiler->local_size <= 127) { @@ -329,7 +609,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp *inst++ = REX_W; *inst++ = GROUP_BINARY_83; *inst++ = MOD_REG | ADD | 4; - *inst = compiler->local_size; + *inst = U8(compiler->local_size); } else { inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); @@ -364,243 +644,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp POP_REG(reg_lmap[i]); } - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - RET(); - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) -{ - sljit_u8 *inst; - sljit_s32 length = 1 + (rex ? 
1 : 0) + sizeof(sljit_s32); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + length); - FAIL_IF(!inst); - INC_SIZE(length); - if (rex) - *inst++ = rex; - *inst++ = opcode; - sljit_unaligned_store_s32(inst, imm); return SLJIT_SUCCESS; } -static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, - /* The register or immediate operand. */ - sljit_s32 a, sljit_sw imma, - /* The general operand (not immediate). */ - sljit_s32 b, sljit_sw immb) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) { sljit_u8 *inst; - sljit_u8 *buf_ptr; - sljit_u8 rex = 0; - sljit_s32 flags = size & ~0xf; - sljit_s32 inst_size; - - /* The immediate operand must be 32 bit. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); - /* Both cannot be switched on. */ - SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); - /* Size flags not allowed for typed instructions. */ - SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); - /* Both size flags cannot be switched on. */ - SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); - /* SSE2 and immediate is not possible. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); - SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) - && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) - && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); - - size &= 0xf; - inst_size = size; - - if (!compiler->mode32 && !(flags & EX86_NO_REXW)) - rex |= REX_W; - else if (flags & EX86_REX) - rex |= REX; - - if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) - inst_size++; - if (flags & EX86_PREF_66) - inst_size++; - - /* Calculate size of b. */ - inst_size += 1; /* mod r/m byte. 
*/ - if (b & SLJIT_MEM) { - if (!(b & OFFS_REG_MASK)) { - if (NOT_HALFWORD(immb)) { - PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); - immb = 0; - if (b & REG_MASK) - b |= TO_OFFS_REG(TMP_REG2); - else - b |= TMP_REG2; - } - else if (reg_lmap[b & REG_MASK] == 4) - b |= TO_OFFS_REG(SLJIT_SP); - } - - if ((b & REG_MASK) == SLJIT_UNUSED) - inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ - else { - if (reg_map[b & REG_MASK] >= 8) - rex |= REX_B; - - if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { - /* Immediate operand. */ - if (immb <= 127 && immb >= -128) - inst_size += sizeof(sljit_s8); - else - inst_size += sizeof(sljit_s32); - } - else if (reg_lmap[b & REG_MASK] == 5) - inst_size += sizeof(sljit_s8); - - if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { - inst_size += 1; /* SIB byte. */ - if (reg_map[OFFS_REG(b)] >= 8) - rex |= REX_X; - } - } - } - else if (!(flags & EX86_SSE2_OP2)) { - if (reg_map[b] >= 8) - rex |= REX_B; - } - else if (freg_map[b] >= 8) - rex |= REX_B; - - if (a & SLJIT_IMM) { - if (flags & EX86_BIN_INS) { - if (imma <= 127 && imma >= -128) { - inst_size += 1; - flags |= EX86_BYTE_ARG; - } else - inst_size += 4; - } - else if (flags & EX86_SHIFT_INS) { - imma &= compiler->mode32 ? 0x1f : 0x3f; - if (imma != 1) { - inst_size ++; - flags |= EX86_BYTE_ARG; - } - } else if (flags & EX86_BYTE_ARG) - inst_size++; - else if (flags & EX86_HALF_ARG) - inst_size += sizeof(short); - else - inst_size += sizeof(sljit_s32); - } - else { - SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); - /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ - if (!(flags & EX86_SSE2_OP1)) { - if (reg_map[a] >= 8) - rex |= REX_R; - } - else if (freg_map[a] >= 8) - rex |= REX_R; - } - - if (rex) - inst_size++; - - inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); - PTR_FAIL_IF(!inst); - - /* Encoding the byte. 
*/ - INC_SIZE(inst_size); - if (flags & EX86_PREF_F2) - *inst++ = 0xf2; - if (flags & EX86_PREF_F3) - *inst++ = 0xf3; - if (flags & EX86_PREF_66) - *inst++ = 0x66; - if (rex) - *inst++ = rex; - buf_ptr = inst + size; - - /* Encode mod/rm byte. */ - if (!(flags & EX86_SHIFT_INS)) { - if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) - *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - if (a & SLJIT_IMM) - *buf_ptr = 0; - else if (!(flags & EX86_SSE2_OP1)) - *buf_ptr = reg_lmap[a] << 3; - else - *buf_ptr = freg_lmap[a] << 3; - } - else { - if (a & SLJIT_IMM) { - if (imma == 1) - *inst = GROUP_SHIFT_1; - else - *inst = GROUP_SHIFT_N; - } else - *inst = GROUP_SHIFT_CL; - *buf_ptr = 0; - } - - if (!(b & SLJIT_MEM)) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]); - else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { - if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { - if (immb <= 127 && immb >= -128) - *buf_ptr |= 0x40; - else - *buf_ptr |= 0x80; - } - - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) - *buf_ptr++ |= reg_lmap[b & REG_MASK]; - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); - } - - if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { - if (immb <= 127 && immb >= -128) - *buf_ptr++ = immb; /* 8 bit displacement. */ - else { - sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ - buf_ptr += sizeof(sljit_s32); - } - } - } - else { - if (reg_lmap[b & REG_MASK] == 5) - *buf_ptr |= 0x40; - *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); - if (reg_lmap[b & REG_MASK] == 5) - *buf_ptr++ = 0; - } - } - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = 0x25; - sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. 
*/ - buf_ptr += sizeof(sljit_s32); - } + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); - if (a & SLJIT_IMM) { - if (flags & EX86_BYTE_ARG) - *buf_ptr = imma; - else if (flags & EX86_HALF_ARG) - sljit_unaligned_store_s16(buf_ptr, imma); - else if (!(flags & EX86_SHIFT_INS)) - sljit_unaligned_store_s32(buf_ptr, imma); - } + FAIL_IF(emit_stack_frame_release(compiler)); - return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ @@ -609,43 +669,38 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 #ifndef _WIN64 -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) { sljit_s32 src = src_ptr ? (*src_ptr) : 0; sljit_s32 word_arg_count = 0; SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2); - - compiler->mode32 = 0; + SLJIT_ASSERT(!(src & SLJIT_MEM)); /* Remove return value. 
*/ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) word_arg_count++; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (word_arg_count == 0) return SLJIT_SUCCESS; - if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); - *src_ptr = TMP_REG2; + if (word_arg_count >= 3) { + if (src == SLJIT_R2) + *src_ptr = TMP_REG1; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); } - else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2) - *src_ptr = TMP_REG1; - if (word_arg_count >= 3) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0); } #else -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) { sljit_s32 src = src_ptr ? 
(*src_ptr) : 0; sljit_s32 arg_count = 0; @@ -656,16 +711,16 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 }; SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9); + SLJIT_ASSERT(!(src & SLJIT_MEM)); - compiler->mode32 = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; float_arg_count++; @@ -687,29 +742,23 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (!data_trandfer) return SLJIT_SUCCESS; - if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); - *src_ptr = TMP_REG2; - } - while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: if (arg_count != float_arg_count) - FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); + FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0)); arg_count--; float_arg_count--; break; - case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: if (arg_count != float_arg_count) - FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0)); + FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); arg_count--; float_arg_count--; break; @@ -721,7 +770,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ 
-735,13 +784,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); - PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0)); + compiler->mode32 = 0; + + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_jump(compiler, type); } @@ -752,7 +807,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); - FAIL_IF(call_with_args(compiler, arg_types, &src, srcw)); + compiler->mode32 = 0; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + + FAIL_IF(call_with_args(compiler, arg_types, &src)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -770,10 +843,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. 
*/ - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { if (reg_map[dst] < 8) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); @@ -850,9 +919,6 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. */ - if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { @@ -903,16 +969,16 @@ static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) sljit_s32 tmp, size; /* Don't adjust shadow stack if it isn't enabled. */ - if (!cpu_has_shadow_stack ()) + if (!cpu_has_shadow_stack()) return SLJIT_SUCCESS; size = compiler->local_size; tmp = compiler->scratches; if (tmp >= SLJIT_FIRST_SAVED_REG) - size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * sizeof(sljit_uw); + size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw); tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; if (SLJIT_S0 >= tmp) - size += (SLJIT_S0 - tmp + 1) * sizeof(sljit_uw); + size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw); - return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size); + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c index 515d98aefd..c7dd9be8fd 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c @@ -65,6 +65,8 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 15 - R15 */ +#define TMP_FREG (0) + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Last register + 1. 
*/ @@ -77,9 +79,9 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { #define CHECK_EXTRA_REGS(p, w, do) \ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ if (p <= compiler->scratches) \ - w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ + w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ else \ - w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ + w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \ p = SLJIT_MEM1(SLJIT_SP); \ do; \ } @@ -115,11 +117,11 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { /* Args: xmm0-xmm3 */ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 4, 0, 1, 2, 3, 5, 6 + 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; /* low-map. freg_map & 0x7. */ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 4, 0, 1, 2, 3, 5, 6 + 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }; #define REX_W 0x48 @@ -143,7 +145,8 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #endif /* SLJIT_CONFIG_X86_32 */ -#define TMP_FREG (0) +#define U8(v) ((sljit_u8)(v)) + /* Size flags for emit_x86_instruction: */ #define EX86_BIN_INS 0x0010 @@ -205,12 +208,15 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define JMP_i32 0xe9 #define JMP_rm (/* GROUP_FF */ 4 << 3) #define LEA_r_m 0x8d +#define LOOP_i8 0xe2 #define MOV_r_rm 0x8b #define MOV_r_i32 0xb8 #define MOV_rm_r 0x89 #define MOV_rm_i32 0xc7 #define MOV_rm8_i8 0xc6 #define MOV_rm8_r8 0x88 +#define MOVAPS_x_xm 0x28 +#define MOVAPS_xm_x 0x29 #define MOVSD_x_xm 0x10 #define MOVSD_xm_x 0x11 #define MOVSXD_r_rm 0x63 @@ -274,14 +280,12 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define MOD_REG 0xc0 #define MOD_DISP8 0x40 -#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) +#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) -#define PUSH_REG(r) 
(*inst++ = (PUSH_r + (r))) -#define POP_REG(r) (*inst++ = (POP_r + (r))) -#define RET() (*inst++ = (RET_near)) -#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) -/* r32, r/m32 */ -#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) +#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) +#define POP_REG(r) (*inst++ = U8(POP_r + (r))) +#define RET() (*inst++ = RET_near) +#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads @@ -371,7 +375,7 @@ static void get_cpu_features(void) cpu_has_cmov = (features >> 15) & 0x1; } -static sljit_u8 get_jump_code(sljit_s32 type) +static sljit_u8 get_jump_code(sljit_uw type) { switch (type) { case SLJIT_EQUAL: @@ -383,10 +387,12 @@ static sljit_u8 get_jump_code(sljit_s32 type) return 0x85 /* jne */; case SLJIT_LESS: + case SLJIT_CARRY: case SLJIT_LESS_F64: return 0x82 /* jc */; case SLJIT_GREATER_EQUAL: + case SLJIT_NOT_CARRY: case SLJIT_GREATER_EQUAL_F64: return 0x83 /* jae */; @@ -434,14 +440,14 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { - sljit_s32 type = jump->flags >> TYPE_SHIFT; + sljit_uw type = jump->flags >> TYPE_SHIFT; sljit_s32 short_jump; sljit_uw label_addr; if (jump->flags & JUMP_LABEL) label_addr = (sljit_uw)(code + jump->u.label->size); else - label_addr = jump->u.target - executable_offset; + label_addr = jump->u.target - (sljit_uw)executable_offset; short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; @@ -463,7 +469,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code jump->addr++; } else if (short_jump) { - *code_ptr++ = 
get_jump_code(type) - 0x10; + *code_ptr++ = U8(get_jump_code(type) - 0x10); jump->addr++; } else { @@ -492,7 +498,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u8 *buf_end; sljit_u8 len; sljit_sw executable_offset; - sljit_sw jump_addr; + sljit_uw jump_addr; struct sljit_label *label; struct sljit_jump *jump; @@ -530,7 +536,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil switch (*buf_ptr) { case 0: label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; break; case 1: @@ -575,11 +581,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - jump_addr = jump->addr + executable_offset; + jump_addr = jump->addr + (sljit_uw)executable_offset; if (jump->flags & PATCH_MB) { SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); - *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); + *(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -600,7 +606,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) else if (jump->flags & PATCH_MD) - sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); #endif jump = jump->next; @@ -626,7 +632,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = 
code_ptr - code; + compiler->executable_size = (sljit_uw)(code_ptr - code); code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); @@ -682,17 +688,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) -static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#define BINARY_IMM32(op_imm, immw, arg, argw) \ + do { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } while (0) -static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + do { \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + BINARY_IMM32(op_imm, immw, arg, argw); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } \ + } while (0) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? 
REX_W : 0, (op_eax_imm), immw)) + +#else /* !SLJIT_CONFIG_X86_64 */ + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + BINARY_IMM32(op_imm, immw, arg, argw) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif /* SLJIT_CONFIG_X86_64 */ static sljit_s32 emit_mov(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, @@ -795,7 +824,7 @@ static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) } static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, - sljit_s32 src, sljit_sw srcw, sljit_s32 base, sljit_sw disp) + sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) sljit_u8 *inst, *jz_after_cmp_inst; @@ -821,12 +850,6 @@ static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compile EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0); #endif /* SLJIT_CONFIG_X86_32 */ - if (src == SLJIT_UNUSED) { - /* Return address is on stack. */ - src = SLJIT_MEM1(base); - srcw = disp; - } - /* Compare return address against TMP_REG1. 
*/ FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw)); @@ -861,8 +884,6 @@ static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compile SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_UNUSED_ARG(base); - SLJIT_UNUSED_ARG(disp); #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ return SLJIT_SUCCESS; } @@ -879,8 +900,6 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, { sljit_u8* inst; - SLJIT_ASSERT(dst != SLJIT_UNUSED); - if (FAST_IS_REG(src)) { inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); FAIL_IF(!inst); @@ -890,14 +909,14 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); #else if (!compiler->mode32) { if (NOT_HALFWORD(srcw)) return emit_load_imm64(compiler, dst, srcw); } else - return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? 
REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw); #endif } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -938,7 +957,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile { sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_s32 size; + sljit_uw size; #endif CHECK_ERROR(); @@ -975,7 +994,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile && reg_map[SLJIT_R1] < 7 && reg_map[TMP_REG1] == 2); #endif - compiler->mode32 = op & SLJIT_I32_OP; + compiler->mode32 = op & SLJIT_32; #endif SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); @@ -1084,7 +1103,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ FAIL_IF(!inst); \ INC_SIZE(1); \ - *inst = (prefix); \ + *inst = U8(prefix); \ } while (0) static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, @@ -1104,7 +1123,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); #else inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); FAIL_IF(!inst); @@ -1134,7 +1153,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else if (FAST_IS_REG(src) && reg_map[src] >= 4) { /* src, dst are registers. 
*/ - SLJIT_ASSERT(SLOW_IS_REG(dst)); + SLJIT_ASSERT(FAST_IS_REG(dst)); if (reg_map[dst] < 4) { if (dst != src) EMIT_MOV(compiler, dst, 0, src, 0); @@ -1193,7 +1212,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, } if (work_r == SLJIT_R0) { - ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); } else { inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); @@ -1206,7 +1225,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, *inst = MOV_rm8_r8; if (work_r == SLJIT_R0) { - ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); } else { inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); @@ -1267,7 +1286,7 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); #else inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); FAIL_IF(!inst); @@ -1316,9 +1335,6 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, return SLJIT_SUCCESS; } - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src, srcw); inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); @@ -1343,9 +1359,6 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, { sljit_u8* inst; - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src, srcw); inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); @@ -1412,7 +1425,7 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); #else if (cpu_has_cmov) { - EMIT_MOV(compiler, 
TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)); + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31)); inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); FAIL_IF(!inst); @@ -1420,9 +1433,9 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, *inst = CMOVE_r_rm; } else - FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31))); + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31))); - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_32) ? 63 : 31, dst_r, 0); #endif FAIL_IF(!inst); @@ -1450,7 +1463,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); CHECK_EXTRA_REGS(src, srcw, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op_flags & SLJIT_I32_OP; + compiler->mode32 = op_flags & SLJIT_32; #endif op = GET_OPCODE(op); @@ -1465,8 +1478,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } - if (op_flags & SLJIT_I32_OP) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op_flags & SLJIT_32) { if (src & SLJIT_MEM) { if (op == SLJIT_MOV_S32) op = SLJIT_MOV_U32; @@ -1475,8 +1488,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (op == SLJIT_MOV_U32) op = SLJIT_MOV_S32; } -#endif } +#endif if (src & SLJIT_IMM) { switch (op) { @@ -1520,8 +1533,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif - 
FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + EMIT_MOV(compiler, dst, dstw, src, srcw); break; case SLJIT_MOV_U8: FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); @@ -1542,6 +1556,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV_S32: FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); break; + case SLJIT_MOV32: + compiler->mode32 = 1; + EMIT_MOV(compiler, dst, dstw, src, srcw); + compiler->mode32 = 0; + break; #endif } @@ -1558,9 +1577,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_not_with_flags(compiler, dst, dstw, src, srcw); return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); - case SLJIT_NEG: - return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); - case SLJIT_CLZ: return emit_clz(compiler, op_flags, dst, dstw, src, srcw); } @@ -1568,36 +1584,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - if (IS_HALFWORD(immw) || compiler->mode32) { \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); \ - } \ - else { \ - FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ - inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ - FAIL_IF(!inst); \ - *inst = (op_mr); \ - } - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? 
REX_W : 0, (op_eax_imm), immw)) - -#else - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) - -#endif - static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, @@ -1605,23 +1591,10 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; - sljit_u8 op_eax_imm = (op_types >> 24); - sljit_u8 op_rm = (op_types >> 16) & 0xff; - sljit_u8 op_mr = (op_types >> 8) & 0xff; - sljit_u8 op_imm = op_types & 0xff; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - return SLJIT_SUCCESS; - } + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { @@ -1725,23 +1698,10 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; - sljit_u8 op_eax_imm = (op_types >> 24); - sljit_u8 op_rm = (op_types >> 16) & 0xff; - sljit_u8 op_mr = (op_types >> 8) & 0xff; - sljit_u8 op_imm = op_types & 0xff; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - return SLJIT_SUCCESS; - } + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types 
>> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { @@ -1810,9 +1770,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; - sljit_s32 dst_r; - - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* Register destination. */ if (dst_r == src1 && !(src2 & SLJIT_IMM)) { @@ -1841,7 +1799,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); - *inst = (sljit_s8)src1w; + *inst = U8(src1w); } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { @@ -1884,7 +1842,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); - *inst = (sljit_s8)src2w; + *inst = U8(src2w); } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { @@ -2167,13 +2125,6 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; return SLJIT_SUCCESS; } - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); - FAIL_IF(!inst); - *inst |= mode; - return SLJIT_SUCCESS; - } if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); @@ -2206,7 +2157,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); } - else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + else if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) { if (src1 != dst) EMIT_MOV(compiler, dst, 0, src1, src1w); 
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); @@ -2235,7 +2186,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #endif - if (dst != SLJIT_UNUSED) + if (dst != TMP_REG1) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); } @@ -2273,7 +2224,7 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); if (FAST_IS_REG(dst)) - return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0); + return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); return SLJIT_SUCCESS; } @@ -2283,7 +2234,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -2292,11 +2243,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile CHECK_EXTRA_REGS(src1, src1w, (void)0); CHECK_EXTRA_REGS(src2, src2w, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op & SLJIT_I32_OP; + compiler->mode32 = op & SLJIT_32; #endif - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; + SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op)); switch (GET_OPCODE(op)) { case SLJIT_ADD: @@ -2310,17 +2260,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_cum_binary(compiler, BINARY_OPCODE(ADC), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: + if (src1 == SLJIT_IMM && src1w == 0) + return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); + if (!HAS_FLAGS(op)) { if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, 
dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; - if (SLOW_IS_REG(dst) && src2 == dst) { + if (FAST_IS_REG(dst) && src2 == dst) { FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); } } - if (dst == SLJIT_UNUSED) - return emit_cmp_binary(compiler, src1, src1w, src2, src2w); return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: @@ -2329,8 +2280,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_MUL: return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: - if (dst == SLJIT_UNUSED) - return emit_test_binary(compiler, src1, src1w, src2, src2w); return emit_cum_binary(compiler, BINARY_OPCODE(AND), dst, dstw, src1, src1w, src2, src2w); case SLJIT_OR: @@ -2353,6 +2302,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + if (opcode == SLJIT_SUB) { + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + } + return 
emit_test_binary(compiler, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -2369,7 +2350,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp /* Don't adjust shadow stack if it isn't enabled. */ if (!cpu_has_shadow_stack ()) return SLJIT_SUCCESS; - return adjust_shadow_stack(compiler, src, srcw, SLJIT_UNUSED, 0); + return adjust_shadow_stack(compiler, src, srcw); case SLJIT_PREFETCH_L1: case SLJIT_PREFETCH_L2: case SLJIT_PREFETCH_L3: @@ -2401,7 +2382,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { sljit_u8 *inst; @@ -2420,13 +2401,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* --------------------------------------------------------------------- */ /* Alignment(3) + 4 * 16 bytes. */ -static sljit_s32 sse2_data[3 + (4 * 4)]; -static sljit_s32 *sse2_buffer; +static sljit_u32 sse2_data[3 + (4 * 4)]; +static sljit_u32 *sse2_buffer; static void init_compiler(void) { /* Align to 16 bytes. */ - sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); + sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); /* Single precision constants (each constant is 16 byte long). */ sse2_buffer[0] = 0x80000000; @@ -2486,7 +2467,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp compiler->mode32 = 0; #endif - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = CVTTSD2SI_r_xm; @@ -2518,7 +2499,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp srcw = 0; } - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = CVTSI2SD_x_rm; @@ -2527,7 +2508,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp compiler->mode32 = 1; #endif if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2536,11 +2517,11 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { if (!FAST_IS_REG(src1)) { - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); src1 = TMP_FREG; } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -2558,11 +2539,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil if (GET_OPCODE(op) == SLJIT_MOV_F64) { if (FAST_IS_REG(dst)) - return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw); + return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw); if (FAST_IS_REG(src)) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src); - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw)); - return 
emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); } if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { @@ -2571,41 +2552,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* We overwrite the high bits of source. From SLJIT point of view, this is not an issue. Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ - FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0)); + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0)); } else { - FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); src = TMP_FREG; } - FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0)); + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0)); if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } if (FAST_IS_REG(dst)) { dst_r = dst; if (dst != src) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); } switch (GET_OPCODE(op)) { case SLJIT_NEG_F64: - FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8))); + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? 
sse2_buffer : sse2_buffer + 8))); break; case SLJIT_ABS_F64: - FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12))); + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12))); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2636,37 +2617,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil src2w = src1w; } else if (dst != src2) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; case SLJIT_SUB_F64: - FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; case SLJIT_MUL_F64: - FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; case SLJIT_DIV_F64: - FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; } if (dst_r == 
TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2708,7 +2689,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF_NULL(jump); - set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); type &= 0xff; /* Worst case size. */ @@ -2740,8 +2721,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (src == SLJIT_IMM) { jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF_NULL(jump); - set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT)); - jump->u.target = srcw; + set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); + jump->u.target = (sljit_uw)srcw; /* Worst case size. */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -2764,7 +2745,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_FF; - *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; + *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm)); } return SLJIT_SUCCESS; } @@ -2790,7 +2771,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co type &= 0xff; /* setcc = jcc + 0x10. 
*/ - cond_set = get_jump_code(type) + 0x10; + cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { @@ -2802,9 +2783,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = GROUP_0F; *inst++ = cond_set; *inst++ = MOD_REG | reg_lmap[TMP_REG1]; - *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); + *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); *inst++ = OR_rm8_r8; - *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; + *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); return SLJIT_SUCCESS; } @@ -2822,7 +2803,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* The movzx instruction does not affect flags. */ *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; - *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; + *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); if (reg != TMP_REG1) return SLJIT_SUCCESS; @@ -2849,11 +2830,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* Set low byte to conditional flag. */ *inst++ = GROUP_0F; *inst++ = cond_set; - *inst++ = MOD_REG | reg_map[dst]; + *inst++ = U8(MOD_REG | reg_map[dst]); *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; - *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; + *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]); return SLJIT_SUCCESS; } @@ -2872,15 +2853,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = GROUP_0F; /* cmovcc = setcc - 0x50. 
*/ - *inst++ = cond_set - 0x50; - *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; + *inst++ = U8(cond_set - 0x50); + *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]); return SLJIT_SUCCESS; } inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 3 + 1); - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); /* Set al to conditional flag. */ *inst++ = GROUP_0F; *inst++ = cond_set; @@ -2888,8 +2869,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; - *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */); + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); return SLJIT_SUCCESS; } @@ -2901,13 +2882,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(!inst); INC_SIZE(1 + 3 + 2 + 1); /* Set low register to conditional flag. */ - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); *inst++ = GROUP_0F; *inst++ = cond_set; *inst++ = MOD_REG | 0 /* eax */; *inst++ = OR_rm8_r8; *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); } else { inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); @@ -2915,14 +2896,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co INC_SIZE(2 + 3 + 2 + 2); /* Set low register to conditional flag. 
*/ *inst++ = XCHG_r_rm; - *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); *inst++ = GROUP_0F; *inst++ = cond_set; *inst++ = MOD_REG | 1 /* ecx */; *inst++ = OR_rm8_r8; *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; *inst++ = XCHG_r_rm; - *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); } return SLJIT_SUCCESS; } @@ -2931,7 +2912,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 3 + 1); - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); /* Set al to conditional flag. */ *inst++ = GROUP_0F; *inst++ = cond_set; @@ -2941,7 +2922,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = MOVZX_r_rm8; *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); if (GET_OPCODE(op) < SLJIT_ADD) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); @@ -2964,7 +2945,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); @@ -2977,8 +2958,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_EXTRA_REGS(src, srcw, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = dst_reg & SLJIT_I32_OP; - dst_reg &= ~SLJIT_I32_OP; + compiler->mode32 = dst_reg & SLJIT_32; + dst_reg &= ~SLJIT_32; #endif if 
(SLJIT_UNLIKELY(src & SLJIT_IMM)) { @@ -2990,7 +2971,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; - *inst = get_jump_code(type & 0xff) - 0x40; + *inst = U8(get_jump_code(type & 0xff) - 0x40); return SLJIT_SUCCESS; } @@ -3123,9 +3104,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset); + sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset)); #else - sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target); + sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target); #endif SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1); } diff --git a/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c b/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c index 147175afa6..915411fbed 100644 --- a/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c +++ b/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c @@ -66,7 +66,7 @@ /* --------------------------------------------------------------------- */ /* 64 KByte. 
*/ -#define CHUNK_SIZE 0x10000 +#define CHUNK_SIZE (sljit_uw)0x10000 struct chunk_header { void *executable; @@ -194,7 +194,7 @@ static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size) if (fd == -1) return NULL; - if (ftruncate(fd, size)) { + if (ftruncate(fd, (off_t)size)) { close(fd); return NULL; } @@ -281,7 +281,7 @@ struct free_block { #define AS_FREE_BLOCK(base, offset) \ ((struct free_block*)(((sljit_u8*)base) + offset)) #define MEM_START(base) ((void*)((base) + 1)) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) static struct free_block* free_blocks; static sljit_uw allocated_size; diff --git a/thirdparty/pcre2/src/sljit/sljitUtils.c b/thirdparty/pcre2/src/sljit/sljitUtils.c index 9bce714735..967593b157 100644 --- a/thirdparty/pcre2/src/sljit/sljitUtils.c +++ b/thirdparty/pcre2/src/sljit/sljitUtils.c @@ -131,12 +131,12 @@ static SLJIT_INLINE int open_dev_zero(void) #ifdef _WIN32 -static SLJIT_INLINE sljit_sw get_page_alignment(void) { +static SLJIT_INLINE sljit_uw get_page_alignment(void) { SYSTEM_INFO si; - static sljit_sw sljit_page_align; + static sljit_uw sljit_page_align = 0; if (!sljit_page_align) { GetSystemInfo(&si); - sljit_page_align = si.dwPageSize - 1; + sljit_page_align = (sljit_uw)si.dwPageSize - 1; } return sljit_page_align; } @@ -145,18 +145,21 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) { #include <unistd.h> -static SLJIT_INLINE sljit_sw get_page_alignment(void) { - static sljit_sw sljit_page_align = -1; - if (sljit_page_align < 0) { +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + static sljit_uw sljit_page_align = 0; + + sljit_sw align; + + if (!sljit_page_align) { #ifdef _SC_PAGESIZE - sljit_page_align = sysconf(_SC_PAGESIZE); + align = sysconf(_SC_PAGESIZE); #else - sljit_page_align = getpagesize(); + align = getpagesize(); #endif /* Should never happen. 
*/ - if (sljit_page_align < 0) - sljit_page_align = 4096; - sljit_page_align--; + if (align < 0) + align = 4096; + sljit_page_align = (sljit_uw)align - 1; } return sljit_page_align; } @@ -227,7 +230,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *st SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) { SLJIT_UNUSED_ARG(allocator_data); - munmap((void*)stack->min_start, stack->end - stack->min_start); + munmap((void*)stack->min_start, (size_t)(stack->end - stack->min_start)); SLJIT_FREE(stack, allocator_data); } @@ -237,7 +240,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(slj { struct sljit_stack *stack; void *ptr; - sljit_sw page_align; + sljit_uw page_align; SLJIT_UNUSED_ARG(allocator_data); @@ -295,7 +298,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st #if defined _WIN32 || defined(POSIX_MADV_DONTNEED) sljit_uw aligned_old_start; sljit_uw aligned_new_start; - sljit_sw page_align; + sljit_uw page_align; #endif if ((new_start < stack->min_start) || (new_start >= stack->end)) |