diff options
Diffstat (limited to 'thirdparty/embree/common/sys/intrinsics.h')
-rw-r--r-- | thirdparty/embree/common/sys/intrinsics.h | 124 |
1 files changed, 74 insertions, 50 deletions
diff --git a/thirdparty/embree/common/sys/intrinsics.h b/thirdparty/embree/common/sys/intrinsics.h index ed8dd7d40a..2c2f6eccda 100644 --- a/thirdparty/embree/common/sys/intrinsics.h +++ b/thirdparty/embree/common/sys/intrinsics.h @@ -13,6 +13,9 @@ #include "../simd/arm/emulation.h" #else #include <immintrin.h> +#if defined(__EMSCRIPTEN__) +#include "../simd/wasm/emulation.h" +#endif #endif #if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER) @@ -24,24 +27,26 @@ #endif #endif -#if defined(__LZCNT__) +#if defined(__aarch64__) #if !defined(_lzcnt_u32) - #define _lzcnt_u32 __lzcnt32 + #define _lzcnt_u32 __builtin_clz #endif - #if !defined(_lzcnt_u64) - #define _lzcnt_u64 __lzcnt64 +#else + #if defined(__LZCNT__) + #if !defined(_lzcnt_u32) + #define _lzcnt_u32 __lzcnt32 + #endif + #if !defined(_lzcnt_u64) + #define _lzcnt_u64 __lzcnt64 + #endif #endif #endif #if defined(__WIN32__) -// -- GODOT start -- -#if !defined(NOMINMAX) -// -- GODOT end -- -#define NOMINMAX -// -- GODOT start -- -#endif -#include "windows.h" -// -- GODOT end -- +# if !defined(NOMINMAX) +# define NOMINMAX +# endif +# include <windows.h> #endif /* normally defined in pmmintrin.h, but we always need this */ @@ -69,7 +74,7 @@ namespace embree } __forceinline int bsf(int v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return _tzcnt_u32(v); #else unsigned long r = 0; _BitScanForward(&r,v); return r; @@ -77,7 +82,7 @@ namespace embree } __forceinline unsigned bsf(unsigned v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return _tzcnt_u32(v); #else unsigned long r = 0; _BitScanForward(&r,v); return r; @@ -118,7 +123,7 @@ namespace embree #endif __forceinline int bsr(int v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return 31 - _lzcnt_u32(v); #else unsigned long r = 0; _BitScanReverse(&r,v); return r; @@ -126,7 +131,7 @@ namespace embree } __forceinline unsigned bsr(unsigned v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return 31 - _lzcnt_u32(v); #else unsigned long r = 0; _BitScanReverse(&r,v); return r; @@ -145,7 +150,7 @@ namespace embree __forceinline int lzcnt(const int x) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return _lzcnt_u32(x); #else if (unlikely(x == 0)) return 32; @@ -214,15 +219,26 @@ namespace embree #elif defined(__X86_ASM__) __forceinline void __cpuid(int out[4], int op) { - asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); +#if defined(__ARM_NEON) + if (op == 0) { // Get CPU name + out[0] = 0x41524d20; + out[1] = 0x41524d20; + out[2] = 0x41524d20; + out[3] = 0x41524d20; + } +#else + asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); +#endif } - + +#if !defined(__ARM_NEON) __forceinline void __cpuid_count(int out[4], int op1, int op2) { asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2)); } - #endif - + +#endif + __forceinline uint64_t read_tsc() { #if defined(__X86_ASM__) uint32_t high,low; @@ -235,30 +251,38 @@ namespace embree } __forceinline int bsf(int v) { -#if defined(__AVX2__) +#if defined(__ARM_NEON) + return __builtin_ctz(v); +#else +#if defined(__AVX2__) return _tzcnt_u32(v); #elif defined(__X86_ASM__) int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_ctz(v); #endif +#endif } #if defined(__64BIT__) __forceinline unsigned bsf(unsigned v) { -#if defined(__AVX2__) +#if defined(__ARM_NEON) + return __builtin_ctz(v); +#else +#if defined(__AVX2__) return _tzcnt_u32(v); #elif defined(__X86_ASM__) unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; #else return __builtin_ctz(v); #endif +#endif } #endif __forceinline size_t bsf(size_t v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) #if defined(__X86_64__) return _tzcnt_u64(v); #else @@ -295,7 +319,7 @@ namespace embree } __forceinline int bsr(int v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return 31 - _lzcnt_u32(v); #elif defined(__X86_ASM__) int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; @@ -304,7 +328,7 @@ namespace embree #endif } -#if defined(__64BIT__) +#if defined(__64BIT__) || defined(__EMSCRIPTEN__) __forceinline unsigned bsr(unsigned v) { #if defined(__AVX2__) return 31 - _lzcnt_u32(v); @@ -317,7 +341,7 @@ namespace embree #endif __forceinline size_t bsr(size_t v) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) #if defined(__X86_64__) return 63 - _lzcnt_u64(v); #else @@ -332,7 +356,7 @@ namespace embree __forceinline int lzcnt(const int x) { -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) return _lzcnt_u32(x); #else if (unlikely(x == 0)) return 32; @@ -341,18 +365,18 @@ namespace embree } __forceinline size_t blsr(size_t v) { -#if defined(__AVX2__) -#if defined(__INTEL_COMPILER) +#if defined(__AVX2__) && !defined(__aarch64__) + #if defined(__INTEL_COMPILER) return _blsr_u64(v); + #else + #if defined(__X86_64__) + return __blsr_u64(v); + #else + return __blsr_u32(v); + #endif + #endif #else -#if defined(__X86_64__) - return __blsr_u64(v); -#else - return __blsr_u32(v); -#endif -#endif -#else - return v & (v-1); + return v & (v-1); #endif } @@ -368,7 +392,7 @@ namespace embree #if defined(__X86_ASM__) int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; #else - return (v | (v << i)); + return (v | (1 << i)); #endif } @@ -376,7 +400,7 @@ namespace embree #if defined(__X86_ASM__) int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; #else - return (v & ~(v << i)); + return (v & ~(1 << i)); #endif } @@ -392,7 +416,7 @@ namespace embree #if defined(__X86_ASM__) size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; #else - return (v | (v << i)); + return (v | (1 << i)); #endif } @@ -400,7 +424,7 @@ namespace embree #if defined(__X86_ASM__) size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; #else - return (v & ~(v << i)); + return (v & ~(1 << i)); #endif } @@ -435,8 +459,8 @@ namespace embree #endif #endif -#if defined(__SSE4_2__) - +#if defined(__SSE4_2__) || defined(__ARM_NEON) + __forceinline int popcnt(int in) { return _mm_popcnt_u32(in); } @@ -483,14 +507,14 @@ namespace embree #endif } - __forceinline void prefetchL1EX(const void* ptr) { - prefetchEX(ptr); + __forceinline void prefetchL1EX(const void* ptr) { + prefetchEX(ptr); } - - __forceinline void prefetchL2EX(const void* ptr) { - prefetchEX(ptr); + + __forceinline void prefetchL2EX(const void* ptr) { + prefetchEX(ptr); } -#if defined(__AVX2__) +#if defined(__AVX2__) && !defined(__aarch64__) __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); } __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); } #if defined(__X86_64__) |