summary refs log tree commit diff
path: root/thirdparty/embree/common/sys/intrinsics.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/common/sys/intrinsics.h')
-rw-r--r-- thirdparty/embree/common/sys/intrinsics.h 124
1 files changed, 74 insertions, 50 deletions
diff --git a/thirdparty/embree/common/sys/intrinsics.h b/thirdparty/embree/common/sys/intrinsics.h
index ed8dd7d40a..2c2f6eccda 100644
--- a/thirdparty/embree/common/sys/intrinsics.h
+++ b/thirdparty/embree/common/sys/intrinsics.h
@@ -13,6 +13,9 @@
#include "../simd/arm/emulation.h"
#else
#include <immintrin.h>
+#if defined(__EMSCRIPTEN__)
+#include "../simd/wasm/emulation.h"
+#endif
#endif
#if defined(__BMI__) && defined(__GNUC__) && !defined(__INTEL_COMPILER)
@@ -24,24 +27,26 @@
#endif
#endif
-#if defined(__LZCNT__)
+#if defined(__aarch64__)
#if !defined(_lzcnt_u32)
- #define _lzcnt_u32 __lzcnt32
+ #define _lzcnt_u32 __builtin_clz
#endif
- #if !defined(_lzcnt_u64)
- #define _lzcnt_u64 __lzcnt64
+#else
+ #if defined(__LZCNT__)
+ #if !defined(_lzcnt_u32)
+ #define _lzcnt_u32 __lzcnt32
+ #endif
+ #if !defined(_lzcnt_u64)
+ #define _lzcnt_u64 __lzcnt64
+ #endif
#endif
#endif
#if defined(__WIN32__)
-// -- GODOT start --
-#if !defined(NOMINMAX)
-// -- GODOT end --
-#define NOMINMAX
-// -- GODOT start --
-#endif
-#include "windows.h"
-// -- GODOT end --
+# if !defined(NOMINMAX)
+# define NOMINMAX
+# endif
+# include <windows.h>
#endif
/* normally defined in pmmintrin.h, but we always need this */
@@ -69,7 +74,7 @@ namespace embree
}
__forceinline int bsf(int v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return _tzcnt_u32(v);
#else
unsigned long r = 0; _BitScanForward(&r,v); return r;
@@ -77,7 +82,7 @@ namespace embree
}
__forceinline unsigned bsf(unsigned v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return _tzcnt_u32(v);
#else
unsigned long r = 0; _BitScanForward(&r,v); return r;
@@ -118,7 +123,7 @@ namespace embree
#endif
__forceinline int bsr(int v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return 31 - _lzcnt_u32(v);
#else
unsigned long r = 0; _BitScanReverse(&r,v); return r;
@@ -126,7 +131,7 @@ namespace embree
}
__forceinline unsigned bsr(unsigned v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return 31 - _lzcnt_u32(v);
#else
unsigned long r = 0; _BitScanReverse(&r,v); return r;
@@ -145,7 +150,7 @@ namespace embree
__forceinline int lzcnt(const int x)
{
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return _lzcnt_u32(x);
#else
if (unlikely(x == 0)) return 32;
@@ -214,15 +219,26 @@ namespace embree
#elif defined(__X86_ASM__)
__forceinline void __cpuid(int out[4], int op) {
- asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
+#if defined(__ARM_NEON)
+ if (op == 0) { // Get CPU name
+ out[0] = 0x41524d20;
+ out[1] = 0x41524d20;
+ out[2] = 0x41524d20;
+ out[3] = 0x41524d20;
+ }
+#else
+ asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
+#endif
}
-
+
+#if !defined(__ARM_NEON)
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
}
-
#endif
-
+
+#endif
+
__forceinline uint64_t read_tsc() {
#if defined(__X86_ASM__)
uint32_t high,low;
@@ -235,30 +251,38 @@ namespace embree
}
__forceinline int bsf(int v) {
-#if defined(__AVX2__)
+#if defined(__ARM_NEON)
+ return __builtin_ctz(v);
+#else
+#if defined(__AVX2__)
return _tzcnt_u32(v);
#elif defined(__X86_ASM__)
int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_ctz(v);
#endif
+#endif
}
#if defined(__64BIT__)
__forceinline unsigned bsf(unsigned v)
{
-#if defined(__AVX2__)
+#if defined(__ARM_NEON)
+ return __builtin_ctz(v);
+#else
+#if defined(__AVX2__)
return _tzcnt_u32(v);
#elif defined(__X86_ASM__)
unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r;
#else
return __builtin_ctz(v);
#endif
+#endif
}
#endif
__forceinline size_t bsf(size_t v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
#if defined(__X86_64__)
return _tzcnt_u64(v);
#else
@@ -295,7 +319,7 @@ namespace embree
}
__forceinline int bsr(int v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return 31 - _lzcnt_u32(v);
#elif defined(__X86_ASM__)
int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r;
@@ -304,7 +328,7 @@ namespace embree
#endif
}
-#if defined(__64BIT__)
+#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline unsigned bsr(unsigned v) {
#if defined(__AVX2__)
return 31 - _lzcnt_u32(v);
@@ -317,7 +341,7 @@ namespace embree
#endif
__forceinline size_t bsr(size_t v) {
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
#if defined(__X86_64__)
return 63 - _lzcnt_u64(v);
#else
@@ -332,7 +356,7 @@ namespace embree
__forceinline int lzcnt(const int x)
{
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
return _lzcnt_u32(x);
#else
if (unlikely(x == 0)) return 32;
@@ -341,18 +365,18 @@ namespace embree
}
__forceinline size_t blsr(size_t v) {
-#if defined(__AVX2__)
-#if defined(__INTEL_COMPILER)
+#if defined(__AVX2__) && !defined(__aarch64__)
+ #if defined(__INTEL_COMPILER)
return _blsr_u64(v);
+ #else
+ #if defined(__X86_64__)
+ return __blsr_u64(v);
+ #else
+ return __blsr_u32(v);
+ #endif
+ #endif
#else
-#if defined(__X86_64__)
- return __blsr_u64(v);
-#else
- return __blsr_u32(v);
-#endif
-#endif
-#else
- return v & (v-1);
+ return v & (v-1);
#endif
}
@@ -368,7 +392,7 @@ namespace embree
#if defined(__X86_ASM__)
int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
- return (v | (v << i));
+ return (v | (1 << i));
#endif
}
@@ -376,7 +400,7 @@ namespace embree
#if defined(__X86_ASM__)
int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
- return (v & ~(v << i));
+ return (v & ~(1 << i));
#endif
}
@@ -392,7 +416,7 @@ namespace embree
#if defined(__X86_ASM__)
size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
- return (v | (v << i));
+ return (v | (1 << i));
#endif
}
@@ -400,7 +424,7 @@ namespace embree
#if defined(__X86_ASM__)
size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r;
#else
- return (v & ~(v << i));
+ return (v & ~(1 << i));
#endif
}
@@ -435,8 +459,8 @@ namespace embree
#endif
#endif
-#if defined(__SSE4_2__)
-
+#if defined(__SSE4_2__) || defined(__ARM_NEON)
+
__forceinline int popcnt(int in) {
return _mm_popcnt_u32(in);
}
@@ -483,14 +507,14 @@ namespace embree
#endif
}
- __forceinline void prefetchL1EX(const void* ptr) {
- prefetchEX(ptr);
+ __forceinline void prefetchL1EX(const void* ptr) {
+ prefetchEX(ptr);
}
-
- __forceinline void prefetchL2EX(const void* ptr) {
- prefetchEX(ptr);
+
+ __forceinline void prefetchL2EX(const void* ptr) {
+ prefetchEX(ptr);
}
-#if defined(__AVX2__)
+#if defined(__AVX2__) && !defined(__aarch64__)
__forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); }
__forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); }
#if defined(__X86_64__)