summary | refs | log | tree | commit | diff
path: root/thirdparty/embree/common/simd/vfloat16_avx512.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/common/simd/vfloat16_avx512.h')
-rw-r--r-- thirdparty/embree/common/simd/vfloat16_avx512.h | 5
1 files changed, 3 insertions, 2 deletions
diff --git a/thirdparty/embree/common/simd/vfloat16_avx512.h b/thirdparty/embree/common/simd/vfloat16_avx512.h
index 9f1e2459c4..75c471cc0c 100644
--- a/thirdparty/embree/common/simd/vfloat16_avx512.h
+++ b/thirdparty/embree/common/simd/vfloat16_avx512.h
@@ -177,9 +177,10 @@ namespace embree
__forceinline vfloat16 abs (const vfloat16& a) { return _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(a),_mm512_set1_epi32(0x7FFFFFFF))); } // absolute value: reinterprets the floats as 32-bit integers and masks off bit 31 (the sign bit) of each element
__forceinline vfloat16 signmsk(const vfloat16& a) { return _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(a),_mm512_set1_epi32(0x80000000))); } // sign mask: keeps only bit 31 (the sign bit) of each 32-bit element and zeroes all other bits
- __forceinline vfloat16 rcp(const vfloat16& a) {
+ __forceinline vfloat16 rcp(const vfloat16& a)
+ {
const vfloat16 r = _mm512_rcp14_ps(a); // initial approximate reciprocal of a (rcp14: relative error below 2^-14 per Intel's intrinsics documentation)
- return _mm512_mul_ps(r, _mm512_fnmadd_ps(r, a, vfloat16(2.0f)));
+ return _mm512_fmadd_ps(r, _mm512_fnmadd_ps(a, r, vfloat16(1.0)), r); // one Newton-Raphson refinement of r: inner fnmadd yields (1 - a*r), outer fmadd computes r + r * (1 - a*r)
}
__forceinline vfloat16 sqr (const vfloat16& a) { return _mm512_mul_ps(a,a); } // element-wise square: multiplies a by itself