From 767e374dced69b45db0afb30ca2ccf0bbbeef672 Mon Sep 17 00:00:00 2001
From: jfons
Date: Thu, 20 May 2021 12:49:33 +0200
Subject: Upgrade Embree to the latest official release.

Since Embree v3.13.0 supports AARCH64, switch back to the official repo
instead of using Embree-aarch64.

`thirdparty/embree/patches/godot-changes.patch` should now contain an
accurate diff of the changes done to the library.
---
 thirdparty/embree/kernels/bvh/bvh_node_qaabb.h | 265 +++++++++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 thirdparty/embree/kernels/bvh/bvh_node_qaabb.h

diff --git a/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h b/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h
new file mode 100644
index 0000000000..2afc8c98e7
--- /dev/null
+++ b/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h
@@ -0,0 +1,265 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "bvh_node_base.h"
+
+namespace embree
+{
+  /*! BVHN Quantized Node */
+  template<int N>
+    struct __aligned(8) QuantizedBaseNode_t
+  {
+    typedef unsigned char T;
+    static const T MIN_QUAN = 0;
+    static const T MAX_QUAN = 255;
+
+    /*! Clears the node. */
+    __forceinline void clear() {
+      for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;
+      for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;
+    }
+
+    /*! Returns bounds of specified child. */
+    __forceinline BBox3fa bounds(size_t i) const
+    {
+      assert(i < N);
+      const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),
+                         madd(scale.y,(float)lower_y[i],start.y),
+                         madd(scale.z,(float)lower_z[i],start.z));
+      const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),
+                         madd(scale.y,(float)upper_y[i],start.y),
+                         madd(scale.z,(float)upper_z[i],start.z));
+      return BBox3fa(lower,upper);
+    }
+
+    /*! Returns extent of bounds of specified child. */
+    __forceinline Vec3fa extent(size_t i) const {
+      return bounds(i).size();
+    }
+
+    static __forceinline void init_dim(const vfloat<N> &lower,
+                                       const vfloat<N> &upper,
+                                       T lower_quant[N],
+                                       T upper_quant[N],
+                                       float &start,
+                                       float &scale)
+    {
+      /* quantize bounds */
+      const vbool<N> m_valid = lower != vfloat<N>(pos_inf);
+      const float minF = reduce_min(lower);
+      const float maxF = reduce_max(upper);
+      float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);
+      float decode_scale = diff / float(MAX_QUAN);
+      if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero
+      assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);
+      const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;
+      vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);
+      vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);
+
+      /* lower/upper correction */
+      vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;
+      vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;
+      ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);
+      iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);
+
+      /* disable invalid lanes */
+      ilower = select(m_valid,ilower,MAX_QUAN);
+      iupper = select(m_valid,iupper,MIN_QUAN);
+
+      /* store as uchar to memory */
+      vint<N>::store(lower_quant,ilower);
+      vint<N>::store(upper_quant,iupper);
+      start = minF;
+      scale = decode_scale;
+
+#if defined(DEBUG)
+      vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );
+      vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );
+      vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);
+      vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);
+      assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));
+      assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));
+#endif
+    }
+
+    __forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)
+    {
+      init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);
+      init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);
+      init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);
+    }
+
+    __forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }
+
+#if defined(__AVX512F__) // KNL
+    __forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }
+#endif
+    __forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }
+
+    __forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }
+
+    __forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }
+
+    __forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }
+
+    __forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }
+
+    __forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }
+
+    template <int M>
+    __forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }
+
+#if defined(__AVX512F__)
+    __forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }
+    __forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }
+    __forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }
+#endif
+
+    union {
+      struct {
+        T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children
+        T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children
+        T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children
+        T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children
+        T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children
+        T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children
+      };
+      T all_planes[6*N];
+    };
+
+    Vec3f start;
+    Vec3f scale;
+
+    friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)
+    {
+      o << "QuantizedBaseNode { " << embree_endl;
+      o << "  start   " << n.start << embree_endl;
+      o << "  scale   " << n.scale << embree_endl;
+      o << "  lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;
+      o << "  upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;
+      o << "  lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;
+      o << "  upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;
+      o << "  lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;
+      o << "  upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;
+      o << "}" << embree_endl;
+      return o;
+    }
+
+  };
+
+  template<typename NodeRef, int N>
+    struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef,N>, QuantizedBaseNode_t<N>
+  {
+    using BaseNode_t<NodeRef,N>::children;
+    using QuantizedBaseNode_t<N>::lower_x;
+    using QuantizedBaseNode_t<N>::upper_x;
+    using QuantizedBaseNode_t<N>::lower_y;
+    using QuantizedBaseNode_t<N>::upper_y;
+    using QuantizedBaseNode_t<N>::lower_z;
+    using QuantizedBaseNode_t<N>::upper_z;
+    using QuantizedBaseNode_t<N>::start;
+    using QuantizedBaseNode_t<N>::scale;
+    using QuantizedBaseNode_t<N>::init_dim;
+
+    __forceinline void setRef(size_t i, const NodeRef& ref) {
+      assert(i < N);
+      children[i] = ref;
+    }
+
+    struct Create2
+    {
+      template<typename BuildRecord>
+      __forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const
+      {
+        __aligned(64) AABBNode_t<NodeRef,N> node;
+        node.clear();
+        for (size_t i=0; i<n; i++) {
+          node.setBounds(i,children[i].bounds());
+        }
+        QuantizedNode_t* qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteNodeAlignment);
+        qnode->init(node);
+
+        return (size_t)qnode | NodeRef::tyQuantizedNode;
+      }
+    };
+
+    struct Set2
+    {
+      template<typename BuildRecord>
+      __forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const
+      {
+        QuantizedNode_t* node = ref.quantizedNode();
+        for (size_t i=0; i<num; i++) node->setRef(i,children[i]);
+        return ref;
+      }
+    };
+
+    __forceinline void init(AABBNode_t<NodeRef,N>& node)
+    {
+      for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode;
+      init_dim(node);
+    }
+
+  };
+
+  /*! BVHN Quantized Node */
+  template<int N>
+    struct __aligned(8) QuantizedBaseNodeMB_t
+  {
+    QuantizedBaseNode_t<N> node0;
+    QuantizedBaseNode_t<N> node1;
+
+    /*! Clears the node. */
+    __forceinline void clear() {
+      node0.clear();
+      node1.clear();
+    }
+
+    /*! Returns bounds of specified child. */
+    __forceinline BBox3fa bounds(size_t i) const
+    {
+      assert(i < N);
+      BBox3fa bounds0 = node0.bounds(i);
+      BBox3fa bounds1 = node1.bounds(i);
+      bounds0.extend(bounds1);
+      return bounds0;
+    }
+
+    /*! Returns extent of bounds of specified child. */
+    __forceinline Vec3fa extent(size_t i) const {
+      return bounds(i).size();
+    }
+
+    __forceinline vbool<N> validMask() const { return node0.validMask(); }
+
+    template<typename T>
+    __forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }
+    template<typename T>
+    __forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }
+    template<typename T>
+    __forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }
+    template<typename T>
+    __forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }
+    template<typename T>
+    __forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }
+    template<typename T>
+    __forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }
+
+
+    template<int M>
+    __forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }
+    template<int M>
+    __forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }
+    template<int M>
+    __forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }
+    template<int M>
+    __forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }
+    template<int M>
+    __forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }
+    template<int M>
+    __forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }
+
+  };
+}
--
cgit v1.2.3
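The quantization in init_dim() above works per axis: it derives a shared start/scale pair from the merged bounds of all N children, rounds lower bounds down and upper bounds up to 8-bit grid indices, then steps an index one cell further outward if decoding would still land inside the exact bounds, so a decoded box always conservatively contains the original. Below is a minimal scalar sketch of that scheme for a single child and one axis; quantize_bounds and Quantized1D are hypothetical illustration names, not Embree API, and FLT_EPSILON stands in for Embree's float(ulp) constant:

    #include <cassert>
    #include <cfloat>
    #include <cmath>
    #include <cstdint>

    struct Quantized1D {
      uint8_t ilower, iupper; // 8-bit grid indices for one child, one axis
      float start, scale;     // decode as start + scale * index
    };

    Quantized1D quantize_bounds(float lower, float upper)
    {
      const float minF = lower; // reduce_min over all children in the real code
      const float maxF = upper; // reduce_max over all children in the real code
      float diff = (1.0f + 2.0f*FLT_EPSILON) * (maxF - minF);
      float decode_scale = diff / 255.0f;
      if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // guard against flush-to-zero
      const float encode_scale = diff > 0 ? (255.0f / diff) : 0.0f;

      /* round lower down and upper up */
      int il = (int)std::floor((lower - minF) * encode_scale);
      int iu = (int)std::ceil ((upper - minF) * encode_scale);

      /* correction: step outward if rounding left the decoded value inside */
      if (minF + (float)il * decode_scale > lower) il--;
      if (minF + (float)iu * decode_scale < upper) iu++;
      il = il < 0   ? 0   : il;
      iu = iu > 255 ? 255 : iu;

      Quantized1D q = { (uint8_t)il, (uint8_t)iu, minF, decode_scale };

      /* decoded bounds must conservatively contain the input */
      assert(q.start + q.scale * (float)q.ilower <= lower);
      assert(q.start + q.scale * (float)q.iupper >= upper);
      return q;
    }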
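The storage win shows up in the union near the end of QuantizedBaseNode_t: the six quantized planes of all N children fit in 6*N bytes (plus one shared Vec3f start and Vec3f scale per node), where an uncompressed AABB node stores 6*N floats, so the per-node bounds footprint shrinks roughly 4x, paid for with slightly looser boxes and one madd per plane to dequantize during traversal.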
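QuantizedBaseNodeMB_t handles motion blur by keeping two quantized boxes per child, node0 for time 0 and node1 for time 1, and lerping the two decoded bounds at the ray time, as in its dequantize*(t) methods above. A scalar illustration, reusing the hypothetical Quantized1D type from the previous sketch:

    // Decode one quantized plane: madd(index, scale, start) in the real code.
    float decode(uint8_t index, float start, float scale) {
      return start + scale * (float)index;
    }

    // Embree's lerp(v0,v1,t) computes (1-t)*v0 + t*v1.
    float lerp1(float v0, float v1, float t) { return (1.0f - t)*v0 + t*v1; }

    // Lower bound of one child on one axis at ray time t in [0,1].
    float lower_at_time(const Quantized1D& n0, const Quantized1D& n1, float t) {
      return lerp1(decode(n0.ilower, n0.start, n0.scale),
                   decode(n1.ilower, n1.start, n1.scale), t);
    }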