summary refs log tree commit diff
path: root/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
diff options
context:
space:
mode:
author jfons <joan.fonssanchez@gmail.com> 2021-05-20 12:49:33 +0200
committer jfons <joan.fonssanchez@gmail.com> 2021-05-21 17:00:24 +0200
commit 767e374dced69b45db0afb30ca2ccf0bbbeef672 (patch)
tree a712cecc2c8cc2c6d6ecdc4a50020d423ddb4c0c /thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
parent 42b6602f1d4b108cecb94b94c0d2b645acaebd4f (diff)
Upgrade Embree to the latest official release.
Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library.
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/geometry/grid_soa.h')
-rw-r--r-- thirdparty/embree-aarch64/kernels/geometry/grid_soa.h 275
1 files changed, 0 insertions, 275 deletions
diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h b/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
deleted file mode 100644
index d3b275586c..0000000000
--- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h
+++ /dev/null
@@ -1,275 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "../common/ray.h"
-#include "../common/scene_subdiv_mesh.h"
-#include "../bvh/bvh.h"
-#include "../subdiv/tessellation.h"
-#include "../subdiv/tessellation_cache.h"
-#include "subdivpatch1.h"
-
-namespace embree
-{
- namespace isa
- {
- class GridSOA
- {
- public:
-
- /*! GridSOA constructor (only declared in this header).
-  *  create() invokes it through placement new on a buffer it has sized itself,
-  *  passing the inclusive sub-range [x0,x1] x [y0,y1], the patch's grid_u_res
-  *  and grid_v_res as swidth and sheight, the SubdivMesh fetched from the scene
-  *  via the patch's geomID as geom, its computed BVH byte count as
-  *  totalBvhBytes, the byte size of one time step's grid as gridBytes, and the
-  *  caller's bounds_o unchanged. */
- GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
- const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
- const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
-
- /*! Subgrid creation: requests one buffer from 'alloc' and placement-constructs
-  *  a GridSOA for the inclusive grid range [x0,x1] x [y0,y1] of the patch in it.
-  *  The requested size is the GridSOA header up to its 'data' member, plus the
-  *  BVH bytes, plus one grid per time step, plus the root references.
-  *  Nothing is freed here; 'alloc' must return a non-null pointer (asserted). */
- template<typename Allocator>
- static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
- unsigned x0, unsigned x1, unsigned y0, unsigned y1,
- const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
- {
- const unsigned width = x1-x0+1; // x1 is inclusive, hence the +1
- const unsigned height = y1-y0+1; // y1 is inclusive, hence the +1
- const GridRange range(0,width-1,0,height-1); // the same extent expressed in subgrid-local coordinates
- size_t bvhBytes = 0;
- if (time_steps == 1)
- bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0); // single time step: one BVH sized for AABBNode nodes
- else {
- bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0); // one AABBNodeMB-sized BVH per pair of consecutive time steps
- bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D)); // plus the temporal BVH over the time range
- }
- const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float); // 4 floats per grid point; calculateBounds() reads three planes as x/y/z, the fourth is presumably the packed uv - TODO confirm
- size_t rootBytes = time_steps*sizeof(BVH4::NodeRef); // one root reference per time step
-#if !defined(__X86_64__) && !defined(__aarch64__)
- rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
-#endif
- void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes); // header up to 'data' followed by the variable-size payload
- assert(data);
- return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o); // construct in place inside the buffer obtained above
- }
-
- /*! Grid creation: convenience overload that forwards to the subgrid
-  *  overload with the range spanning the whole patch grid,
-  *  i.e. [0,grid_u_res-1] x [0,grid_v_res-1]. */
- template<typename Allocator>
- static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
-                        const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
- {
-   const unsigned first = 0;
-   const unsigned last_u = patches->grid_u_res-1;
-   const unsigned last_v = patches->grid_v_res-1;
-   return create(patches,time_steps,first,last_u,first,last_v,scene,alloc,bounds_o);
- }
-
- /*! returns a reference to the root stored for time step t; the roots live
-  *  in 'data' starting at byte rootOffset, sizeof(BVH4::NodeRef) bytes apart */
- __forceinline BVH4::NodeRef& root(size_t t = 0) {
-   const size_t byteIndex = rootOffset + t*sizeof(BVH4::NodeRef);
-   return (BVH4::NodeRef&)data[byteIndex];
- }
- __forceinline const BVH4::NodeRef& root(size_t t = 0) const {
-   const size_t byteIndex = rootOffset + t*sizeof(BVH4::NodeRef);
-   return (BVH4::NodeRef&)data[byteIndex];
- }
-
- /*! returns a pointer to the first byte of 'data', where the BVH array starts */
- __forceinline int8_t* bvhData() {
-   return data;
- }
- __forceinline const int8_t* bvhData() const {
-   return data;
- }
-
- /*! returns a pointer to the grid of time step t; grids start at byte
-  *  gridOffset of 'data' and consecutive time steps are gridBytes apart */
- __forceinline float* gridData(size_t t = 0) {
-   const size_t byteIndex = gridOffset + t*gridBytes;
-   return (float*) &data[byteIndex];
- }
- __forceinline const float* gridData(size_t t = 0) const {
-   const size_t byteIndex = gridOffset + t*gridBytes;
-   return (float*) &data[byteIndex];
- }
-
- /*! packs grid coordinate (u,v) into a pointer-sized leaf value:
-  *  16 * (linear index + 1); decodeLeaf() reverses this */
- __forceinline void* encodeLeaf(size_t u, size_t v) {
-   const size_t slot = v * width + u + 1; // +1 to not create empty leaf
-   return (void*) (16*slot);
- }
- /*! turns a leaf value produced by encodeLeaf() back into a pointer into the
-  *  grid of time step t: drops the low 4 bits, removes the +1 bias and uses
-  *  the result as a float offset */
- __forceinline float* decodeLeaf(size_t t, const void* ptr) {
-   const size_t slot = ((size_t) (ptr)) >> 4;
-   float* const grid = gridData(t);
-   return grid + (slot - 1);
- }
-
- /*! returns the size of the BVH over the grid in bytes (only declared here);
-  *  create() calls it with a subgrid-local range, the sizeof of the node type
-  *  to be used, and leafBytes = 0 */
- static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
-
- /*! returns the size of the temporal BVH over the time range BVHs (only
-  *  declared here); create() calls it with make_range(0,int(time_steps-1))
-  *  and sizeof(BVH4::AABBNodeMB4D) */
- static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
-
- /*! computes the bounding box of the grid points of time step 'time' that
-  *  lie inside 'range' (both ends of the u and v ranges are inclusive) */
- __forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
- {
-   /* the x, y and z coordinates are stored as separate planes, dim_offset floats apart */
-   const float* const base = gridData(time);
-   const float* const xs = base + 0 * dim_offset;
-   const float* const ys = base + 1 * dim_offset;
-   const float* const zs = base + 2 * dim_offset;
-
-   /* only the points inside the requested range contribute */
-   BBox3fa box( empty );
-   for (unsigned row = range.v_start; row<=range.v_end; row++)
-   {
-     const unsigned rowBegin = row * width;
-     for (unsigned col = range.u_start; col<=range.u_end; col++)
-     {
-       const unsigned idx = rowBegin + col;
-       box.extend( Vec3fa(xs[idx],ys[idx],zs[idx]) );
-     }
-   }
-   assert(is_finite(box));
-   return box;
- }
-
- /*! Evaluates grid over patch and builds BVH4 tree over the grid. Only declared
-  *  here; returns the root node reference paired with a bounding box.
-  *  NOTE(review): bounds_o looks like an optional output (create() defaults it
-  *  to nullptr) - confirm against the definition. */
- std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
-
- /*! Create BVH4 tree over grid. Only declared here.
-  *  NOTE(review): 'allocator' is a size_t taken by reference, presumably a
-  *  running byte offset into bvhData() - confirm against the definition. */
- std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
-
- /*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. Only
-  *  declared here; returns the root node reference paired with linear bounds
-  *  (LBBox3fa) instead of a single box. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
-
- /*! Create MBlur BVH4 tree over grid. Only declared here.
-  *  NOTE(review): 'time' presumably selects the time step the tree starts at,
-  *  and 'allocator' a running byte offset into bvhData() - confirm. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
-
- /*! Create MSMBlur BVH4 tree over grid. Only declared here; same as the
-  *  two-argument overload above but with an explicit 'allocator' argument. */
- std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
-
- /*! Functor that rewrites per-triangle (u,v) coordinates in place: it gathers
-  *  the packed uv values of the three triangle vertices through Loader::gather,
-  *  decodes them with GridSOA::decodeUV and blends them with weights
-  *  (1-u-v), u and v for vertices 0, 1 and 2. */
- template<typename Loader>
- struct MapUV
- {
-   typedef typename Loader::vfloat vfloat;
-   const float* const grid_uv;
-   size_t line_offset;
-   size_t lines;
-
-   /*! stores the arguments; they are handed to Loader::gather unchanged on every call */
-   __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
-     : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
-
-   __forceinline void operator() (vfloat& u, vfloat& v) const
-   {
-     const Vec3<vfloat> packed = Loader::gather(grid_uv,line_offset,lines);
-     const Vec2<vfloat> corner0 = GridSOA::decodeUV(packed[0]);
-     const Vec2<vfloat> corner1 = GridSOA::decodeUV(packed[1]);
-     const Vec2<vfloat> corner2 = GridSOA::decodeUV(packed[2]);
-     const Vec2<vfloat> mapped = u * corner1 + v * corner2 + (1.0f-u-v) * corner0;
-     u = mapped[0];
-     v = mapped[1];
-   }
- };
-
- struct Gather2x3 // loads 2 grid rows x 3 columns and arranges them as the vertices of 4 triangles, one triangle per lane
- {
- enum { M = 4 }; // matches the 4-wide vector typedefs below
- typedef vbool4 vbool;
- typedef vint4 vint;
- typedef vfloat4 vfloat;
-
- static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines) // 'lines' is not used by this two-row variant
- {
- vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
- vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
- if (unlikely(line_offset == 2)) // rows are only 2 floats apart: lanes 2 and 3 are overwritten with lane 1 of the same row
- {
- r0 = shuffle<0,1,1,1>(r0);
- r1 = shuffle<0,1,1,1>(r1);
- }
- return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11
- shuffle<1,1,2,2>(r0), // r01, r01, r02, r02
- shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
- }
-
- static __forceinline void gather(const float* const grid_x, // runs the single-array gather once per axis and regroups the results per vertex
- const float* const grid_y,
- const float* const grid_z,
- const size_t line_offset,
- const size_t lines,
- Vec3vf4& v0_o,
- Vec3vf4& v1_o,
- Vec3vf4& v2_o)
- {
- const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);
- const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);
- const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines);
- v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); // element [i] of each per-axis result holds vertex i
- v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
- v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
- }
- };
-
-#if defined (__AVX__)
- struct Gather3x3 // 8-wide variant, only compiled with __AVX__: lower 4 lanes cover rows 0/1, upper 4 lanes cover rows 1/2
- {
- enum { M = 8 }; // matches the 8-wide vector typedefs below
- typedef vbool8 vbool;
- typedef vint8 vint;
- typedef vfloat8 vfloat;
-
- static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
- {
- vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
- vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
- vfloat4 rc;
- if (likely(lines > 2)) // a third row exists and can be loaded
- rc = vfloat4::loadu(grid + 2*line_offset);
- else
- rc = rb; // only two rows: reuse row 1 instead of loading a third row
-
- if (unlikely(line_offset == 2)) // rows are only 2 floats apart: lanes 2 and 3 are overwritten with lane 1 of the same row
- {
- ra = shuffle<0,1,1,1>(ra);
- rb = shuffle<0,1,1,1>(rb);
- rc = shuffle<0,1,1,1>(rc);
- }
-
- const vfloat8 r0 = vfloat8(ra,rb); // upper row of each row pair
- const vfloat8 r1 = vfloat8(rb,rc); // lower row of each row pair
- return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21
- shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12
- shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
- }
-
- static __forceinline void gather(const float* const grid_x, // runs the single-array gather once per axis and regroups the results per vertex
- const float* const grid_y,
- const float* const grid_z,
- const size_t line_offset,
- const size_t lines,
- Vec3vf8& v0_o,
- Vec3vf8& v1_o,
- Vec3vf8& v2_o)
- {
- const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
- const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
- const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
- v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]); // element [i] of each per-axis result holds vertex i
- v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
- v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
- }
- };
-#endif
-
- /*! unpacks a uv pair stored in the bit pattern of a float: the low 16 bits
-  *  hold u and the high 16 bits hold v, each scaled by 8/65536 on decode */
- template<typename vfloat>
- static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
- {
-   typedef typename vfloat::Int vint;
-   const vfloat scale = vfloat(8.0f/0x10000);
-   const vint lowBits = asInt(uv) & 0xffff;
-   const vint highBits = srl(asInt(uv),16);
-   const vfloat decodedU = (vfloat)lowBits * scale;
-   const vfloat decodedV = (vfloat)highBits * scale;
-   return Vec2<vfloat>(decodedU,decodedV);
- }
-
- /*! returns the stored geometry ID (_geomID) */
- __forceinline unsigned int geomID() const { return _geomID; }
-
- /*! returns the stored primitive ID (_primID) */
- __forceinline unsigned int primID() const { return _primID; }
-
- public:
- BVH4::NodeRef troot; // NOTE(review): not referenced in this header; presumably the top-level root - confirm
-#if !defined(__X86_64__) && !defined(__aarch64__)
- unsigned align1; // extra member that only exists on targets that are neither __X86_64__ nor __aarch64__
-#endif
- unsigned time_steps;
- unsigned width; // row stride in floats used for indexing (v * width + u) in calculateBounds() and encodeLeaf()
-
- unsigned height;
- unsigned dim_offset; // distance in floats between the x, y and z planes read by calculateBounds()
- unsigned _geomID; // returned by geomID()
- unsigned _primID; // returned by primID()
-
- unsigned align2; // NOTE(review): not referenced in this header; looks like padding - confirm
- unsigned gridOffset; // byte offset inside 'data' of the grid of time step 0, see gridData()
- unsigned gridBytes; // byte distance between the grids of consecutive time steps, see gridData()
- unsigned rootOffset; // byte offset inside 'data' of the first root reference, see root()
-
- int8_t data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots
- };
- }
-}