summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h')
-rw-r--r--thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h449
1 files changed, 449 insertions, 0 deletions
diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h b/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h
new file mode 100644
index 0000000000..9769bc17bd
--- /dev/null
+++ b/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h
@@ -0,0 +1,449 @@
+// Copyright 2009-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "catmullclark_patch.h"
+#include "bspline_curve.h"
+
+namespace embree
+{
+ template<typename Vertex, typename Vertex_t = Vertex>
+ class __aligned(64) BSplinePatchT
+ {
+ typedef CatmullClark1RingT<Vertex,Vertex_t> CatmullClarkRing;
+ typedef CatmullClarkPatchT<Vertex,Vertex_t> CatmullClarkPatch;
+
+ public:
+
+ __forceinline BSplinePatchT () {}
+
+ __forceinline BSplinePatchT (const CatmullClarkPatch& patch) {
+ init(patch);
+ }
+
+ __forceinline BSplinePatchT(const CatmullClarkPatch& patch,
+ const BezierCurveT<Vertex>* border0,
+ const BezierCurveT<Vertex>* border1,
+ const BezierCurveT<Vertex>* border2,
+ const BezierCurveT<Vertex>* border3)
+ {
+ init(patch);
+ }
+
+ __forceinline BSplinePatchT (const HalfEdge* edge, const char* vertices, size_t stride) {
+ init(edge,vertices,stride);
+ }
+
+ __forceinline Vertex hard_corner(const Vertex& v01, const Vertex& v02,
+ const Vertex& v10, const Vertex& v11, const Vertex& v12,
+ const Vertex& v20, const Vertex& v21, const Vertex& v22)
+ {
+ return 4.0f*v11 - 2.0f*(v12+v21) + v22;
+ }
+
+ __forceinline Vertex soft_convex_corner( const Vertex& v01, const Vertex& v02,
+ const Vertex& v10, const Vertex& v11, const Vertex& v12,
+ const Vertex& v20, const Vertex& v21, const Vertex& v22)
+ {
+ return -8.0f*v11 + 4.0f*(v12+v21) + v22;
+ }
+
+ __forceinline Vertex convex_corner(const float vertex_crease_weight,
+ const Vertex& v01, const Vertex& v02,
+ const Vertex& v10, const Vertex& v11, const Vertex& v12,
+ const Vertex& v20, const Vertex& v21, const Vertex& v22)
+ {
+ if (std::isinf(vertex_crease_weight)) return hard_corner(v01,v02,v10,v11,v12,v20,v21,v22);
+ else return soft_convex_corner(v01,v02,v10,v11,v12,v20,v21,v22);
+ }
+
+ __forceinline Vertex load(const HalfEdge* edge, const char* vertices, size_t stride) {
+ return Vertex_t::loadu(vertices+edge->getStartVertexIndex()*stride);
+ }
+
+ __forceinline void init_border(const CatmullClarkRing& edge0,
+ Vertex& v01, Vertex& v02,
+ const Vertex& v11, const Vertex& v12,
+ const Vertex& v21, const Vertex& v22)
+ {
+ if (likely(edge0.has_opposite_back(0)))
+ {
+ v01 = edge0.back(2);
+ v02 = edge0.back(1);
+ } else {
+ v01 = 2.0f*v11-v21;
+ v02 = 2.0f*v12-v22;
+ }
+ }
+
+ __forceinline void init_corner(const CatmullClarkRing& edge0,
+ Vertex& v00, const Vertex& v01, const Vertex& v02,
+ const Vertex& v10, const Vertex& v11, const Vertex& v12,
+ const Vertex& v20, const Vertex& v21, const Vertex& v22)
+ {
+ const bool MAYBE_UNUSED has_back1 = edge0.has_opposite_back(1);
+ const bool has_back0 = edge0.has_opposite_back(0);
+ const bool has_front1 = edge0.has_opposite_front(1);
+ const bool MAYBE_UNUSED has_front2 = edge0.has_opposite_front(2);
+
+ if (likely(has_back0)) {
+ if (likely(has_front1)) { assert(has_back1 && has_front2); v00 = edge0.back(3); }
+ else { assert(!has_back1); v00 = 2.0f*v01-v02; }
+ }
+ else {
+ if (likely(has_front1)) { assert(!has_front2); v00 = 2.0f*v10-v20; }
+ else v00 = convex_corner(edge0.vertex_crease_weight,v01,v02,v10,v11,v12,v20,v21,v22);
+ }
+ }
+
+ void init(const CatmullClarkPatch& patch)
+ {
+ /* fill inner vertices */
+ const Vertex v11 = v[1][1] = patch.ring[0].vtx;
+ const Vertex v12 = v[1][2] = patch.ring[1].vtx;
+ const Vertex v22 = v[2][2] = patch.ring[2].vtx;
+ const Vertex v21 = v[2][1] = patch.ring[3].vtx;
+
+ /* fill border vertices */
+ init_border(patch.ring[0],v[0][1],v[0][2],v11,v12,v21,v22);
+ init_border(patch.ring[1],v[1][3],v[2][3],v12,v22,v11,v21);
+ init_border(patch.ring[2],v[3][2],v[3][1],v22,v21,v12,v11);
+ init_border(patch.ring[3],v[2][0],v[1][0],v21,v11,v22,v12);
+
+ /* fill corner vertices */
+ init_corner(patch.ring[0],v[0][0],v[0][1],v[0][2],v[1][0],v11,v12,v[2][0],v21,v22);
+ init_corner(patch.ring[1],v[0][3],v[1][3],v[2][3],v[0][2],v12,v22,v[0][1],v11,v21);
+ init_corner(patch.ring[2],v[3][3],v[3][2],v[3][1],v[2][3],v22,v21,v[1][3],v12,v11);
+ init_corner(patch.ring[3],v[3][0],v[2][0],v[1][0],v[3][1],v21,v11,v[3][2],v22,v12);
+ }
+
+ void init_border(const HalfEdge* edge0, const char* vertices, size_t stride,
+ Vertex& v01, Vertex& v02,
+ const Vertex& v11, const Vertex& v12,
+ const Vertex& v21, const Vertex& v22)
+ {
+ if (likely(edge0->hasOpposite()))
+ {
+ const HalfEdge* e = edge0->opposite()->next()->next();
+ v01 = load(e,vertices,stride);
+ v02 = load(e->next(),vertices,stride);
+ } else {
+ v01 = 2.0f*v11-v21;
+ v02 = 2.0f*v12-v22;
+ }
+ }
+
+ void init_corner(const HalfEdge* edge0, const char* vertices, size_t stride,
+ Vertex& v00, const Vertex& v01, const Vertex& v02,
+ const Vertex& v10, const Vertex& v11, const Vertex& v12,
+ const Vertex& v20, const Vertex& v21, const Vertex& v22)
+ {
+ const bool has_back0 = edge0->hasOpposite();
+ const bool has_front1 = edge0->prev()->hasOpposite();
+
+ if (likely(has_back0))
+ {
+ const HalfEdge* e = edge0->opposite()->next();
+ if (likely(has_front1))
+ {
+ assert(e->hasOpposite());
+ assert(edge0->prev()->opposite()->prev()->hasOpposite());
+ v00 = load(e->opposite()->prev(),vertices,stride);
+ }
+ else {
+ assert(!e->hasOpposite());
+ v00 = 2.0f*v01-v02;
+ }
+ }
+ else
+ {
+ if (likely(has_front1)) {
+ assert(!edge0->prev()->opposite()->prev()->hasOpposite());
+ v00 = 2.0f*v10-v20;
+ }
+ else {
+ assert(edge0->vertex_crease_weight == 0.0f || std::isinf(edge0->vertex_crease_weight));
+ v00 = convex_corner(edge0->vertex_crease_weight,v01,v02,v10,v11,v12,v20,v21,v22);
+ }
+ }
+ }
+
+ void init(const HalfEdge* edge0, const char* vertices, size_t stride)
+ {
+ assert( edge0->isRegularFace() );
+
+ /* fill inner vertices */
+ const Vertex v11 = v[1][1] = load(edge0,vertices,stride); const HalfEdge* edge1 = edge0->next();
+ const Vertex v12 = v[1][2] = load(edge1,vertices,stride); const HalfEdge* edge2 = edge1->next();
+ const Vertex v22 = v[2][2] = load(edge2,vertices,stride); const HalfEdge* edge3 = edge2->next();
+ const Vertex v21 = v[2][1] = load(edge3,vertices,stride); assert(edge0 == edge3->next());
+
+ /* fill border vertices */
+ init_border(edge0,vertices,stride,v[0][1],v[0][2],v11,v12,v21,v22);
+ init_border(edge1,vertices,stride,v[1][3],v[2][3],v12,v22,v11,v21);
+ init_border(edge2,vertices,stride,v[3][2],v[3][1],v22,v21,v12,v11);
+ init_border(edge3,vertices,stride,v[2][0],v[1][0],v21,v11,v22,v12);
+
+ /* fill corner vertices */
+ init_corner(edge0,vertices,stride,v[0][0],v[0][1],v[0][2],v[1][0],v11,v12,v[2][0],v21,v22);
+ init_corner(edge1,vertices,stride,v[0][3],v[1][3],v[2][3],v[0][2],v12,v22,v[0][1],v11,v21);
+ init_corner(edge2,vertices,stride,v[3][3],v[3][2],v[3][1],v[2][3],v22,v21,v[1][3],v12,v11);
+ init_corner(edge3,vertices,stride,v[3][0],v[2][0],v[1][0],v[3][1],v21,v11,v[3][2],v22,v12);
+ }
+
+ __forceinline BBox<Vertex> bounds() const
+ {
+ const Vertex* const cv = &v[0][0];
+ BBox<Vertex> bounds (cv[0]);
+ for (size_t i=1; i<16 ; i++)
+ bounds.extend( cv[i] );
+ return bounds;
+ }
+
+ __forceinline Vertex eval(const float uu, const float vv) const
+ {
+ const Vec4f v_n = BSplineBasis::eval(vv);
+ const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
+ const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
+ const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
+ const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
+
+ const Vec4f u_n = BSplineBasis::eval(uu);
+ return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
+ }
+
+ __forceinline Vertex eval_du(const float uu, const float vv) const
+ {
+ const Vec4f v_n = BSplineBasis::eval(vv);
+ const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
+ const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
+ const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
+ const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
+
+ const Vec4f u_n = BSplineBasis::derivative(uu);
+ return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
+ }
+
+ __forceinline Vertex eval_dv(const float uu, const float vv) const
+ {
+ const Vec4f v_n = BSplineBasis::derivative(vv);
+ const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
+ const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
+ const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
+ const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
+
+ const Vec4f u_n = BSplineBasis::eval(uu);
+ return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
+ }
+
+ __forceinline Vertex eval_dudu(const float uu, const float vv) const
+ {
+ const Vec4f v_n = BSplineBasis::eval(vv);
+ const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
+ const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
+ const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
+ const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
+
+ const Vec4f u_n = BSplineBasis::derivative2(uu);
+ return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
+ }
+
+ __forceinline Vertex eval_dvdv(const float uu, const float vv) const
+ {
+ const Vec4f v_n = BSplineBasis::derivative2(vv);
+ const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
+ const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
+ const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
+ const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
+
+ const Vec4f u_n = BSplineBasis::eval(uu);
+ return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
+ }
+
+ __forceinline Vertex eval_dudv(const float uu, const float vv) const
+ {
+ const Vec4f v_n = BSplineBasis::derivative(vv);
+ const Vertex_t curve0 = madd(v_n[0],v[0][0],madd(v_n[1],v[1][0],madd(v_n[2],v[2][0],v_n[3] * v[3][0])));
+ const Vertex_t curve1 = madd(v_n[0],v[0][1],madd(v_n[1],v[1][1],madd(v_n[2],v[2][1],v_n[3] * v[3][1])));
+ const Vertex_t curve2 = madd(v_n[0],v[0][2],madd(v_n[1],v[1][2],madd(v_n[2],v[2][2],v_n[3] * v[3][2])));
+ const Vertex_t curve3 = madd(v_n[0],v[0][3],madd(v_n[1],v[1][3],madd(v_n[2],v[2][3],v_n[3] * v[3][3])));
+
+ const Vec4f u_n = BSplineBasis::derivative(uu);
+ return madd(u_n[0],curve0,madd(u_n[1],curve1,madd(u_n[2],curve2,u_n[3] * curve3)));
+ }
+
+ __forceinline Vertex normal(const float uu, const float vv) const
+ {
+ const Vertex tu = eval_du(uu,vv);
+ const Vertex tv = eval_dv(uu,vv);
+ return cross(tu,tv);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval(const T& uu, const T& vv, const Vec4<T>& u_n, const Vec4<T>& v_n) const
+ {
+ const T curve0_x = madd(v_n[0],T(v[0][0].x),madd(v_n[1],T(v[1][0].x),madd(v_n[2],T(v[2][0].x),v_n[3] * T(v[3][0].x))));
+ const T curve1_x = madd(v_n[0],T(v[0][1].x),madd(v_n[1],T(v[1][1].x),madd(v_n[2],T(v[2][1].x),v_n[3] * T(v[3][1].x))));
+ const T curve2_x = madd(v_n[0],T(v[0][2].x),madd(v_n[1],T(v[1][2].x),madd(v_n[2],T(v[2][2].x),v_n[3] * T(v[3][2].x))));
+ const T curve3_x = madd(v_n[0],T(v[0][3].x),madd(v_n[1],T(v[1][3].x),madd(v_n[2],T(v[2][3].x),v_n[3] * T(v[3][3].x))));
+ const T x = madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
+
+ const T curve0_y = madd(v_n[0],T(v[0][0].y),madd(v_n[1],T(v[1][0].y),madd(v_n[2],T(v[2][0].y),v_n[3] * T(v[3][0].y))));
+ const T curve1_y = madd(v_n[0],T(v[0][1].y),madd(v_n[1],T(v[1][1].y),madd(v_n[2],T(v[2][1].y),v_n[3] * T(v[3][1].y))));
+ const T curve2_y = madd(v_n[0],T(v[0][2].y),madd(v_n[1],T(v[1][2].y),madd(v_n[2],T(v[2][2].y),v_n[3] * T(v[3][2].y))));
+ const T curve3_y = madd(v_n[0],T(v[0][3].y),madd(v_n[1],T(v[1][3].y),madd(v_n[2],T(v[2][3].y),v_n[3] * T(v[3][3].y))));
+ const T y = madd(u_n[0],curve0_y,madd(u_n[1],curve1_y,madd(u_n[2],curve2_y,u_n[3] * curve3_y)));
+
+ const T curve0_z = madd(v_n[0],T(v[0][0].z),madd(v_n[1],T(v[1][0].z),madd(v_n[2],T(v[2][0].z),v_n[3] * T(v[3][0].z))));
+ const T curve1_z = madd(v_n[0],T(v[0][1].z),madd(v_n[1],T(v[1][1].z),madd(v_n[2],T(v[2][1].z),v_n[3] * T(v[3][1].z))));
+ const T curve2_z = madd(v_n[0],T(v[0][2].z),madd(v_n[1],T(v[1][2].z),madd(v_n[2],T(v[2][2].z),v_n[3] * T(v[3][2].z))));
+ const T curve3_z = madd(v_n[0],T(v[0][3].z),madd(v_n[1],T(v[1][3].z),madd(v_n[2],T(v[2][3].z),v_n[3] * T(v[3][3].z))));
+ const T z = madd(u_n[0],curve0_z,madd(u_n[1],curve1_z,madd(u_n[2],curve2_z,u_n[3] * curve3_z)));
+
+ return Vec3<T>(x,y,z);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval(const T& uu, const T& vv) const
+ {
+ const Vec4<T> u_n = BSplineBasis::eval(uu);
+ const Vec4<T> v_n = BSplineBasis::eval(vv);
+ return eval(uu,vv,u_n,v_n);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval_du(const T& uu, const T& vv) const
+ {
+ const Vec4<T> u_n = BSplineBasis::derivative(uu);
+ const Vec4<T> v_n = BSplineBasis::eval(vv);
+ return eval(uu,vv,u_n,v_n);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval_dv(const T& uu, const T& vv) const
+ {
+ const Vec4<T> u_n = BSplineBasis::eval(uu);
+ const Vec4<T> v_n = BSplineBasis::derivative(vv);
+ return eval(uu,vv,u_n,v_n);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval_dudu(const T& uu, const T& vv) const
+ {
+ const Vec4<T> u_n = BSplineBasis::derivative2(uu);
+ const Vec4<T> v_n = BSplineBasis::eval(vv);
+ return eval(uu,vv,u_n,v_n);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval_dvdv(const T& uu, const T& vv) const
+ {
+ const Vec4<T> u_n = BSplineBasis::eval(uu);
+ const Vec4<T> v_n = BSplineBasis::derivative2(vv);
+ return eval(uu,vv,u_n,v_n);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> eval_dudv(const T& uu, const T& vv) const
+ {
+ const Vec4<T> u_n = BSplineBasis::derivative(uu);
+ const Vec4<T> v_n = BSplineBasis::derivative(vv);
+ return eval(uu,vv,u_n,v_n);
+ }
+
+ template<typename T>
+ __forceinline Vec3<T> normal(const T& uu, const T& vv) const {
+ return cross(eval_du(uu,vv),eval_dv(uu,vv));
+ }
+
+ void eval(const float u, const float v,
+ Vertex* P, Vertex* dPdu, Vertex* dPdv, Vertex* ddPdudu, Vertex* ddPdvdv, Vertex* ddPdudv,
+ const float dscale = 1.0f) const
+ {
+ if (P) {
+ *P = eval(u,v);
+ }
+ if (dPdu) {
+ assert(dPdu); *dPdu = eval_du(u,v)*dscale;
+ assert(dPdv); *dPdv = eval_dv(u,v)*dscale;
+ }
+ if (ddPdudu) {
+ assert(ddPdudu); *ddPdudu = eval_dudu(u,v)*sqr(dscale);
+ assert(ddPdvdv); *ddPdvdv = eval_dvdv(u,v)*sqr(dscale);
+ assert(ddPdudv); *ddPdudv = eval_dudv(u,v)*sqr(dscale);
+ }
+ }
+
+ template<class vfloat>
+ __forceinline vfloat eval(const size_t i, const vfloat& uu, const vfloat& vv, const Vec4<vfloat>& u_n, const Vec4<vfloat>& v_n) const
+ {
+ const vfloat curve0_x = madd(v_n[0],vfloat(v[0][0][i]),madd(v_n[1],vfloat(v[1][0][i]),madd(v_n[2],vfloat(v[2][0][i]),v_n[3] * vfloat(v[3][0][i]))));
+ const vfloat curve1_x = madd(v_n[0],vfloat(v[0][1][i]),madd(v_n[1],vfloat(v[1][1][i]),madd(v_n[2],vfloat(v[2][1][i]),v_n[3] * vfloat(v[3][1][i]))));
+ const vfloat curve2_x = madd(v_n[0],vfloat(v[0][2][i]),madd(v_n[1],vfloat(v[1][2][i]),madd(v_n[2],vfloat(v[2][2][i]),v_n[3] * vfloat(v[3][2][i]))));
+ const vfloat curve3_x = madd(v_n[0],vfloat(v[0][3][i]),madd(v_n[1],vfloat(v[1][3][i]),madd(v_n[2],vfloat(v[2][3][i]),v_n[3] * vfloat(v[3][3][i]))));
+ return madd(u_n[0],curve0_x,madd(u_n[1],curve1_x,madd(u_n[2],curve2_x,u_n[3] * curve3_x)));
+ }
+
+ template<typename vbool, typename vfloat>
+ void eval(const vbool& valid, const vfloat& uu, const vfloat& vv,
+ float* P, float* dPdu, float* dPdv, float* ddPdudu, float* ddPdvdv, float* ddPdudv,
+ const float dscale, const size_t dstride, const size_t N) const
+ {
+ if (P) {
+ const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
+ const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
+ for (size_t i=0; i<N; i++) vfloat::store(valid,P+i*dstride,eval(i,uu,vv,u_n,v_n));
+ }
+ if (dPdu)
+ {
+ {
+ assert(dPdu);
+ const Vec4<vfloat> u_n = BSplineBasis::derivative(uu);
+ const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
+ for (size_t i=0; i<N; i++) vfloat::store(valid,dPdu+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
+ }
+ {
+ assert(dPdv);
+ const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
+ const Vec4<vfloat> v_n = BSplineBasis::derivative(vv);
+ for (size_t i=0; i<N; i++) vfloat::store(valid,dPdv+i*dstride,eval(i,uu,vv,u_n,v_n)*dscale);
+ }
+ }
+ if (ddPdudu)
+ {
+ {
+ assert(ddPdudu);
+ const Vec4<vfloat> u_n = BSplineBasis::derivative2(uu);
+ const Vec4<vfloat> v_n = BSplineBasis::eval(vv);
+ for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudu+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
+ }
+ {
+ assert(ddPdvdv);
+ const Vec4<vfloat> u_n = BSplineBasis::eval(uu);
+ const Vec4<vfloat> v_n = BSplineBasis::derivative2(vv);
+ for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdvdv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
+ }
+ {
+ assert(ddPdudv);
+ const Vec4<vfloat> u_n = BSplineBasis::derivative(uu);
+ const Vec4<vfloat> v_n = BSplineBasis::derivative(vv);
+ for (size_t i=0; i<N; i++) vfloat::store(valid,ddPdudv+i*dstride,eval(i,uu,vv,u_n,v_n)*sqr(dscale));
+ }
+ }
+ }
+
+ friend __forceinline embree_ostream operator<<(embree_ostream o, const BSplinePatchT& p)
+ {
+ for (size_t y=0; y<4; y++)
+ for (size_t x=0; x<4; x++)
+ o << "[" << y << "][" << x << "] " << p.v[y][x] << embree_endl;
+ return o;
+ }
+
+ public:
+ Vertex v[4][4];
+ };
+
+ typedef BSplinePatchT<Vec3fa,Vec3fa_t> BSplinePatch3fa;
+}