1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../math/math.h"
/* include SSE wrapper classes */
#if defined(__SSE__) || defined(__ARM_NEON)
# include "sse.h"
#endif
/* include AVX wrapper classes */
#if defined(__AVX__)
# include "avx.h"
#endif
/* include AVX512 wrapper classes */
#if defined (__AVX512F__)
# include "avx512.h"
#endif
namespace embree
{
/* Per-lane range test: a lane of the returned mask is set when the
 * corresponding lane of v satisfies both v >= -FLT_MAX and v <= +FLT_MAX.
 * NOTE(review): presumably lanes holding NaN or +/-inf fail at least one of
 * the two comparisons and are therefore cleared - confirm against the
 * vfloat comparison operators. */
template <int N>
__forceinline vbool<N> isfinite(const vfloat<N>& v)
{
  const float largest = std::numeric_limits<float>::max();
  const vfloat<N> lowerBound(-largest);
  const vfloat<N> upperBound( largest);
  return (v >= lowerBound) & (v <= upperBound);
}
/* Invokes closure(mask, value) once per distinct value found in the active
 * lanes of vi.
 *
 *   valid0  - mask of lanes to consider
 *   vi      - per-lane integer values
 *   closure - callable receiving the mask of all still-pending lanes whose
 *             value equals `value`, and that value itself
 *
 * Each round picks a lane via bsf(movemask(...)) (presumably the lowest
 * pending lane), gathers every pending lane carrying the same value, removes
 * those lanes from the pending set and then calls the closure. */
template<typename vbool, typename vint, typename Closure>
__forceinline void foreach_unique(const vbool& valid0, const vint& vi, const Closure& closure)
{
  for (vbool pending = valid0; any(pending); )
  {
    const int lane = int(bsf(movemask(pending)));
    const int value = vi[lane];
    const vbool sameValue = pending & (value == vi);
    pending = andn(pending, sameValue); // retire the lanes handled this round
    closure(sameValue, value);
  }
}
/* Single step of a unique-value iteration over the active lanes of vi.
 *
 *   valid   - in/out: pending lanes; the lanes reported in valid_i are
 *             removed from it before returning. Must have at least one
 *             active lane (asserted).
 *   vi      - per-lane integer values
 *   valid_i - out: pending lanes whose value equals the returned value
 *
 * Returns the value read from the lane selected by bsf(movemask(valid)). */
template<typename vbool, typename vint>
__forceinline int next_unique(vbool& valid, const vint& vi, /*out*/ vbool& valid_i)
{
  assert(any(valid));
  const int lane = int(bsf(movemask(valid)));
  const int value = vi[lane];
  valid_i = valid & (value == vi);
  valid = andn(valid, valid_i); // drop the lanes just reported
  return value;
}
/* Invokes closure(mask, value, lane) once per distinct value found in the
 * active lanes of vi.
 *
 *   valid0  - mask of lanes to consider
 *   vi      - per-lane integer values
 *   closure - callable receiving the mask of all still-pending lanes whose
 *             value equals `value`, the value, and the index of the lane the
 *             value was read from
 *
 * Works like foreach_unique, but additionally hands the selected lane index
 * (the result of bsf(movemask(...))) to the closure. */
template<typename vbool, typename vint, typename Closure>
__forceinline void foreach_unique_index(const vbool& valid0, const vint& vi, const Closure& closure)
{
  for (vbool pending = valid0; any(pending); )
  {
    const int lane = int(bsf(movemask(pending)));
    const int value = vi[lane];
    const vbool sameValue = pending & (value == vi);
    pending = andn(pending, sameValue); // retire the lanes handled this round
    closure(sameValue, value, lane);
  }
}
/* Single step of a unique-value iteration over the active lanes of vi that
 * reports a lane index instead of the value.
 *
 *   valid   - in/out: pending lanes; the lanes reported in valid_i are
 *             removed from it before returning. Must have at least one
 *             active lane (asserted).
 *   vi      - per-lane integer values
 *   valid_i - out: pending lanes whose value equals vi[returned index]
 *
 * Returns the lane index selected by bsf(movemask(valid)). */
template<typename vbool, typename vint>
__forceinline int next_unique_index(vbool& valid, const vint& vi, /*out*/ vbool& valid_i)
{
  assert(any(valid));
  const int lane = int(bsf(movemask(valid)));
  const int value = vi[lane];
  valid_i = valid & (value == vi);
  valid = andn(valid, valid_i); // drop the lanes just reported
  return lane;
}
/* Visits every integer coordinate (x,y) with x in [x0,x1) and y in [y0,y1),
 * row by row, handing the coordinates to the closure in vector-sized batches:
 *
 *   closure(valid, vx, vy)
 *
 * where vx/vy hold the x and y coordinates and `valid` marks the lanes that
 * carry real entries. Coordinates are staged in two scratch arrays and are
 * carried over from one row to the next, so a batch may mix several rows.
 * A batch is emitted once at least VSIZEX entries are staged, or when the
 * final chunk of the final row has been staged.
 * NOTE(review): assumes VSIZEX is the lane count of vintx, so that each
 * storeu writes VSIZEX ints and stays inside the 2*VSIZEX scratch arrays -
 * confirm. */
template<typename Closure>
__forceinline void foreach2(int x0, int x1, int y0, int y1, const Closure& closure)
{
  __aligned(64) int stagedX[2*VSIZEX];
  __aligned(64) int stagedY[2*VSIZEX];
  int staged = 0; // number of coordinate pairs currently waiting in the scratch arrays

  for (int row = y0; row < y1; row++)
  {
    const bool finalRow = row+1 >= y1;
    const vintx rowCoords = row;

    int col = x0;
    while (col < x1)
    {
      const bool finalChunk = col+VSIZEX >= x1;

      /* append a full vector of consecutive columns behind the staged entries */
      vintx colCoords = col+vintx(step);
      vintx::storeu(&stagedX[staged], colCoords);
      vintx::storeu(&stagedY[staged], rowCoords);

      /* only the columns that are still inside the row count as staged */
      const int accepted = min(x1-col,VSIZEX);
      staged += accepted;
      col += accepted;

      const bool flush = staged >= VSIZEX || (finalChunk && finalRow);
      if (!flush)
        continue;

      const vboolx valid = vintx(step) < vintx(staged);
      closure(valid, vintx::load(stagedX), vintx::load(stagedY));

      /* entries beyond the first vector were not emitted: rewind the column
       * cursor so they are generated again on the next iteration */
      col -= max(0, staged-VSIZEX);
      staged = 0;
    }
  }
}
}
|