summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/kernels/common/profile.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree-aarch64/kernels/common/profile.h')
-rw-r--r--thirdparty/embree-aarch64/kernels/common/profile.h159
1 files changed, 159 insertions, 0 deletions
diff --git a/thirdparty/embree-aarch64/kernels/common/profile.h b/thirdparty/embree-aarch64/kernels/common/profile.h
new file mode 100644
index 0000000000..a7de36414d
--- /dev/null
+++ b/thirdparty/embree-aarch64/kernels/common/profile.h
@@ -0,0 +1,159 @@
+// Copyright 2009-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "default.h"
+
+namespace embree
+{
+ /*! helper structure for the implementation of the profile functions below */
+ struct ProfileTimer
+ {
+ static const size_t N = 20;
+
+ ProfileTimer () {}
+
+ ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)
+ {
+ for (size_t i=0; i<N; i++) names[i] = nullptr;
+ for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;
+ for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;
+ for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;
+ for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;
+ }
+
+ __forceinline void begin()
+ {
+ j=0;
+ t0 = tj = getSeconds();
+ }
+
+ __forceinline void end() {
+ absolute("total");
+ i++;
+ }
+
+ __forceinline void operator() (const char* name) {
+ relative(name);
+ }
+
+ __forceinline void absolute (const char* name)
+ {
+ const double t1 = getSeconds();
+ const double dt = t1-t0;
+ assert(names[j] == nullptr || names[j] == name);
+ names[j] = name;
+ if (i == 0) dt_fst[j] = dt;
+ if (i>=numSkip) {
+ dt_min[j] = min(dt_min[j],dt);
+ dt_avg[j] = dt_avg[j] + dt;
+ dt_max[j] = max(dt_max[j],dt);
+ }
+ j++;
+ maxJ = max(maxJ,j);
+ }
+
+ __forceinline void relative (const char* name)
+ {
+ const double t1 = getSeconds();
+ const double dt = t1-tj;
+ tj = t1;
+ assert(names[j] == nullptr || names[j] == name);
+ names[j] = name;
+ if (i == 0) dt_fst[j] = dt;
+ if (i>=numSkip) {
+ dt_min[j] = min(dt_min[j],dt);
+ dt_avg[j] = dt_avg[j] + dt;
+ dt_max[j] = max(dt_max[j],dt);
+ }
+ j++;
+ maxJ = max(maxJ,j);
+ }
+
+ void print(size_t numElements)
+ {
+ for (size_t k=0; k<N; k++)
+ dt_avg[k] /= double(i-numSkip);
+
+ printf(" profile [M/s]:\n");
+ for (size_t j=0; j<maxJ; j++)
+ printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
+ names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);
+
+ printf(" profile [ms]:\n");
+ for (size_t j=0; j<maxJ; j++)
+ printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
+ names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
+ }
+
+ void print()
+ {
+ printf(" profile:\n");
+
+ for (size_t k=0; k<N; k++)
+ dt_avg[k] /= double(i-numSkip);
+
+ for (size_t j=0; j<maxJ; j++) {
+ printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
+ names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
+ }
+ }
+
+ double avg() {
+ return dt_avg[maxJ-1]/double(i-numSkip);
+ }
+
+ private:
+ size_t i;
+ size_t j;
+ size_t maxJ;
+ size_t numSkip;
+ double t0;
+ double tj;
+ const char* names[N];
+ double dt_fst[N];
+ double dt_min[N];
+ double dt_avg[N];
+ double dt_max[N];
+ };
+
+ /*! This function executes some code block multiple times and measured sections of it.
+ Use the following way:
+
+ profile(1,10,1000,[&](ProfileTimer& timer) {
+ // code
+ timer("A");
+ // code
+ timer("B");
+ });
+ */
+ template<typename Closure>
+ void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
+ {
+ ProfileTimer timer(numSkip);
+
+ for (size_t i=0; i<numSkip+numIter; i++)
+ {
+ timer.begin();
+ closure(timer);
+ timer.end();
+ }
+ timer.print(numElements);
+ }
+
+ /*! similar as the function above, but the timer object comes externally */
+ template<typename Closure>
+ void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
+ {
+ timer = ProfileTimer(numSkip);
+
+ for (size_t i=0; i<numSkip+numIter; i++)
+ {
+ timer.begin();
+ closure(timer);
+ timer.end();
+ }
+ timer.print(numElements);
+ }
+}