summaryrefslogtreecommitdiff
path: root/thirdparty/embree/common/algorithms/parallel_prefix_sum.h
diff options
context:
space:
mode:
authorRémi Verschelde <remi@verschelde.fr>2021-05-21 18:30:02 +0200
committerGitHub <noreply@github.com>2021-05-21 18:30:02 +0200
commit3ee034451a9349e7de26decc662afefd7ab8c460 (patch)
treea8bec3fbb06c2eaca05a075f5ffe2cdd2d94f04a /thirdparty/embree/common/algorithms/parallel_prefix_sum.h
parent8fa07eae145e1e37eb8708ce8c117188b58e3ecc (diff)
parent767e374dced69b45db0afb30ca2ccf0bbbeef672 (diff)
Merge pull request #48885 from JFonS/upgrade_embree
Upgrade Embree to the latest official release (3.13.0).
Diffstat (limited to 'thirdparty/embree/common/algorithms/parallel_prefix_sum.h')
-rw-r--r--thirdparty/embree/common/algorithms/parallel_prefix_sum.h85
1 files changed, 85 insertions, 0 deletions
diff --git a/thirdparty/embree/common/algorithms/parallel_prefix_sum.h b/thirdparty/embree/common/algorithms/parallel_prefix_sum.h
new file mode 100644
index 0000000000..208bb4e480
--- /dev/null
+++ b/thirdparty/embree/common/algorithms/parallel_prefix_sum.h
@@ -0,0 +1,85 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "parallel_for.h"
+
+namespace embree
+{
+ template<typename Value>
+ struct ParallelPrefixSumState // scratch storage handed to the task-based parallel_prefix_sum
+ {
+ enum { MAX_TASKS = 64 }; // capacity of both arrays; parallel_prefix_sum caps its task count at this value
+ Value counts[MAX_TASKS]; // counts[t]: value returned by func for task t's subrange
+ Value sums [MAX_TASKS]; // sums[t]: identity reduced with counts[0..t-1], written after the parallel phase
+ };
+
+ /*! Splits [first,last) into chunks (at most one per thread, per minStepSize block and per MAX_TASKS slot), calls
+  *  func(chunk, state.sums[task]) for each chunk via parallel_for and stores the result in state.counts[task].
+  *  Afterwards state.sums[task] is set to identity reduced with all earlier counts; the reduction of all counts is returned.
+  *  NOTE: func sees state.sums as left by an earlier call on the same state; this function does not initialize it first. */
+ template<typename Index, typename Value, typename Func, typename Reduction>
+ __forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
+ {
+   const auto length = last-first;
+   const size_t threadLimit = TaskScheduler::threadCount();
+   const size_t blockCount = (length+minStepSize-1)/minStepSize;
+   const size_t numTasks = min(threadLimit,blockCount,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
+
+   parallel_for(numTasks, [&](const size_t task) {
+     const size_t lo = first+(task+0)*length/numTasks;
+     const size_t hi = first+(task+1)*length/numTasks;
+     state.counts[task] = func(range<size_t>(lo,hi),state.sums[task]);
+   });
+
+   /* serial exclusive scan over the per-task results */
+   Value running = identity;
+   for (size_t t=0; t!=numTasks; ++t) {
+     const Value partial = state.counts[t];
+     state.sums[t] = running;
+     running = reduction(running,partial);
+   }
+   return running;
+ }
+
+ /*! Exclusive prefix operation: for i in [0,N) writes to dst[i] the add-combination of identity with src[0..i-1],
+  *  and returns the combination over all N elements. Inputs with N < SINGLE_THREAD_THRESHOLD are scanned sequentially;
+  *  larger ones run the task-based parallel_prefix_sum twice (chunk totals first, then per-element output).
+  *  NOTE(review): the parallel path regroups the add calls, so it presumably needs add to be associative with
+  *  identity as its neutral element -- confirm against callers. */
+ template<typename SrcArray, typename DstArray, typename Value, typename Add>
+ __forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
+ {
+   if (N < SINGLE_THREAD_THRESHOLD) {
+     Value running = identity;
+     for (size_t i=0; i<N; ++i) {
+       dst[i] = running;
+       running = add(running,src[i]);
+     }
+     return running;
+   }
+
+   const size_t blockSize = 1024;
+   ParallelPrefixSumState<Value> state;
+
+   /* pass 1: only the chunk totals matter; they become the start values stored in state.sums */
+   const auto reduceChunk = [&](const range<size_t>& r, const Value&) -> Value {
+     Value acc = identity;
+     for (size_t i=r.begin(); i<r.end(); ++i) acc = add(acc,src[i]);
+     return acc;
+   };
+   parallel_prefix_sum(state, size_t(0), size_t(N), blockSize, identity, reduceChunk, add);
+
+   /* pass 2: each chunk receives its start value from state.sums and emits the per-element results */
+   const auto scanChunk = [&](const range<size_t>& r, const Value& offset) -> Value {
+     Value acc = identity;
+     for (size_t i=r.begin(); i<r.end(); ++i) {
+       dst[i] = add(offset,acc);
+       acc = add(acc,src[i]);
+     }
+     return acc;
+   };
+   return parallel_prefix_sum(state, size_t(0), size_t(N), blockSize, identity, scanChunk, add);
+ }
+}