summaryrefslogtreecommitdiff
path: root/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h')
-rw-r--r--thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h85
1 files changed, 0 insertions, 85 deletions
diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h b/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h
deleted file mode 100644
index 117c7a79b0..0000000000
--- a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2009-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#pragma once
-
-#include "parallel_for.h"
-
-namespace embree
-{
- template<typename Value>
- struct ParallelPrefixSumState
- {
- enum { MAX_TASKS = 64 };
- Value counts[MAX_TASKS];
- Value sums [MAX_TASKS];
- };
-
- template<typename Index, typename Value, typename Func, typename Reduction>
- __forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
- {
- /* calculate number of tasks to use */
- const size_t numThreads = TaskScheduler::threadCount();
- const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;
- const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
-
- /* perform parallel prefix sum */
- parallel_for(taskCount, [&](const size_t taskIndex)
- {
- const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;
- const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;
- state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);
- });
-
- /* calculate prefix sum */
- Value sum=identity;
- for (size_t i=0; i<taskCount; i++)
- {
- const Value c = state.counts[i];
- state.sums[i] = sum;
- sum=reduction(sum,c);
- }
-
- return sum;
- }
-
- /*! parallel calculation of prefix sums */
- template<typename SrcArray, typename DstArray, typename Value, typename Add>
- __forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
- {
- /* perform single threaded prefix operation for small N */
- if (N < SINGLE_THREAD_THRESHOLD)
- {
- Value sum=identity;
- for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;
- return sum;
- }
-
- /* perform parallel prefix operation for large N */
- else
- {
- ParallelPrefixSumState<Value> state;
-
- /* initial run just sets up start values for subtasks */
- parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
-
- Value s = identity;
- for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);
- return s;
-
- }, add);
-
- /* final run calculates prefix sum */
- return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
-
- Value s = identity;
- for (size_t i=r.begin(); i<r.end(); i++) {
- dst[i] = add(sum,s);
- s = add(s,src[i]);
- }
- return s;
-
- }, add);
- }
- }
-}