diff options
Diffstat (limited to 'thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h')
-rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h | 85 |
1 files changed, 0 insertions, 85 deletions
diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h b/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h deleted file mode 100644 index 117c7a79b0..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "parallel_for.h" - -namespace embree -{ - template<typename Value> - struct ParallelPrefixSumState - { - enum { MAX_TASKS = 64 }; - Value counts[MAX_TASKS]; - Value sums [MAX_TASKS]; - }; - - template<typename Index, typename Value, typename Func, typename Reduction> - __forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction) - { - /* calculate number of tasks to use */ - const size_t numThreads = TaskScheduler::threadCount(); - const size_t numBlocks = (last-first+minStepSize-1)/minStepSize; - const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS)); - - /* perform parallel prefix sum */ - parallel_for(taskCount, [&](const size_t taskIndex) - { - const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount; - const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount; - state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]); - }); - - /* calculate prefix sum */ - Value sum=identity; - for (size_t i=0; i<taskCount; i++) - { - const Value c = state.counts[i]; - state.sums[i] = sum; - sum=reduction(sum,c); - } - - return sum; - } - - /*! parallel calculation of prefix sums */ - template<typename SrcArray, typename DstArray, typename Value, typename Add> - __forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096) - { - /* perform single threaded prefix operation for small N */ - if (N < SINGLE_THREAD_THRESHOLD) - { - Value sum=identity; - for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum; - return sum; - } - - /* perform parallel prefix operation for large N */ - else - { - ParallelPrefixSumState<Value> state; - - /* initial run just sets up start values for subtasks */ - parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value { - - Value s = identity; - for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]); - return s; - - }, add); - - /* final run calculates prefix sum */ - return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value { - - Value s = identity; - for (size_t i=r.begin(); i<r.end(); i++) { - dst[i] = add(sum,s); - s = add(s,src[i]); - } - return s; - - }, add); - } - } -} |