Diffstat (limited to 'thirdparty/embree/kernels/subdiv/tessellation_cache.h')
-rw-r--r--   thirdparty/embree/kernels/subdiv/tessellation_cache.h   325
1 file changed, 325 insertions(+), 0 deletions(-)
diff --git a/thirdparty/embree/kernels/subdiv/tessellation_cache.h b/thirdparty/embree/kernels/subdiv/tessellation_cache.h
new file mode 100644
index 0000000000..99edf49be4
--- /dev/null
+++ b/thirdparty/embree/kernels/subdiv/tessellation_cache.h
@@ -0,0 +1,325 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../common/default.h"
+
+/* force a complete cache invalidation when running out of allocation space */
+#define FORCE_SIMPLE_FLUSH 0
+
+#define THREAD_BLOCK_ATOMIC_ADD 4
+
+#if defined(DEBUG)
+#define CACHE_STATS(x)
+#else
+#define CACHE_STATS(x)
+#endif
+
+namespace embree
+{
+  class SharedTessellationCacheStats
+  {
+  public:
+    /* stats */
+    static std::atomic<size_t> cache_accesses;
+    static std::atomic<size_t> cache_hits;
+    static std::atomic<size_t> cache_misses;
+    static std::atomic<size_t> cache_flushes;
+    static size_t        cache_num_patches;
+    __aligned(64) static SpinLock mtx;
+
+    /* print stats for debugging */
+    static void printStats();
+    static void clearStats();
+  };
+
+  void resizeTessellationCache(size_t new_size);
+  void resetTessellationCache();
+
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////
+
+ struct __aligned(64) ThreadWorkState
+ {
+   ALIGNED_STRUCT_(64);
+
+   std::atomic<size_t> counter;
+   ThreadWorkState* next;
+   bool allocated;
+
+   __forceinline ThreadWorkState(bool allocated = false)
+     : counter(0), next(nullptr), allocated(allocated)
+   {
+     assert( ((size_t)this % 64) == 0 );
+   }
+ };
+
+ class __aligned(64) SharedLazyTessellationCache
+ {
+ public:
+
+   static const size_t NUM_CACHE_SEGMENTS              = 8;
+   static const size_t NUM_PREALLOC_THREAD_WORK_STATES = 512;
+   static const size_t COMMIT_INDEX_SHIFT              = 32+8;
+#if defined(__64BIT__)
+   static const size_t REF_TAG_MASK                    = 0xffffffffff;
+#else
+   static const size_t REF_TAG_MASK                    = 0x7FFFFFFF;
+#endif
+   static const size_t MAX_TESSELLATION_CACHE_SIZE     = REF_TAG_MASK+1;
+   static const size_t BLOCK_SIZE                      = 64;
+
+
+    /*! Per thread tessellation ref cache */
+   static __thread ThreadWorkState* init_t_state;
+   static ThreadWorkState* current_t_state;
+
+   static __forceinline ThreadWorkState *threadState()
+   {
+     if (unlikely(!init_t_state))
+       /* sets init_t_state, can't return pointer due to macosx icc bug*/
+       SharedLazyTessellationCache::sharedLazyTessellationCache.getNextRenderThreadWorkState();
+     return init_t_state;
+   }
+
+   struct Tag
+   {
+     __forceinline Tag() : data(0) {}
+
+     __forceinline Tag(void* ptr, size_t combinedTime) {
+       init(ptr,combinedTime);
+     }
+
+     __forceinline Tag(size_t ptr, size_t combinedTime) {
+       init((void*)ptr,combinedTime);
+     }
+
+     __forceinline void init(void* ptr, size_t combinedTime)
+     {
+       if (ptr == nullptr) {
+         data = 0;
+         return;
+       }
+       int64_t new_root_ref = (int64_t) ptr;
+       new_root_ref -= (int64_t)SharedLazyTessellationCache::sharedLazyTessellationCache.getDataPtr();
+       assert( new_root_ref <= (int64_t)REF_TAG_MASK );
+       new_root_ref |= (int64_t)combinedTime << COMMIT_INDEX_SHIFT;
+       data = new_root_ref;
+     }
+
+     __forceinline int64_t get() const { return data.load(); }
+     __forceinline void set( int64_t v ) { data.store(v); }
+     __forceinline void reset() { data.store(0); }
+
+   private:
+     atomic<int64_t> data;
+   };
+
+   static __forceinline size_t extractCommitIndex(const int64_t v) { return v >> SharedLazyTessellationCache::COMMIT_INDEX_SHIFT; }
+
+   struct CacheEntry
+   {
+     Tag tag;
+     SpinLock mutex;
+   };
+
+ private:
+
+   float *data;
+   bool hugepages;
+   size_t size;
+   size_t maxBlocks;
+   ThreadWorkState *threadWorkState;
+
+   __aligned(64) std::atomic<size_t> localTime;
+   __aligned(64) std::atomic<size_t> next_block;
+   __aligned(64) SpinLock   reset_state;
+   __aligned(64) SpinLock   linkedlist_mtx;
+   __aligned(64) std::atomic<size_t> switch_block_threshold;
+   __aligned(64) std::atomic<size_t> numRenderThreads;
+
+
+ public:
+
+
+   SharedLazyTessellationCache();
+   ~SharedLazyTessellationCache();
+
+   void getNextRenderThreadWorkState();
+
+   __forceinline size_t maxAllocSize() const {
+     return switch_block_threshold;
+   }
+
+   __forceinline size_t getCurrentIndex() { return localTime.load(); }
+   __forceinline void   addCurrentIndex(const size_t i=1) { localTime.fetch_add(i); }
+
+   __forceinline size_t getTime(const size_t globalTime) {
+     return localTime.load()+NUM_CACHE_SEGMENTS*globalTime;
+   }
+
+
+   __forceinline size_t lockThread  (ThreadWorkState *const t_state, const ssize_t plus=1) { return t_state->counter.fetch_add(plus);  }
+   __forceinline size_t unlockThread(ThreadWorkState *const t_state, const ssize_t plus=-1) { assert(isLocked(t_state)); return t_state->counter.fetch_add(plus); }
+
+   __forceinline bool isLocked(ThreadWorkState *const t_state) { return t_state->counter.load() != 0; }
+
+   static __forceinline void lock  () { sharedLazyTessellationCache.lockThread(threadState()); }
+   static __forceinline void unlock() { sharedLazyTessellationCache.unlockThread(threadState()); }
+   static __forceinline bool isLocked() { return sharedLazyTessellationCache.isLocked(threadState()); }
+   static __forceinline size_t getState() { return threadState()->counter.load(); }
+   static __forceinline void lockThreadLoop() { sharedLazyTessellationCache.lockThreadLoop(threadState()); }
+
+   static __forceinline size_t getTCacheTime(const size_t globalTime) {
+     return sharedLazyTessellationCache.getTime(globalTime);
+   }
+
+   /* per thread lock */
+   __forceinline void lockThreadLoop (ThreadWorkState *const t_state)
+   {
+     while(1)
+     {
+       size_t lock = SharedLazyTessellationCache::sharedLazyTessellationCache.lockThread(t_state,1);
+       if (unlikely(lock >= THREAD_BLOCK_ATOMIC_ADD))
+       {
+         /* lock failed wait until sync phase is over */
+         sharedLazyTessellationCache.unlockThread(t_state,-1);
+         sharedLazyTessellationCache.waitForUsersLessEqual(t_state,0);
+       }
+       else
+         break;
+     }
+   }
+
+   static __forceinline void* lookup(CacheEntry& entry, size_t globalTime)
+   {
+     const int64_t subdiv_patch_root_ref = entry.tag.get();
+     CACHE_STATS(SharedTessellationCacheStats::cache_accesses++);
+
+     if (likely(subdiv_patch_root_ref != 0))
+     {
+       const size_t subdiv_patch_root = (subdiv_patch_root_ref & REF_TAG_MASK) + (size_t)sharedLazyTessellationCache.getDataPtr();
+       const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);
+
+       if (likely( sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime) ))
+       {
+         CACHE_STATS(SharedTessellationCacheStats::cache_hits++);
+         return (void*) subdiv_patch_root;
+       }
+     }
+     CACHE_STATS(SharedTessellationCacheStats::cache_misses++);
+     return nullptr;
+   }
+
+   template<typename Constructor>
+     static __forceinline auto lookup (CacheEntry& entry, size_t globalTime, const Constructor constructor, const bool before=false) -> decltype(constructor())
+   {
+     ThreadWorkState *t_state = SharedLazyTessellationCache::threadState();
+
+     while (true)
+     {
+       sharedLazyTessellationCache.lockThreadLoop(t_state);
+       void* patch = SharedLazyTessellationCache::lookup(entry,globalTime);
+       if (patch) return (decltype(constructor())) patch;
+
+       if (entry.mutex.try_lock())
+       {
+         if (!validTag(entry.tag,globalTime))
+         {
+           auto timeBefore = sharedLazyTessellationCache.getTime(globalTime);
+           auto ret = constructor(); // thread is locked here!
+           assert(ret);
+           /* this should never return nullptr */
+           auto timeAfter = sharedLazyTessellationCache.getTime(globalTime);
+           auto time = before ? timeBefore : timeAfter;
+           __memory_barrier();
+           entry.tag = SharedLazyTessellationCache::Tag(ret,time);
+           __memory_barrier();
+           entry.mutex.unlock();
+           return ret;
+         }
+         entry.mutex.unlock();
+       }
+       SharedLazyTessellationCache::sharedLazyTessellationCache.unlockThread(t_state);
+     }
+   }
+
+   __forceinline bool validCacheIndex(const size_t i, const size_t globalTime)
+   {
+#if FORCE_SIMPLE_FLUSH == 1
+     return i == getTime(globalTime);
+#else
+     return i+(NUM_CACHE_SEGMENTS-1) >= getTime(globalTime);
+#endif
+   }
+
+   static __forceinline bool validTime(const size_t oldtime, const size_t newTime)
+   {
+     return oldtime+(NUM_CACHE_SEGMENTS-1) >= newTime;
+   }
+
+
+    static __forceinline bool validTag(const Tag& tag, size_t globalTime)
+    {
+      const int64_t subdiv_patch_root_ref = tag.get();
+      if (subdiv_patch_root_ref == 0) return false;
+      const size_t subdiv_patch_cache_index = extractCommitIndex(subdiv_patch_root_ref);
+      return sharedLazyTessellationCache.validCacheIndex(subdiv_patch_cache_index,globalTime);
+    }
+
+   void waitForUsersLessEqual(ThreadWorkState *const t_state,
+                              const unsigned int users);
+
+   __forceinline size_t alloc(const size_t blocks)
+   {
+     if (unlikely(blocks >= switch_block_threshold))
+       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"allocation exceeds size of tessellation cache segment");
+
+     assert(blocks < switch_block_threshold);
+     size_t index = next_block.fetch_add(blocks);
+     if (unlikely(index + blocks >= switch_block_threshold)) return (size_t)-1;
+     return index;
+   }
+
+   static __forceinline void* malloc(const size_t bytes)
+   {
+     size_t block_index = -1;
+     ThreadWorkState *const t_state = threadState();
+     while (true)
+     {
+       block_index = sharedLazyTessellationCache.alloc((bytes+BLOCK_SIZE-1)/BLOCK_SIZE);
+       if (block_index == (size_t)-1)
+       {
+         sharedLazyTessellationCache.unlockThread(t_state);
+         sharedLazyTessellationCache.allocNextSegment();
+         sharedLazyTessellationCache.lockThread(t_state);
+         continue;
+       }
+       break;
+     }
+     return sharedLazyTessellationCache.getBlockPtr(block_index);
+   }
+
+   __forceinline void *getBlockPtr(const size_t block_index)
+   {
+     assert(block_index < maxBlocks);
+     assert(data);
+     assert(block_index*16 <= size);
+     return (void*)&data[block_index*16];
+   }
+
+   __forceinline void*  getDataPtr()      { return data; }
+   __forceinline size_t getNumUsedBytes() { return next_block * BLOCK_SIZE; }
+   __forceinline size_t getMaxBlocks()    { return maxBlocks; }
+   __forceinline size_t getSize()         { return size; }
+
+   void allocNextSegment();
+   void realloc(const size_t newSize);
+
+   void reset();
+
+   static SharedLazyTessellationCache sharedLazyTessellationCache;
+ };
+}
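
For context, a minimal usage sketch of the templated lookup()/malloc() path declared in this header. The entry, the function name, and the lambda body below are illustrative assumptions and not part of the patch; the pattern simply follows the header's own locking contract (the templated lookup returns with the per-thread lock held, so the caller reads the data and then releases the lock explicitly).

// Hypothetical caller of SharedLazyTessellationCache (sketch only, not in this patch).
#include "tessellation_cache.h"

namespace embree
{
  /* hypothetical cache entry; in practice this would live inside a subdivision patch */
  static SharedLazyTessellationCache::CacheEntry example_entry;

  void useGrid(const size_t globalTime, const size_t numFloats)
  {
    /* on a hit, returns the cached pointer; on a miss, runs the lambda with the
       per-thread lock already held and publishes the result through the entry tag */
    float* grid = SharedLazyTessellationCache::lookup(example_entry, globalTime, [&]() -> float* {
      float* data = (float*) SharedLazyTessellationCache::malloc(numFloats*sizeof(float));
      for (size_t i=0; i<numFloats; i++) data[i] = 0.0f; /* build tessellation data here */
      return data;
    });

    /* ... read from 'grid' while the per-thread lock is still held ... */

    SharedLazyTessellationCache::unlock(); /* release the per-thread lock; 'grid' may be evicted afterwards */
  }
}

Note the order of operations: the pointer returned by lookup() is only guaranteed valid while the calling thread's ThreadWorkState counter is non-zero, which is why unlock() comes after the last use rather than immediately after the call.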