summaryrefslogtreecommitdiff
path: root/thirdparty/oidn/common
diff options
context:
space:
mode:
authorJuan Linietsky <reduzio@gmail.com>2020-05-01 09:34:23 -0300
committerJuan Linietsky <reduzio@gmail.com>2020-05-10 15:59:09 -0300
commit1bea8e1eacc68bcedbd3f207395bccf11011dae2 (patch)
treeb75303a69491978c1e13360a3e6f355c5234dfe0 /thirdparty/oidn/common
parent6a0473bcc23c096ef9ee929632a209761c2668f6 (diff)
New lightmapper
-Added LocalVector (needed it) -Added stb_rect_pack (It's pretty cool, we could probably use it for other stuff too) -Fixes and changes all around the place -Added library for 128 bits fixed point (required for Delaunay3D)
Diffstat (limited to 'thirdparty/oidn/common')
-rw-r--r--thirdparty/oidn/common/barrier.h52
-rw-r--r--thirdparty/oidn/common/exception.h45
-rw-r--r--thirdparty/oidn/common/platform.cpp114
-rw-r--r--thirdparty/oidn/common/platform.h131
-rw-r--r--thirdparty/oidn/common/ref.h163
-rw-r--r--thirdparty/oidn/common/tensor.cpp83
-rw-r--r--thirdparty/oidn/common/tensor.h66
-rw-r--r--thirdparty/oidn/common/thread.cpp297
-rw-r--r--thirdparty/oidn/common/thread.h202
-rw-r--r--thirdparty/oidn/common/timer.h49
10 files changed, 1202 insertions, 0 deletions
diff --git a/thirdparty/oidn/common/barrier.h b/thirdparty/oidn/common/barrier.h
new file mode 100644
index 0000000000..b20f670053
--- /dev/null
+++ b/thirdparty/oidn/common/barrier.h
@@ -0,0 +1,52 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+#include <mutex>
+#include <condition_variable>
+
+namespace oidn {
+
+ class Barrier // single-use rendezvous: the counter is never reset after it reaches zero
+ {
+ private:
+ std::mutex m;
+ std::condition_variable cv;
+ int count; // arrivals still outstanding; after construction only touched while holding m
+
+ public:
+ Barrier(int count) : count(count) {}
+
+ void wait() // blocks the caller until the call that brings the counter down to zero arrives
+ {
+ std::unique_lock<std::mutex> lk(m);
+ count--;
+
+ if (count == 0)
+ {
+ lk.unlock(); // release first so the woken threads can reacquire the mutex immediately
+ cv.notify_all();
+ }
+ else
+ {
+ cv.wait(lk, [&]{ return count == 0; });
+ }
+ }
+ };
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/exception.h b/thirdparty/oidn/common/exception.h
new file mode 100644
index 0000000000..18069c6a7d
--- /dev/null
+++ b/thirdparty/oidn/common/exception.h
@@ -0,0 +1,45 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#include <exception>
+#include "platform.h"
+
+namespace oidn {
+
+ class Exception : public std::exception // exception carrying an Error code plus a message string
+ {
+ private:
+ Error error;
+ const char* message; // stored as a raw pointer and never copied; the string must outlive this object
+
+ public:
+ Exception(Error error, const char* message)
+ : error(error), message(message) {}
+
+ Error code() const noexcept // returns the Error value given to the constructor
+ {
+ return error;
+ }
+
+ const char* what() const noexcept override // returns the message pointer given to the constructor
+ {
+ return message;
+ }
+ };
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/platform.cpp b/thirdparty/oidn/common/platform.cpp
new file mode 100644
index 0000000000..59a14ff47c
--- /dev/null
+++ b/thirdparty/oidn/common/platform.cpp
@@ -0,0 +1,114 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#include "platform.h"
+
+namespace oidn {
+
+ // ----------------------------------------------------------------------------
+ // Common functions
+ // ----------------------------------------------------------------------------
+
+ void* alignedMalloc(size_t size, size_t alignment)
+ {
+ if (size == 0)
+ return nullptr;
+
+ assert((alignment & (alignment-1)) == 0);
+ void* ptr = _mm_malloc(size, alignment);
+
+ if (ptr == nullptr)
+ throw std::bad_alloc();
+
+ return ptr;
+ }
+
+ void alignedFree(void* ptr)
+ {
+ if (ptr)
+ _mm_free(ptr);
+ }
+
+ // ----------------------------------------------------------------------------
+ // System information
+ // ----------------------------------------------------------------------------
+
+ std::string getPlatformName()
+ {
+ std::string name;
+
+ #if defined(__linux__)
+ name = "Linux";
+ #elif defined(__FreeBSD__)
+ name = "FreeBSD";
+ #elif defined(__CYGWIN__)
+ name = "Cygwin";
+ #elif defined(_WIN32)
+ name = "Windows";
+ #elif defined(__APPLE__)
+ name = "macOS";
+ #elif defined(__unix__)
+ name = "Unix";
+ #else
+ return "Unknown";
+ #endif
+
+ #if defined(__x86_64__) || defined(_M_X64) || defined(__ia64__) || defined(__aarch64__)
+ name += " (64-bit)";
+ #else
+ name += " (32-bit)";
+ #endif
+
+ return name;
+ }
+
+ std::string getCompilerName() // returns a compiler name/version string chosen at compile time from predefined macros
+ {
+ #if defined(__INTEL_COMPILER)
+ int major = __INTEL_COMPILER / 100 % 100;
+ int minor = __INTEL_COMPILER % 100;
+ std::string version = "Intel Compiler ";
+ version += toString(major);
+ version += "." + toString(minor);
+ #if defined(__INTEL_COMPILER_UPDATE)
+ version += "." + toString(__INTEL_COMPILER_UPDATE);
+ #endif
+ return version;
+ #elif defined(__clang__)
+ return "Clang " __clang_version__;
+ #elif defined(__GNUC__)
+ return "GCC " __VERSION__;
+ #elif defined(_MSC_VER)
+ std::string version = toString(_MSC_FULL_VER);
+ version.insert(4, ".");
+ version.insert(9, ".");
+ version.insert(2, ".");
+ return "Visual C++ Compiler " + version;
+ #else
+ return "Unknown";
+ #endif
+ }
+
+ std::string getBuildName()
+ {
+ #if defined(NDEBUG)
+ return "Release";
+ #else
+ return "Debug";
+ #endif
+ }
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/platform.h b/thirdparty/oidn/common/platform.h
new file mode 100644
index 0000000000..205ac8981d
--- /dev/null
+++ b/thirdparty/oidn/common/platform.h
@@ -0,0 +1,131 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#if defined(_WIN32)
+ #define WIN32_LEAN_AND_MEAN
+ #define NOMINMAX
+ #include <Windows.h>
+#elif defined(__APPLE__)
+ #include <sys/sysctl.h>
+#endif
+
+#include <xmmintrin.h>
+#include <cstdint>
+#include <climits>
+#include <limits>
+#include <atomic>
+#include <algorithm>
+#include <memory>
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <cassert>
+#include "include/OpenImageDenoise/oidn.hpp"
+
+namespace oidn {
+
+ // ----------------------------------------------------------------------------
+ // Macros
+ // ----------------------------------------------------------------------------
+
+ #if defined(_WIN32)
+ // Windows
+ #if !defined(__noinline)
+ #define __noinline __declspec(noinline)
+ #endif
+ #else
+ // Unix
+ #if !defined(__forceinline)
+ #define __forceinline inline __attribute__((always_inline))
+ #endif
+ #if !defined(__noinline)
+ #define __noinline __attribute__((noinline))
+ #endif
+ #endif
+
+ #ifndef UNUSED
+ #define UNUSED(x) ((void)x)
+ #endif
+ #ifndef MAYBE_UNUSED
+ #define MAYBE_UNUSED(x) UNUSED(x)
+ #endif
+
+ // ----------------------------------------------------------------------------
+ // Error handling and debugging
+ // ----------------------------------------------------------------------------
+
+ struct Verbose
+ {
+ int verbose;
+
+ Verbose(int v = 0) : verbose(v) {}
+ __forceinline bool isVerbose(int v = 1) const { return v <= verbose; }
+ };
+
+ #define OIDN_WARNING(message) { if (isVerbose()) std::cerr << "Warning: " << message << std::endl; }
+ #define OIDN_FATAL(message) throw std::runtime_error(message);
+
+ // ----------------------------------------------------------------------------
+ // Common functions
+ // ----------------------------------------------------------------------------
+
+ using std::min;
+ using std::max;
+
+ template<typename T>
+ __forceinline T clamp(const T& value, const T& minValue, const T& maxValue)
+ {
+ return min(max(value, minValue), maxValue);
+ }
+
+ void* alignedMalloc(size_t size, size_t alignment);
+ void alignedFree(void* ptr);
+
+ template<typename T>
+ inline std::string toString(const T& a)
+ {
+ std::stringstream sm;
+ sm << a;
+ return sm.str();
+ }
+
+#if defined(__APPLE__)
+ template<typename T>
+ bool getSysctl(const char* name, T& value)
+ {
+ int64_t result = 0;
+ size_t size = sizeof(result);
+
+ if (sysctlbyname(name, &result, &size, nullptr, 0) != 0)
+ return false;
+
+ value = T(result);
+ return true;
+ }
+#endif
+
+ // ----------------------------------------------------------------------------
+ // System information
+ // ----------------------------------------------------------------------------
+
+ std::string getPlatformName();
+ std::string getCompilerName();
+ std::string getBuildName();
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/ref.h b/thirdparty/oidn/common/ref.h
new file mode 100644
index 0000000000..de44603af2
--- /dev/null
+++ b/thirdparty/oidn/common/ref.h
@@ -0,0 +1,163 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+
+namespace oidn {
+
+ class RefCount // intrusive reference counter; deletes itself when decRef() brings the count to zero
+ {
+ private:
+ std::atomic<size_t> count;
+
+ public:
+ __forceinline RefCount(int count = 0) noexcept : count(count) {}
+
+ __forceinline size_t incRef() noexcept // adds one reference and returns the new count
+ {
+ return count.fetch_add(1) + 1;
+ }
+
+ __forceinline size_t decRef() // drops one reference, destroys the object at zero, returns the new count
+ {
+ const size_t newCount = decRefKeep();
+ if (newCount == 0)
+ destroy();
+ return newCount;
+ }
+
+ __forceinline size_t decRefKeep() noexcept // drops one reference without ever destroying the object
+ {
+ return count.fetch_sub(1) - 1;
+ }
+
+ __forceinline void destroy() // unconditionally deletes this object through the virtual destructor
+ {
+ delete this;
+ }
+
+ protected:
+ // Disable copying
+ RefCount(const RefCount&) = delete;
+ RefCount& operator =(const RefCount&) = delete;
+
+ virtual ~RefCount() noexcept = default;
+ };
+
+ template<typename T>
+ class Ref
+ {
+ private:
+ T* ptr;
+
+ public:
+ __forceinline Ref() noexcept : ptr(nullptr) {}
+ __forceinline Ref(std::nullptr_t) noexcept : ptr(nullptr) {}
+ __forceinline Ref(const Ref& other) noexcept : ptr(other.ptr) { if (ptr) ptr->incRef(); }
+ __forceinline Ref(Ref&& other) noexcept : ptr(other.ptr) { other.ptr = nullptr; }
+ __forceinline Ref(T* ptr) noexcept : ptr(ptr) { if (ptr) ptr->incRef(); }
+
+ template<typename Y>
+ __forceinline Ref(const Ref<Y>& other) noexcept : ptr(other.get()) { if (ptr) ptr->incRef(); }
+
+ template<typename Y>
+ __forceinline explicit Ref(Y* ptr) noexcept : ptr(ptr) { if (ptr) ptr->incRef(); }
+
+ __forceinline ~Ref() { if (ptr) ptr->decRef(); }
+
+ __forceinline Ref& operator =(const Ref& other)
+ {
+ if (other.ptr)
+ other.ptr->incRef();
+ if (ptr)
+ ptr->decRef();
+ ptr = other.ptr;
+ return *this;
+ }
+
+ __forceinline Ref& operator =(Ref&& other)
+ {
+ if (ptr)
+ ptr->decRef();
+ ptr = other.ptr;
+ other.ptr = nullptr;
+ return *this;
+ }
+
+ __forceinline Ref& operator =(T* other)
+ {
+ if (other)
+ other->incRef();
+ if (ptr)
+ ptr->decRef();
+ ptr = other;
+ return *this;
+ }
+
+ __forceinline Ref& operator =(std::nullptr_t)
+ {
+ if (ptr)
+ ptr->decRef();
+ ptr = nullptr;
+ return *this;
+ }
+
+ __forceinline operator bool() const noexcept { return ptr != nullptr; }
+
+ __forceinline T& operator *() const noexcept { return *ptr; }
+ __forceinline T* operator ->() const noexcept { return ptr; }
+
+ __forceinline T* get() const noexcept { return ptr; }
+
+ __forceinline T* detach() noexcept
+ {
+ T* res = ptr;
+ ptr = nullptr;
+ return res;
+ }
+ };
+
+ template<typename T> __forceinline bool operator < (const Ref<T>& a, const Ref<T>& b) noexcept { return a.get() < b.get(); } // get() is used throughout: Ref<T>::ptr is private
+
+ template<typename T> __forceinline bool operator ==(const Ref<T>& a, std::nullptr_t) noexcept { return a.get() == nullptr; }
+ template<typename T> __forceinline bool operator ==(std::nullptr_t, const Ref<T>& b) noexcept { return nullptr == b.get(); }
+ template<typename T> __forceinline bool operator ==(const Ref<T>& a, const Ref<T>& b) noexcept { return a.get() == b.get(); }
+
+ template<typename T> __forceinline bool operator !=(const Ref<T>& a, std::nullptr_t) noexcept { return a.get() != nullptr; }
+ template<typename T> __forceinline bool operator !=(std::nullptr_t, const Ref<T>& b) noexcept { return nullptr != b.get(); }
+ template<typename T> __forceinline bool operator !=(const Ref<T>& a, const Ref<T>& b) noexcept { return a.get() != b.get(); }
+
+ template<typename T, typename... Args>
+ __forceinline Ref<T> makeRef(Args&&... args)
+ {
+ return Ref<T>(new T(std::forward<Args>(args)...));
+ }
+
+ template<typename T, typename Y>
+ __forceinline Ref<Y> staticRefCast(const Ref<T>& a)
+ {
+ return Ref<Y>(static_cast<Y*>(a.get()));
+ }
+
+ template<typename T, typename Y>
+ __forceinline Ref<Y> dynamicRefCast(const Ref<T>& a)
+ {
+ return Ref<Y>(dynamic_cast<Y*>(a.get()));
+ }
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/tensor.cpp b/thirdparty/oidn/common/tensor.cpp
new file mode 100644
index 0000000000..0249f2e141
--- /dev/null
+++ b/thirdparty/oidn/common/tensor.cpp
@@ -0,0 +1,83 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#include "exception.h"
+#include "tensor.h"
+
+namespace oidn {
+
+ std::map<std::string, Tensor> parseTensors(void* buffer) // NOTE(review): no length is passed, so the archive cannot be bounds-checked here
+ {
+ char* input = (char*)buffer;
+
+ // Parse the magic value
+ const int magic = *(unsigned short*)input;
+ if (magic != 0x41D7)
+ throw Exception(Error::InvalidOperation, "invalid tensor archive");
+ input += sizeof(unsigned short);
+
+ // Parse the version
+ const int majorVersion = *(unsigned char*)input++;
+ const int minorVersion = *(unsigned char*)input++;
+ UNUSED(minorVersion);
+ if (majorVersion > 1)
+ throw Exception(Error::InvalidOperation, "unsupported tensor archive version");
+
+ // Parse the number of tensors
+ const int numTensors = *(int*)input;
+ input += sizeof(int);
+
+ // Parse the tensors
+ std::map<std::string, Tensor> tensorMap;
+ for (int i = 0; i < numTensors; ++i)
+ {
+ Tensor tensor;
+
+ // Parse the name (one length byte followed by that many characters)
+ const int nameLen = *(unsigned char*)input++;
+ std::string name(input, nameLen);
+ input += nameLen;
+
+ // Parse the number of dimensions
+ const int ndims = *(unsigned char*)input++;
+
+ // Parse the shape of the tensor (ndims ints, widened into the int64_t dims vector)
+ tensor.dims.resize(ndims);
+ for (int d = 0; d < ndims; ++d)
+ tensor.dims[d] = ((int*)input)[d];
+ input += ndims * sizeof(int);
+
+ // Parse the format of the tensor (one character per dimension)
+ tensor.format = std::string(input, input + ndims);
+ input += ndims;
+
+ // Parse the data type of the tensor
+ const char type = *(unsigned char*)input++;
+ if (type != 'f') // only float32 is supported
+ throw Exception(Error::InvalidOperation, "unsupported tensor data type");
+
+ // Skip the data; tensor.data points into the caller's buffer, nothing is copied
+ tensor.data = (float*)input;
+ input += tensor.size() * sizeof(float);
+
+ // Add the tensor to the map (emplace keeps the first entry if a name repeats)
+ tensorMap.emplace(name, std::move(tensor));
+ }
+
+ return tensorMap;
+ }
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/tensor.h b/thirdparty/oidn/common/tensor.h
new file mode 100644
index 0000000000..48e7d1123d
--- /dev/null
+++ b/thirdparty/oidn/common/tensor.h
@@ -0,0 +1,66 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+#include <vector>
+#include <map>
+
+namespace oidn {
+
+ template<typename T>
+ using shared_vector = std::shared_ptr<std::vector<T>>;
+
+ // Generic tensor
+ struct Tensor
+ {
+ float* data;
+ std::vector<int64_t> dims;
+ std::string format;
+ shared_vector<char> buffer; // optional, only for reference counting
+
+ __forceinline Tensor() : data(nullptr) {}
+
+ __forceinline Tensor(const std::vector<int64_t>& dims, const std::string& format)
+ : dims(dims),
+ format(format)
+ {
+ buffer = std::make_shared<std::vector<char>>(size() * sizeof(float));
+ data = (float*)buffer->data();
+ }
+
+ __forceinline operator bool() const { return data != nullptr; }
+
+ __forceinline int ndims() const { return (int)dims.size(); }
+
+ // Returns the number of values
+ __forceinline size_t size() const
+ {
+ size_t size = 1;
+ for (int i = 0; i < ndims(); ++i)
+ size *= dims[i];
+ return size;
+ }
+
+ __forceinline float& operator [](size_t i) { return data[i]; }
+ __forceinline const float& operator [](size_t i) const { return data[i]; }
+ };
+
+ // Parses tensors from a buffer
+ std::map<std::string, Tensor> parseTensors(void* buffer);
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/thread.cpp b/thirdparty/oidn/common/thread.cpp
new file mode 100644
index 0000000000..48c489c57b
--- /dev/null
+++ b/thirdparty/oidn/common/thread.cpp
@@ -0,0 +1,297 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#if defined(_MSC_VER)
+ #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+#if defined(__APPLE__)
+ #include <mach/thread_act.h>
+ #include <mach/mach_init.h>
+#endif
+
+#include "thread.h"
+#include <fstream>
+
+namespace oidn {
+
+#if defined(_WIN32)
+
+ // --------------------------------------------------------------------------
+ // ThreadAffinity - Windows
+ // --------------------------------------------------------------------------
+
+ ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
+ : Verbose(verbose)
+ {
+ HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
+ pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
+ pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
+
+ if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
+ {
+ // Get logical processor information
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
+ DWORD bufferSize = 0;
+
+ // First call the function with an empty buffer to get the required buffer size
+ BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
+ if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+ {
+ OIDN_WARNING("GetLogicalProcessorInformationEx failed");
+ return;
+ }
+
+ // Allocate the buffer
+ buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
+ if (!buffer)
+ {
+ OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
+ return;
+ }
+
+ // Call again the function but now with the properly sized buffer
+ result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
+ if (!result)
+ {
+ OIDN_WARNING("GetLogicalProcessorInformationEx failed");
+ free(buffer);
+ return;
+ }
+
+ // Iterate over the logical processor information structures
+ // There should be one structure for each physical core
+ char* ptr = (char*)buffer;
+ while (ptr < (char*)buffer + bufferSize)
+ {
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
+ if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
+ {
+ // Iterate over the groups
+ int numThreads = 0;
+ for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
+ {
+ GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
+ while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
+ {
+ // Extract the next set bit/thread from the mask
+ GROUP_AFFINITY threadAffinity = coreAffinity;
+ threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
+
+ // Push the affinity for this thread
+ affinities.push_back(threadAffinity);
+ oldAffinities.push_back(threadAffinity);
+ numThreads++;
+
+ // Remove this bit/thread from the mask
+ coreAffinity.Mask ^= threadAffinity.Mask;
+ }
+ }
+ }
+
+ // Next structure
+ ptr += item->Size;
+ }
+
+ // Free the buffer
+ free(buffer);
+ }
+ }
+
+ void ThreadAffinity::set(int threadIndex)
+ {
+ if (threadIndex >= (int)affinities.size())
+ return;
+
+ // Save the current affinity and set the new one
+ const HANDLE thread = GetCurrentThread();
+ if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
+ OIDN_WARNING("SetThreadGroupAffinity failed");
+ }
+
+ void ThreadAffinity::restore(int threadIndex)
+ {
+ if (threadIndex >= (int)affinities.size())
+ return;
+
+ // Restore the original affinity
+ const HANDLE thread = GetCurrentThread();
+ if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
+ OIDN_WARNING("SetThreadGroupAffinity failed");
+ }
+
+#elif defined(__linux__)
+
+ // --------------------------------------------------------------------------
+ // ThreadAffinity - Linux
+ // --------------------------------------------------------------------------
+
+ ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
+ : Verbose(verbose)
+ {
+ std::vector<int> threadIds;
+
+ // Parse the thread/CPU topology: walk cpu0, cpu1, ... until a sysfs file cannot be opened
+ for (int cpuId = 0; ; cpuId++)
+ {
+ std::fstream fs;
+ std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
+ fs.open(cpu.c_str(), std::fstream::in);
+ if (fs.fail()) break;
+
+ int i;
+ int j = 0;
+ while ((j < numThreadsPerCore) && (fs >> i)) // take at most numThreadsPerCore IDs from each list
+ {
+ if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
+ threadIds.push_back(i);
+
+ if (fs.peek() == ',') // NOTE(review): only ',' is skipped; a range such as "0-3" is not expanded - confirm the sysfs format
+ fs.ignore();
+ j++;
+ }
+
+ fs.close();
+ }
+
+ #if 0
+ for (size_t i = 0; i < threadIds.size(); ++i)
+ std::cout << "thread " << i << " -> " << threadIds[i] << std::endl;
+ #endif
+
+ // Create the affinity structures: one single-CPU mask per collected thread ID
+ affinities.resize(threadIds.size());
+ oldAffinities.resize(threadIds.size());
+
+ for (size_t i = 0; i < threadIds.size(); ++i)
+ {
+ cpu_set_t affinity;
+ CPU_ZERO(&affinity);
+ CPU_SET(threadIds[i], &affinity);
+
+ affinities[i] = affinity;
+ oldAffinities[i] = affinity; // placeholder until set() records the real previous mask
+ }
+ }
+
+ void ThreadAffinity::set(int threadIndex)
+ {
+ if (threadIndex >= (int)affinities.size())
+ return;
+
+ const pthread_t thread = pthread_self();
+
+ // Save the current affinity
+ if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
+ {
+ OIDN_WARNING("pthread_getaffinity_np failed");
+ oldAffinities[threadIndex] = affinities[threadIndex];
+ return;
+ }
+
+ // Set the new affinity
+ if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
+ OIDN_WARNING("pthread_setaffinity_np failed");
+ }
+
+ void ThreadAffinity::restore(int threadIndex)
+ {
+ if (threadIndex >= (int)affinities.size())
+ return;
+
+ const pthread_t thread = pthread_self();
+
+ // Restore the original affinity
+ if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
+ OIDN_WARNING("pthread_setaffinity_np failed");
+ }
+
+#elif defined(__APPLE__)
+
+ // --------------------------------------------------------------------------
+ // ThreadAffinity - macOS
+ // --------------------------------------------------------------------------
+
+ ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
+ : Verbose(verbose)
+ {
+ // Query the thread/CPU topology
+ int numPhysicalCpus;
+ int numLogicalCpus;
+
+ if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
+ {
+ OIDN_WARNING("sysctlbyname failed");
+ return;
+ }
+
+ if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
+ return; // this shouldn't happen
+ const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
+
+ // Create the affinity structures
+ // macOS doesn't support binding a thread to a specific core, but we can at least group threads which
+ // should be on the same core together
+ for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
+ {
+ thread_affinity_policy affinity;
+ affinity.affinity_tag = core;
+
+ for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
+ {
+ affinities.push_back(affinity);
+ oldAffinities.push_back(affinity);
+ }
+ }
+ }
+
+ void ThreadAffinity::set(int threadIndex) // saves the calling thread's affinity policy, then applies affinities[threadIndex]
+ {
+ if (threadIndex >= (int)affinities.size())
+ return;
+
+ const auto thread = pthread_mach_thread_np(pthread_self()); // unlike mach_thread_self(), hands out no extra port right that would need releasing
+
+ // Save the current affinity
+ mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
+ boolean_t getDefault = FALSE;
+ if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
+ {
+ OIDN_WARNING("thread_policy_get failed");
+ oldAffinities[threadIndex] = affinities[threadIndex]; // keep the saved slot well defined for a later restore()
+ return;
+ }
+
+ // Set the new affinity
+ if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
+ OIDN_WARNING("thread_policy_set failed");
+ }
+
+ void ThreadAffinity::restore(int threadIndex) // re-applies the policy stored in oldAffinities[threadIndex] to the calling thread
+ {
+ if (threadIndex >= (int)affinities.size())
+ return;
+
+ const auto thread = pthread_mach_thread_np(pthread_self()); // unlike mach_thread_self(), hands out no extra port right that would need releasing
+
+ // Restore the original affinity
+ if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
+ OIDN_WARNING("thread_policy_set failed");
+ }
+
+#endif
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/thread.h b/thirdparty/oidn/common/thread.h
new file mode 100644
index 0000000000..2c731367da
--- /dev/null
+++ b/thirdparty/oidn/common/thread.h
@@ -0,0 +1,202 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+
+#if !defined(_WIN32)
+ #include <pthread.h>
+ #include <sched.h>
+ #if defined(__APPLE__)
+ #include <mach/thread_policy.h>
+ #endif
+#endif
+
+#include <vector>
+#include <mutex>
+
+namespace oidn {
+
+ // --------------------------------------------------------------------------
+ // ThreadLocal
+ // --------------------------------------------------------------------------
+
+  // Gives every thread its own, lazily created instance of T.
+  // The first get() call made by a thread default-constructs a T, stores its
+  // address in the thread's TLS slot (TlsAlloc on Windows, pthread keys
+  // elsewhere) and records it in `instances`. The destructor deletes every
+  // recorded instance, whichever thread created it.
+  template<typename T>
+  class ThreadLocal : public Verbose
+  {
+  private:
+  #if defined(_WIN32)
+    DWORD key;         // TLS slot index
+  #else
+    pthread_key_t key; // TLS key
+  #endif
+
+    std::vector<T*> instances; // all instances created so far
+    std::mutex mutex;          // taken whenever `instances` is accessed
+
+  public:
+    // Allocates the TLS slot; no per-thread destructor is registered for it
+    ThreadLocal(int verbose = 0)
+      : Verbose(verbose)
+    {
+    #if defined(_WIN32)
+      key = TlsAlloc();
+      if (TLS_OUT_OF_INDEXES == key)
+        OIDN_FATAL("TlsAlloc failed");
+    #else
+      const int status = pthread_key_create(&key, nullptr);
+      if (status != 0)
+        OIDN_FATAL("pthread_key_create failed");
+    #endif
+    }
+
+    // Deletes the instances of all threads, then releases the TLS slot
+    ~ThreadLocal()
+    {
+      std::lock_guard<std::mutex> guard(mutex);
+
+      for (auto it = instances.begin(); it != instances.end(); ++it)
+        delete *it;
+
+    #if defined(_WIN32)
+      if (!TlsFree(key))
+        OIDN_WARNING("TlsFree failed");
+    #else
+      const int status = pthread_key_delete(key);
+      if (status != 0)
+        OIDN_WARNING("pthread_key_delete failed");
+    #endif
+    }
+
+    // Returns the calling thread's instance, creating it on first use
+    T& get()
+    {
+    #if defined(_WIN32)
+      T* instance = static_cast<T*>(TlsGetValue(key));
+    #else
+      T* instance = static_cast<T*>(pthread_getspecific(key));
+    #endif
+
+      if (!instance)
+      {
+        // Nothing stored for this thread yet: create and register an instance
+        instance = new T;
+        std::lock_guard<std::mutex> guard(mutex);
+        instances.push_back(instance);
+
+      #if defined(_WIN32)
+        if (!TlsSetValue(key, instance))
+          OIDN_FATAL("TlsSetValue failed");
+      #else
+        if (pthread_setspecific(key, instance) != 0)
+          OIDN_FATAL("pthread_setspecific failed");
+      #endif
+      }
+
+      return *instance;
+    }
+  };
+
+#if defined(_WIN32)
+
+ // --------------------------------------------------------------------------
+ // ThreadAffinity - Windows
+ // --------------------------------------------------------------------------
+
+  // Holds one GROUP_AFFINITY per thread index and lets a thread switch to its
+  // entry and back again.
+  class ThreadAffinity : public Verbose
+  {
+  private:
+    // Signatures of the two Windows API functions used; they are kept as
+    // function pointers, which are null until assigned
+    using GetLogicalProcessorInformationExFunc = BOOL (WINAPI *)(LOGICAL_PROCESSOR_RELATIONSHIP,
+                                                                 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
+                                                                 PDWORD);
+
+    using SetThreadGroupAffinityFunc = BOOL (WINAPI *)(HANDLE,
+                                                       CONST GROUP_AFFINITY*,
+                                                       PGROUP_AFFINITY);
+
+    GetLogicalProcessorInformationExFunc pGetLogicalProcessorInformationEx = nullptr;
+    SetThreadGroupAffinityFunc pSetThreadGroupAffinity = nullptr;
+
+    std::vector<GROUP_AFFINITY> affinities;    // affinity to apply, per thread index
+    std::vector<GROUP_AFFINITY> oldAffinities; // affinity to go back to, per thread index
+
+  public:
+    ThreadAffinity(int numThreadsPerCore = INT_MAX, int verbose = 0);
+
+    // Number of thread indices that have an affinity entry
+    int getNumThreads() const
+    {
+      const auto count = affinities.size();
+      return static_cast<int>(count);
+    }
+
+    // Saves the thread's current affinity, then applies the affinity of
+    // threadIndex (0..numThreads-1)
+    void set(int threadIndex);
+
+    // Puts back the affinity the thread had before set()
+    void restore(int threadIndex);
+  };
+
+#elif defined(__linux__)
+
+ // --------------------------------------------------------------------------
+ // ThreadAffinity - Linux
+ // --------------------------------------------------------------------------
+
+  // Holds one cpu_set_t per thread index and lets a thread switch to its
+  // entry and back again.
+  class ThreadAffinity : public Verbose
+  {
+  private:
+    std::vector<cpu_set_t> affinities;    // affinity to apply, per thread index
+    std::vector<cpu_set_t> oldAffinities; // affinity to go back to, per thread index
+
+  public:
+    ThreadAffinity(int numThreadsPerCore = INT_MAX, int verbose = 0);
+
+    // Number of thread indices that have an affinity entry
+    int getNumThreads() const
+    {
+      const auto count = affinities.size();
+      return static_cast<int>(count);
+    }
+
+    // Saves the thread's current affinity, then applies the affinity of
+    // threadIndex (0..numThreads-1)
+    void set(int threadIndex);
+
+    // Puts back the affinity the thread had before set()
+    void restore(int threadIndex);
+  };
+
+#elif defined(__APPLE__)
+
+ // --------------------------------------------------------------------------
+ // ThreadAffinity - macOS
+ // --------------------------------------------------------------------------
+
+  // Holds one thread_affinity_policy per thread index and lets a thread
+  // switch to its entry and back again.
+  class ThreadAffinity : public Verbose
+  {
+  private:
+    std::vector<thread_affinity_policy> affinities;    // policy to apply, per thread index
+    std::vector<thread_affinity_policy> oldAffinities; // policy to go back to, per thread index
+
+  public:
+    ThreadAffinity(int numThreadsPerCore = INT_MAX, int verbose = 0);
+
+    // Number of thread indices that have an affinity entry
+    int getNumThreads() const
+    {
+      const auto count = affinities.size();
+      return static_cast<int>(count);
+    }
+
+    // Saves the thread's current affinity, then applies the affinity of
+    // threadIndex (0..numThreads-1)
+    void set(int threadIndex);
+
+    // Puts back the affinity the thread had before set()
+    void restore(int threadIndex);
+  };
+
+#endif
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/timer.h b/thirdparty/oidn/common/timer.h
new file mode 100644
index 0000000000..62aaaa1c33
--- /dev/null
+++ b/thirdparty/oidn/common/timer.h
@@ -0,0 +1,49 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation //
+// //
+// Licensed under the Apache License, Version 2.0 (the "License"); //
+// you may not use this file except in compliance with the License. //
+// You may obtain a copy of the License at //
+// //
+// http://www.apache.org/licenses/LICENSE-2.0 //
+// //
+// Unless required by applicable law or agreed to in writing, software //
+// distributed under the License is distributed on an "AS IS" BASIS, //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and //
+// limitations under the License. //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+#include <chrono>
+
+namespace oidn {
+
+  // Stopwatch reporting elapsed time in seconds.
+  class Timer
+  {
+  private:
+    // Intervals must be measured with a monotonic clock.
+    // high_resolution_clock is not guaranteed to be steady (it may be an alias
+    // of system_clock), so a system time adjustment while the timer is running
+    // could yield wrong or even negative durations.
+    using clock = std::chrono::steady_clock;
+
+    std::chrono::time_point<clock> start;
+
+  public:
+    // Starts measuring immediately
+    Timer()
+    {
+      reset();
+    }
+
+    // Restarts the measurement from the current instant
+    void reset()
+    {
+      start = clock::now();
+    }
+
+    // Returns the seconds elapsed since construction or the last reset()
+    double query() const
+    {
+      const auto end = clock::now();
+      return std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
+    }
+  };
+
+} // namespace oidn