New lightmapper

-Added LocalVector (needed it) -Added stb_rect_pack (It's pretty cool, we could probably use it for other stuff too) -Fixes and changes all around the place -Added library for 128 bits fixed point (required for Delaunay3D)
author: Juan Linietsky <reduzio@gmail.com> 2020-05-01 09:34:23 -0300
committer: Juan Linietsky <reduzio@gmail.com> 2020-05-10 15:59:09 -0300
commit: 1bea8e1eacc68bcedbd3f207395bccf11011dae2 (patch)
tree: b75303a69491978c1e13360a3e6f355c5234dfe0 /thirdparty/oidn/common
parent: 6a0473bcc23c096ef9ee929632a209761c2668f6 (diff)
10 files changed, 1202 insertions, 0 deletions
diff --git a/thirdparty/oidn/common/barrier.h b/thirdparty/oidn/common/barrier.h
new file mode 100644
index 0000000000..b20f670053
--- /dev/null
+++ b/thirdparty/oidn/common/barrier.h
@@ -0,0 +1,52 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+#include <mutex>
+#include <condition_variable>
+
+namespace oidn {
+
+  class Barrier // Rendezvous point: wait() blocks until `count` callers have arrived. count is never reset, so it is single-use
+  {
+  private:
+    std::mutex m; // guards count
+    std::condition_variable cv; // notified once count reaches zero
+    volatile int count; // arrivals still outstanding; wait() only touches it while holding m
+
+  public:
+    Barrier(int count) : count(count) {} // count = number of wait() calls required to release everyone
+
+    void wait()
+    {
+      std::unique_lock<std::mutex> lk(m);
+      count--;
+
+      if (count == 0)
+      {
+        lk.unlock(); // last arrival: release the lock before waking the waiters
+        cv.notify_all();
+      }
+      else
+      {
+        cv.wait(lk, [&]{ return count == 0; }); // predicate form: a spurious wakeup just re-checks count
+      }
+    }
+  };
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/exception.h b/thirdparty/oidn/common/exception.h
new file mode 100644
index 0000000000..18069c6a7d
--- /dev/null
+++ b/thirdparty/oidn/common/exception.h
@@ -0,0 +1,45 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#include <exception>
+#include "platform.h"
+
+namespace oidn {
+
+  class Exception : public std::exception // Exception that carries an Error code together with a message
+  {
+  private:
+    Error error; // value returned by code()
+    const char* message; // stored as a raw pointer (no copy is made), so the string must outlive the exception
+
+  public:
+    Exception(Error error, const char* message)
+      : error(error), message(message) {}
+
+    Error code() const noexcept // returns the Error given at construction
+    {
+      return error;
+    }
+
+    const char* what() const noexcept override // returns the message pointer given at construction
+    {
+      return message;
+    }
+  };
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/platform.cpp b/thirdparty/oidn/common/platform.cpp
new file mode 100644
index 0000000000..59a14ff47c
--- /dev/null
+++ b/thirdparty/oidn/common/platform.cpp
@@ -0,0 +1,114 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#include "platform.h"
+
+namespace oidn {
+
+  // ----------------------------------------------------------------------------
+  // Common functions
+  // ----------------------------------------------------------------------------
+
+  void* alignedMalloc(size_t size, size_t alignment) // Allocates `size` bytes aligned to `alignment`; throws std::bad_alloc on failure
+  {
+    if (size == 0)
+      return nullptr; // zero-sized requests yield nullptr instead of an allocation
+
+    assert((alignment & (alignment-1)) == 0); // alignment must be a power of two (note: 0 also passes this check)
+    void* ptr = _mm_malloc(size, alignment);
+
+    if (ptr == nullptr)
+      throw std::bad_alloc();
+
+    return ptr; // release with alignedFree()
+  }
+
+  void alignedFree(void* ptr) // Releases memory via _mm_free (the counterpart of the _mm_malloc used by alignedMalloc)
+  {
+    if (ptr) // nullptr is ignored
+      _mm_free(ptr);
+  }
+
+  // ----------------------------------------------------------------------------
+  // System information
+  // ----------------------------------------------------------------------------
+
+  std::string getPlatformName() // Returns "<OS> (64-bit)" or "<OS> (32-bit)", decided at compile time from predefined macros
+  {
+    std::string name;
+
+  #if defined(__linux__)
+    name = "Linux";
+  #elif defined(__FreeBSD__)
+    name = "FreeBSD";
+  #elif defined(__CYGWIN__)
+    name = "Cygwin";
+  #elif defined(_WIN32)
+    name = "Windows";
+  #elif defined(__APPLE__)
+    name = "macOS";
+  #elif defined(__unix__)
+    name = "Unix";
+  #else
+    return "Unknown"; // unrecognised OS: returned early, without the bitness suffix
+  #endif
+
+  #if defined(__x86_64__) || defined(_M_X64) || defined(__ia64__) || defined(__aarch64__)
+    name += " (64-bit)";
+  #else
+    name += " (32-bit)"; // every architecture not listed above is reported as 32-bit
+  #endif
+
+    return name;
+  }
+
+  std::string getCompilerName() // Returns "<compiler> <version>" for the compiler that built this file, or "Unknown"
+  {
+  #if defined(__INTEL_COMPILER)
+    int majorVersion = __INTEL_COMPILER / 100 % 100; // e.g. 1900 -> 19
+    int minorVersion = __INTEL_COMPILER % 100; // e.g. 1900 -> 0
+    std::string version = "Intel Compiler ";
+    version += toString(majorVersion);
+    version += "." + toString(minorVersion);
+  #if defined(__INTEL_COMPILER_UPDATE)
+    version += "." + toString(__INTEL_COMPILER_UPDATE);
+  #endif
+    return version;
+  #elif defined(__clang__)
+    return "Clang " __clang_version__; // adjacent string literals are concatenated by the compiler
+  #elif defined(__GNUC__)
+    return "GCC " __VERSION__;
+  #elif defined(_MSC_VER)
+    std::string version = toString(_MSC_FULL_VER); // digits of _MSC_FULL_VER, dots are inserted below
+    version.insert(4, ".");
+    version.insert(9, "."); // NOTE(review): for a 9-digit value this gives e.g. "19.20.2750.8"; confirm the build-number split is intended
+    version.insert(2, "."); // done last so the two offsets above refer to the string without this leading dot
+    return "Visual C++ Compiler " + version;
+  #else
+    return "Unknown";
+  #endif
+  }
+
+  std::string getBuildName() // Returns "Release" when NDEBUG is defined at compile time, otherwise "Debug"
+  {
+  #if defined(NDEBUG)
+    return "Release";
+  #else
+    return "Debug";
+  #endif
+  }
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/platform.h b/thirdparty/oidn/common/platform.h
new file mode 100644
index 0000000000..205ac8981d
--- /dev/null
+++ b/thirdparty/oidn/common/platform.h
@@ -0,0 +1,131 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#if defined(_WIN32)
+  #define WIN32_LEAN_AND_MEAN
+  #define NOMINMAX
+  #include <Windows.h>
+#elif defined(__APPLE__)
+  #include <sys/sysctl.h>
+#endif
+
+#include <xmmintrin.h>
+#include <cstdint>
+#include <climits>
+#include <limits>
+#include <atomic>
+#include <algorithm>
+#include <memory>
+#include <cmath>
+#include <string>
+#include <sstream>
+#include <iostream>
+#include <cassert>
+#include "include/OpenImageDenoise/oidn.hpp"
+
+namespace oidn {
+
+  // ----------------------------------------------------------------------------
+  // Macros
+  // ----------------------------------------------------------------------------
+
+  #if defined(_WIN32)
+    // Windows: only __noinline is mapped here; __forceinline is presumably supplied by the toolchain itself
+    #if !defined(__noinline)
+      #define __noinline     __declspec(noinline)
+    #endif
+  #else
+    // Unix: provide both keywords through GCC-style attributes unless they are already defined
+    #if !defined(__forceinline)
+      #define __forceinline  inline __attribute__((always_inline))
+    #endif
+    #if !defined(__noinline)
+      #define __noinline     __attribute__((noinline))
+    #endif
+  #endif
+
+  #ifndef UNUSED
+    #define UNUSED(x) ((void)(x)) // silences unused warnings; argument is parenthesised so UNUSED(a + b) stays well-formed
+  #endif
+  #ifndef MAYBE_UNUSED
+    #define MAYBE_UNUSED(x) UNUSED(x) // alias of UNUSED
+  #endif
+
+  // ----------------------------------------------------------------------------
+  // Error handling and debugging
+  // ----------------------------------------------------------------------------
+
+  struct Verbose // Holds a verbosity level; the OIDN_WARNING macro calls isVerbose() unqualified, so it needs this in scope
+  {
+    int verbose; // current verbosity level (0 by default)
+
+    Verbose(int v = 0) : verbose(v) {}
+    __forceinline bool isVerbose(int v = 1) const { return v <= verbose; } // true if messages of level v should be shown
+  };
+
+  #define OIDN_WARNING(message) { if (isVerbose()) std::cerr << "Warning: " << message << std::endl; } // prints to stderr only when isVerbose() (level 1) is true; requires isVerbose() in scope
+  #define OIDN_FATAL(message) throw std::runtime_error(message); // throws std::runtime_error; the expansion already ends with ';'
+
+  // ----------------------------------------------------------------------------
+  // Common functions
+  // ----------------------------------------------------------------------------
+
+  using std::min;
+  using std::max;
+
+  template<typename T>
+  __forceinline T clamp(const T& value, const T& minValue, const T& maxValue) // limits value to [minValue, maxValue]; returns a copy
+  {
+    return min(max(value, minValue), maxValue); // raise to the lower bound first, then cap at the upper bound
+  }
+
+  void* alignedMalloc(size_t size, size_t alignment);
+  void alignedFree(void* ptr);
+
+  template<typename T>
+  inline std::string toString(const T& a) // formats `a` with operator<< on a std::stringstream and returns the text
+  {
+    std::stringstream sm;
+    sm << a;
+    return sm.str();
+  }
+
+#if defined(__APPLE__)
+  template<typename T>
+  bool getSysctl(const char* name, T& value) // queries sysctlbyname(name); on success stores the result in `value` and returns true
+  {
+    int64_t result = 0; // zero-initialised 8-byte buffer; presumably so that shorter values read back correctly - TODO confirm
+    size_t size = sizeof(result);
+
+    if (sysctlbyname(name, &result, &size, nullptr, 0) != 0)
+      return false; // `value` is left untouched on failure
+
+    value = T(result); // converted to the caller's type
+    return true;
+  }
+#endif
+
+  // ----------------------------------------------------------------------------
+  // System information
+  // ----------------------------------------------------------------------------
+
+  std::string getPlatformName();
+  std::string getCompilerName();
+  std::string getBuildName();
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/ref.h b/thirdparty/oidn/common/ref.h
new file mode 100644
index 0000000000..de44603af2
--- /dev/null
+++ b/thirdparty/oidn/common/ref.h
@@ -0,0 +1,163 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+
+namespace oidn {
+
+  class RefCount // Base class with an atomic reference counter; decRef() deletes the object when the counter reaches zero
+  {
+  private:
+    std::atomic<size_t> count; // current number of references
+
+  public:
+    __forceinline RefCount(int count = 0) noexcept : count(count) {} // starts with zero references by default
+
+    __forceinline size_t incRef() noexcept // adds one reference; returns the new count
+    {
+      return count.fetch_add(1) + 1;
+    }
+
+    __forceinline size_t decRef() // removes one reference and destroys the object if none remain; returns the new count
+    {
+      const size_t newCount = decRefKeep();
+      if (newCount == 0)
+        destroy();
+      return newCount;
+    }
+
+    __forceinline size_t decRefKeep() noexcept // removes one reference but never destroys; returns the new count
+    {
+      return count.fetch_add(-1) - 1; // -1 converts to the maximum size_t, so the unsigned add acts as a decrement
+    }
+
+    __forceinline void destroy() // uses `delete this`, so the object must have been created with new
+    {
+      delete this;
+    }
+
+  protected:
+    // Disable copying
+    RefCount(const RefCount&) = delete;
+    RefCount& operator =(const RefCount&) = delete;
+
+    virtual ~RefCount() noexcept = default; // protected, so outside code cannot delete a RefCount directly
+  };
+
+  template<typename T>
+  class Ref // Smart pointer that calls incRef()/decRef() on its pointee as references are taken and released
+  {
+  private:
+    T* ptr; // managed object, or nullptr
+
+  public:
+    __forceinline Ref() noexcept : ptr(nullptr) {}
+    __forceinline Ref(std::nullptr_t) noexcept : ptr(nullptr) {}
+    __forceinline Ref(const Ref& other) noexcept : ptr(other.ptr) { if (ptr) ptr->incRef(); } // copy: adds a reference
+    __forceinline Ref(Ref&& other) noexcept : ptr(other.ptr) { other.ptr = nullptr; } // move: takes over the reference, count unchanged
+    __forceinline Ref(T* ptr) noexcept : ptr(ptr) { if (ptr) ptr->incRef(); } // implicit from a raw pointer; adds a reference
+
+    template<typename Y>
+    __forceinline Ref(const Ref<Y>& other) noexcept : ptr(other.get()) { if (ptr) ptr->incRef(); } // converting copy (Y* must convert to T*)
+
+    template<typename Y>
+    __forceinline explicit Ref(Y* ptr) noexcept : ptr(ptr) { if (ptr) ptr->incRef(); } // explicit from a raw pointer of another type
+
+    __forceinline ~Ref() { if (ptr) ptr->decRef(); } // releases the held reference, if any
+
+    __forceinline Ref& operator =(const Ref& other)
+    {
+      if (other.ptr)
+        other.ptr->incRef(); // acquire the new reference before releasing the old one, which keeps self-assignment safe
+      if (ptr)
+        ptr->decRef();
+      ptr = other.ptr;
+      return *this;
+    }
+
+    __forceinline Ref& operator =(Ref&& other)
+    {
+      if (ptr)
+        ptr->decRef(); // NOTE(review): on self-move this drops the reference and the Ref ends up null
+      ptr = other.ptr;
+      other.ptr = nullptr;
+      return *this;
+    }
+
+    __forceinline Ref& operator =(T* other)
+    {
+      if (other)
+        other->incRef(); // same acquire-then-release order as the copy assignment
+      if (ptr)
+        ptr->decRef();
+      ptr = other;
+      return *this;
+    }
+
+    __forceinline Ref& operator =(std::nullptr_t) // releases the held reference and becomes null
+    {
+      if (ptr)
+        ptr->decRef();
+      ptr = nullptr;
+      return *this;
+    }
+
+    __forceinline operator bool() const noexcept { return ptr != nullptr; } // true when an object is held (conversion is implicit)
+
+    __forceinline T& operator  *() const noexcept { return *ptr; } // no null check
+    __forceinline T* operator ->() const noexcept { return  ptr; }
+
+    __forceinline T* get() const noexcept { return ptr; } // raw pointer; the reference count is not changed
+
+    __forceinline T* detach() noexcept // gives up the pointer without calling decRef(); the caller takes over that reference
+    {
+      T* res = ptr;
+      ptr = nullptr;
+      return res;
+    }
+  };
+
+  template<typename T> __forceinline bool operator < (const Ref<T>& a, const Ref<T>& b) noexcept { return a.get() <  b.get(); } // all comparisons are on the raw pointers, read via the public get() (ptr is private)
+
+  template<typename T> __forceinline bool operator ==(const Ref<T>& a, std::nullptr_t)  noexcept { return a.get() == nullptr; }
+  template<typename T> __forceinline bool operator ==(std::nullptr_t,  const Ref<T>& b) noexcept { return nullptr == b.get(); }
+  template<typename T> __forceinline bool operator ==(const Ref<T>& a, const Ref<T>& b) noexcept { return a.get() == b.get(); }
+
+  template<typename T> __forceinline bool operator !=(const Ref<T>& a, std::nullptr_t)  noexcept { return a.get() != nullptr; }
+  template<typename T> __forceinline bool operator !=(std::nullptr_t,  const Ref<T>& b) noexcept { return nullptr != b.get(); }
+  template<typename T> __forceinline bool operator !=(const Ref<T>& a, const Ref<T>& b) noexcept { return a.get() != b.get(); }
+
+  template<typename T, typename... Args>
+  __forceinline Ref<T> makeRef(Args&&... args) // creates a T with new, forwarding args to its constructor, and wraps it in a Ref
+  {
+    return Ref<T>(new T(std::forward<Args>(args)...));
+  }
+
+  template<typename T, typename Y>
+  __forceinline Ref<Y> staticRefCast(const Ref<T>& a) // static_cast from Ref<T> to Ref<Y>; Y comes second and cannot be deduced, so callers spell out both
+  {
+    return Ref<Y>(static_cast<Y*>(a.get())); // the new Ref adds its own reference
+  }
+
+  template<typename T, typename Y>
+  __forceinline Ref<Y> dynamicRefCast(const Ref<T>& a) // dynamic_cast from Ref<T> to Ref<Y>; Y comes second and cannot be deduced, so callers spell out both
+  {
+    return Ref<Y>(dynamic_cast<Y*>(a.get())); // a failed cast yields nullptr and therefore a null Ref
+  }
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/tensor.cpp b/thirdparty/oidn/common/tensor.cpp
new file mode 100644
index 0000000000..0249f2e141
--- /dev/null
+++ b/thirdparty/oidn/common/tensor.cpp
@@ -0,0 +1,83 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#include "exception.h"
+#include "tensor.h"
+
+namespace oidn {
+
+  std::map<std::string, Tensor> parseTensors(void* buffer) // Parses a tensor archive held in memory into a name -> Tensor map; throws Exception on a bad header or data type
+  {
+    char* input = (char*)buffer; // read cursor; no buffer length is passed in, so nothing here is bounds-checked
+
+    // Parse the magic value (an unsigned short that must equal 0x41D7)
+    const int magic = *(unsigned short*)input;
+    if (magic != 0x41D7)
+      throw Exception(Error::InvalidOperation, "invalid tensor archive");
+    input += sizeof(unsigned short);
+
+    // Parse the version (one byte major, one byte minor); only the major version is checked
+    const int majorVersion = *(unsigned char*)input++;
+    const int minorVersion = *(unsigned char*)input++;
+    UNUSED(minorVersion);
+    if (majorVersion > 1)
+      throw Exception(Error::InvalidOperation, "unsupported tensor archive version");
+
+    // Parse the number of tensors (read as an int)
+    const int numTensors = *(int*)input;
+    input += sizeof(int);
+
+    // Parse the tensors
+    std::map<std::string, Tensor> tensorMap;
+    for (int i = 0; i < numTensors; ++i)
+    {
+      Tensor tensor;
+
+      // Parse the name (one length byte followed by that many characters)
+      const int nameLen = *(unsigned char*)input++;
+      std::string name(input, nameLen);
+      input += nameLen;
+
+      // Parse the number of dimensions (one byte)
+      const int ndims = *(unsigned char*)input++;
+
+      // Parse the shape of the tensor (ndims ints); NOTE(review): read through an int* at an arbitrary byte offset - confirm alignment is not a concern on the targets
+      tensor.dims.resize(ndims);
+      for (int i = 0; i < ndims; ++i) // this i shadows the outer tensor index
+        tensor.dims[i] = ((int*)input)[i];
+      input += ndims * sizeof(int);
+
+      // Parse the format of the tensor (one character per dimension)
+      tensor.format = std::string(input, input + ndims);
+      input += ndims;
+
+      // Parse the data type of the tensor (one byte)
+      const char type = *(unsigned char*)input++;
+      if (type != 'f') // only float32 is supported
+        throw Exception(Error::InvalidOperation, "unsupported tensor data type");
+
+      // Point the tensor at its values inside the buffer (no copy is made), then skip past them
+      tensor.data = (float*)input;
+      input += tensor.size() * sizeof(float);
+
+      // Add the tensor to the map (emplace keeps the first entry if a name repeats)
+      tensorMap.emplace(name, std::move(tensor));
+    }
+
+    return tensorMap; // the tensors reference `buffer`, so it must outlive the returned map
+  }
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/tensor.h b/thirdparty/oidn/common/tensor.h
new file mode 100644
index 0000000000..48e7d1123d
--- /dev/null
+++ b/thirdparty/oidn/common/tensor.h
@@ -0,0 +1,66 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+#include <vector>
+#include <map>
+
+namespace oidn {
+
+  template<typename T>
+  using shared_vector = std::shared_ptr<std::vector<T>>; // shorthand for a shared_ptr to a std::vector<T>
+
+  // Generic tensor: float values addressed through `data`, plus a shape and a format string
+  struct Tensor
+  {
+    float* data; // points either into `buffer` or into memory assigned by other code (e.g. parseTensors)
+    std::vector<int64_t> dims; // extent of each dimension
+    std::string format; // format string supplied by the creator; not interpreted in this struct
+    shared_vector<char> buffer; // optional, only for reference counting
+
+    __forceinline Tensor() : data(nullptr) {} // empty tensor: no data, no dimensions
+
+    __forceinline Tensor(const std::vector<int64_t>& dims, const std::string& format) // allocates storage for the given shape
+      : dims(dims),
+        format(format)
+    {
+      buffer = std::make_shared<std::vector<char>>(size() * sizeof(float)); // size() floats worth of bytes, owned by the shared buffer
+      data = (float*)buffer->data();
+    }
+
+    __forceinline operator bool() const { return data != nullptr; } // true when the tensor has data
+
+    __forceinline int ndims() const { return (int)dims.size(); } // number of dimensions
+
+    // Returns the number of values (product of all dims; 1 when there are no dimensions)
+    __forceinline size_t size() const
+    {
+      size_t size = 1;
+      for (int i = 0; i < ndims(); ++i)
+        size *= dims[i];
+      return size;
+    }
+
+    __forceinline float& operator [](size_t i) { return data[i]; } // flat index, no bounds check
+    __forceinline const float& operator [](size_t i) const { return data[i]; }
+  };
+
+  // Parses tensors from a buffer
+  std::map<std::string, Tensor> parseTensors(void* buffer);
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/thread.cpp b/thirdparty/oidn/common/thread.cpp
new file mode 100644
index 0000000000..48c489c57b
--- /dev/null
+++ b/thirdparty/oidn/common/thread.cpp
@@ -0,0 +1,297 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#if defined(_MSC_VER)
+  #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+#if defined(__APPLE__)
+  #include <mach/thread_act.h>
+  #include <mach/mach_init.h>
+#endif
+
+#include "thread.h"
+#include <fstream>
+
+namespace oidn {
+
+#if defined(_WIN32)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - Windows
+  // --------------------------------------------------------------------------
+
+  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) // Windows: builds one affinity entry per selected hardware thread, at most numThreadsPerCore per core
+    : Verbose(verbose)
+  {
+    HMODULE hLib = GetModuleHandle(TEXT("kernel32")); // both functions are looked up at run time instead of being linked directly
+    pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
+    pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
+
+    if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity) // otherwise the affinity lists stay empty and set()/restore() do nothing
+    {
+      // Get logical processor information
+      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
+      DWORD bufferSize = 0;
+
+      // First call the function with an empty buffer to get the required buffer size
+      BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
+      if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER) // this sizing call is expected to fail with ERROR_INSUFFICIENT_BUFFER
+      {
+        OIDN_WARNING("GetLogicalProcessorInformationEx failed");
+        return;
+      }
+
+      // Allocate the buffer
+      buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
+      if (!buffer)
+      {
+        OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
+        return;
+      }
+
+      // Call again the function but now with the properly sized buffer
+      result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
+      if (!result)
+      {
+        OIDN_WARNING("GetLogicalProcessorInformationEx failed");
+        free(buffer);
+        return;
+      }
+
+      // Iterate over the logical processor information structures
+      // There should be one structure for each physical core
+      char* ptr = (char*)buffer; // byte cursor: entries are advanced by their own Size field
+      while (ptr < (char*)buffer + bufferSize)
+      {
+        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
+        if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
+        {
+          // Iterate over the groups
+          int numThreads = 0; // hardware threads taken from this core so far
+          for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
+          {
+            GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group]; // working copy; bits are cleared as they are consumed
+            while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
+            {
+              // Extract the next set bit/thread from the mask
+              GROUP_AFFINITY threadAffinity = coreAffinity;
+              threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask; // x & -x keeps only the lowest set bit
+
+              // Push the affinity for this thread
+              affinities.push_back(threadAffinity);
+              oldAffinities.push_back(threadAffinity); // placeholder; set() overwrites it with the thread's previous affinity
+              numThreads++;
+
+              // Remove this bit/thread from the mask
+              coreAffinity.Mask ^= threadAffinity.Mask;
+            }
+          }
+        }
+
+        // Next structure
+        ptr += item->Size;
+      }
+
+      // Free the buffer
+      free(buffer);
+    }
+  }
+
+  void ThreadAffinity::set(int threadIndex) // Windows: applies affinities[threadIndex] to the calling thread
+  {
+    if (threadIndex >= (int)affinities.size())
+      return; // no entry for this index (also the case when the constructor collected nothing)
+
+    // Save the current affinity and set the new one
+    const HANDLE thread = GetCurrentThread();
+    if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
+      OIDN_WARNING("SetThreadGroupAffinity failed");
+  }
+
+  void ThreadAffinity::restore(int threadIndex) // Windows: re-applies oldAffinities[threadIndex] to the calling thread
+  {
+    if (threadIndex >= (int)affinities.size())
+      return; // no entry for this index
+
+    // Restore the original affinity
+    const HANDLE thread = GetCurrentThread();
+    if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr)) // nullptr: the previous value is not requested
+      OIDN_WARNING("SetThreadGroupAffinity failed");
+  }
+
+#elif defined(__linux__)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - Linux
+  // --------------------------------------------------------------------------
+
+  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) // Linux: builds one single-CPU affinity set per selected hardware thread, at most numThreadsPerCore per core
+    : Verbose(verbose)
+  {
+    std::vector<int> threadIds; // distinct CPU ids, in discovery order
+
+    // Parse the thread/CPU topology
+    for (int cpuId = 0; ; cpuId++) // stops at the first cpuN whose topology file cannot be opened
+    {
+      std::fstream fs;
+      std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
+      fs.open(cpu.c_str(), std::fstream::in);
+      if (fs.fail()) break;
+
+      int i; // CPU id read from the sibling list
+      int j = 0; // ids consumed from this core's list
+      while ((j < numThreadsPerCore) && (fs >> i))
+      {
+        if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
+          threadIds.push_back(i); // siblings repeat in every sibling's file, so only add ids not seen yet
+
+        if (fs.peek() == ',' || fs.peek() == '-') // lists may be "0,4" or a range "0-1"; skipping '-' stops "-1" being read as a negative id
+          fs.ignore();
+        j++;
+      }
+
+      fs.close();
+    }
+
+  #if 0
+    for (size_t i = 0; i < threadIds.size(); ++i)
+      std::cout << "thread " << i << " -> " << threadIds[i] << std::endl;
+  #endif
+
+    // Create the affinity structures
+    affinities.resize(threadIds.size());
+    oldAffinities.resize(threadIds.size());
+
+    for (size_t i = 0; i < threadIds.size(); ++i)
+    {
+      cpu_set_t affinity;
+      CPU_ZERO(&affinity);
+      CPU_SET(threadIds[i], &affinity); // each entry pins to exactly one CPU
+
+      affinities[i] = affinity;
+      oldAffinities[i] = affinity; // placeholder; set() overwrites it with the thread's previous affinity
+    }
+  }
+
+  void ThreadAffinity::set(int threadIndex) // Linux: saves the calling thread's affinity, then applies affinities[threadIndex]
+  {
+    if (threadIndex >= (int)affinities.size())
+      return; // no entry for this index
+
+    const pthread_t thread = pthread_self();
+
+    // Save the current affinity
+    if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
+    {
+      OIDN_WARNING("pthread_getaffinity_np failed");
+      oldAffinities[threadIndex] = affinities[threadIndex]; // reset the saved slot to the target value
+      return; // the new affinity is not applied in this case
+    }
+
+    // Set the new affinity
+    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
+      OIDN_WARNING("pthread_setaffinity_np failed");
+  }
+
+  void ThreadAffinity::restore(int threadIndex) // Linux: re-applies oldAffinities[threadIndex] to the calling thread
+  {
+    if (threadIndex >= (int)affinities.size())
+      return; // no entry for this index
+
+    const pthread_t thread = pthread_self();
+
+    // Restore the original affinity
+    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
+      OIDN_WARNING("pthread_setaffinity_np failed");
+  }
+
+#elif defined(__APPLE__)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - macOS
+  // --------------------------------------------------------------------------
+
+  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) // macOS: builds affinity-tag entries, one tag per physical core
+    : Verbose(verbose)
+  {
+    // Query the thread/CPU topology
+    int numPhysicalCpus;
+    int numLogicalCpus;
+
+    if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
+    {
+      OIDN_WARNING("sysctlbyname failed");
+      return; // the affinity lists stay empty, so set()/restore() do nothing
+    }
+
+    if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
+      return; // this shouldn't happen
+    const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus; // logical CPUs per physical core
+
+    // Create the affinity structures
+    // macOS doesn't support binding a thread to a specific core, but we can at least group threads which
+    // should be on the same core together
+    for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
+    {
+      thread_affinity_policy affinity;
+      affinity.affinity_tag = core;
+
+      for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread) // consecutive entries share the core's tag
+      {
+        affinities.push_back(affinity);
+        oldAffinities.push_back(affinity); // placeholder; set() overwrites it with the thread's previous policy
+      }
+    }
+  }
+
+  void ThreadAffinity::set(int threadIndex) // macOS: saves the calling thread's affinity policy, then applies affinities[threadIndex]
+  {
+    if (threadIndex >= (int)affinities.size())
+      return; // no entry for this index
+
+    const auto thread = pthread_mach_thread_np(pthread_self()); // unlike mach_thread_self(), this adds no port right that would need deallocating
+
+    // Save the current affinity
+    mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
+    boolean_t getDefault = FALSE;
+    if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
+    {
+      OIDN_WARNING("thread_policy_get failed");
+      oldAffinities[threadIndex] = affinities[threadIndex]; // reset the saved slot to the target value
+      return; // the new policy is not applied in this case
+    }
+
+    // Set the new affinity
+    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
+      OIDN_WARNING("thread_policy_set failed");
+  }
+
+  void ThreadAffinity::restore(int threadIndex) // macOS: re-applies oldAffinities[threadIndex] to the calling thread
+  {
+    if (threadIndex >= (int)affinities.size())
+      return; // no entry for this index
+
+    const auto thread = pthread_mach_thread_np(pthread_self()); // unlike mach_thread_self(), this adds no port right that would need deallocating
+
+    // Restore the original affinity
+    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
+      OIDN_WARNING("thread_policy_set failed");
+  }
+
+#endif
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/thread.h b/thirdparty/oidn/common/thread.h
new file mode 100644
index 0000000000..2c731367da
--- /dev/null
+++ b/thirdparty/oidn/common/thread.h
@@ -0,0 +1,202 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+
+#if !defined(_WIN32)
+  #include <pthread.h>
+  #include <sched.h>
+  #if defined(__APPLE__)
+    #include <mach/thread_policy.h>
+  #endif
+#endif
+
+#include <vector>
+#include <mutex>
+
+namespace oidn {
+
+  // --------------------------------------------------------------------------
+  // ThreadLocal
+  // --------------------------------------------------------------------------
+
+  // Gives every thread that calls get() its own lazily created instance of T, backed by an OS thread-local storage slot
+  template<typename T>
+  class ThreadLocal : public Verbose
+  {
+  #if !defined(_WIN32)
+    pthread_key_t key;
+  #else
+    DWORD key;
+  #endif
+
+    std::vector<T*> instances; // every instance created by get(), deleted in the destructor
+    std::mutex mutex;          // held while instances is modified or torn down
+
+  public:
+    // Allocates the storage slot; verbose is passed on to the Verbose base
+    ThreadLocal(int verbose = 0)
+      : Verbose(verbose)
+    {
+    #if !defined(_WIN32)
+      if (pthread_key_create(&key, nullptr) != 0)
+        OIDN_FATAL("pthread_key_create failed");
+    #else
+      key = TlsAlloc();
+      if (key == TLS_OUT_OF_INDEXES)
+        OIDN_FATAL("TlsAlloc failed");
+    #endif
+    }
+
+    // Deletes every instance created so far, then releases the storage slot
+    ~ThreadLocal()
+    {
+      std::lock_guard<std::mutex> guard(mutex);
+      for (auto it = instances.begin(); it != instances.end(); ++it)
+        delete *it;
+
+    #if !defined(_WIN32)
+      if (pthread_key_delete(key) != 0)
+        OIDN_WARNING("pthread_key_delete failed");
+    #else
+      if (!TlsFree(key))
+        OIDN_WARNING("TlsFree failed");
+    #endif
+    }
+
+    // Returns the calling thread's instance, creating and registering it on first use
+    T& get()
+    {
+    #if !defined(_WIN32)
+      T* instance = static_cast<T*>(pthread_getspecific(key));
+    #else
+      T* instance = static_cast<T*>(TlsGetValue(key));
+    #endif
+      if (!instance)
+      {
+        instance = new T;
+        std::lock_guard<std::mutex> guard(mutex);
+        instances.push_back(instance);
+      #if !defined(_WIN32)
+        if (pthread_setspecific(key, instance) != 0)
+          OIDN_FATAL("pthread_setspecific failed");
+      #else
+        if (!TlsSetValue(key, instance))
+          OIDN_FATAL("TlsSetValue failed");
+      #endif
+      }
+
+      return *instance;
+    }
+  };
+
+#if defined(_WIN32)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - Windows
+  // --------------------------------------------------------------------------
+
+  class ThreadAffinity : public Verbose
+  {
+    // Pointer types for GetLogicalProcessorInformationEx and SetThreadGroupAffinity
+    using GetLogicalProcessorInformationExFunc = BOOL (WINAPI *)(LOGICAL_PROCESSOR_RELATIONSHIP,
+                                                                 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
+                                                                 PDWORD);
+
+    using SetThreadGroupAffinityFunc = BOOL (WINAPI *)(HANDLE,
+                                                       CONST GROUP_AFFINITY*,
+                                                       PGROUP_AFFINITY);
+
+    // Both pointers are null until something assigns them
+    GetLogicalProcessorInformationExFunc pGetLogicalProcessorInformationEx = nullptr;
+    SetThreadGroupAffinityFunc pSetThreadGroupAffinity = nullptr;
+
+    std::vector<GROUP_AFFINITY> affinities;    // affinity to apply, one entry per thread index
+    std::vector<GROUP_AFFINITY> oldAffinities; // affinity in effect before set()
+
+  public:
+    ThreadAffinity(int numThreadsPerCore = INT_MAX, int verbose = 0);
+
+    // Number of entries in the affinity table
+    int getNumThreads() const { return static_cast<int>(affinities.size()); }
+
+    // Remembers the current affinity of the thread, then switches it to
+    // the entry selected by threadIndex (0..numThreads-1)
+    void set(int threadIndex);
+
+    // Puts the remembered affinity of the thread back in place
+    void restore(int threadIndex);
+  };
+
+#elif defined(__linux__)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - Linux
+  // --------------------------------------------------------------------------
+
+  class ThreadAffinity : public Verbose
+  {
+    // Both tables are indexed by thread index
+    std::vector<cpu_set_t> affinities;    // CPU set to apply
+    std::vector<cpu_set_t> oldAffinities; // CPU set in effect before set()
+
+  public:
+    ThreadAffinity(int numThreadsPerCore = INT_MAX, int verbose = 0);
+
+    // Size of the affinity table; thread indices passed to set()
+    // and restore() run from 0 to this value minus one
+    int getNumThreads() const { return static_cast<int>(affinities.size()); }
+
+    // Remembers the current affinity of the thread, then switches it to
+    // the entry selected by threadIndex
+    void set(int threadIndex);
+
+    // Puts the remembered affinity of the thread back in place
+    void restore(int threadIndex);
+  };
+
+#elif defined(__APPLE__)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - macOS
+  // --------------------------------------------------------------------------
+
+  class ThreadAffinity : public Verbose
+  {
+    // Both tables are indexed by thread index
+    std::vector<thread_affinity_policy> affinities;    // policy to apply
+    std::vector<thread_affinity_policy> oldAffinities; // policy in effect before set()
+
+  public:
+    ThreadAffinity(int numThreadsPerCore = INT_MAX, int verbose = 0);
+
+    // Size of the affinity table; thread indices passed to set()
+    // and restore() run from 0 to this value minus one
+    int getNumThreads() const { return static_cast<int>(affinities.size()); }
+
+    // Remembers the current affinity of the thread, then switches it to
+    // the entry selected by threadIndex
+    void set(int threadIndex);
+
+    // Puts the remembered affinity of the thread back in place
+    void restore(int threadIndex);
+  };
+
+#endif
+
+} // namespace oidn
diff --git a/thirdparty/oidn/common/timer.h b/thirdparty/oidn/common/timer.h
new file mode 100644
index 0000000000..62aaaa1c33
--- /dev/null
+++ b/thirdparty/oidn/common/timer.h
@@ -0,0 +1,49 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#pragma once
+
+#include "platform.h"
+#include <chrono>
+
+namespace oidn {
+
+  // Stopwatch reporting the seconds elapsed since construction or the last reset()
+  class Timer
+  {
+  private:
+    // steady_clock never jumps; high_resolution_clock may alias system_clock, which follows wall-time adjustments
+    using clock = std::chrono::steady_clock;
+    std::chrono::time_point<clock> start; // moment the current measurement began
+
+  public:
+    // Starts measuring immediately
+    Timer() { reset(); }
+
+    // Restarts the measurement from the current instant
+    void reset()
+    {
+      start = clock::now();
+    }
+
+    // Returns the time elapsed since the measurement began, in seconds
+    double query() const
+    {
+      return std::chrono::duration<double>(clock::now() - start).count();
+    }
+  };
+
+} // namespace oidn
author	Juan Linietsky <reduzio@gmail.com>	2020-05-01 09:34:23 -0300
committer	Juan Linietsky <reduzio@gmail.com>	2020-05-10 15:59:09 -0300
commit	1bea8e1eacc68bcedbd3f207395bccf11011dae2 (patch)
tree	b75303a69491978c1e13360a3e6f355c5234dfe0 /thirdparty/oidn/common
parent	6a0473bcc23c096ef9ee929632a209761c2668f6 (diff)