1 files changed, 297 insertions, 0 deletions
diff --git a/thirdparty/oidn/common/thread.cpp b/thirdparty/oidn/common/thread.cpp
new file mode 100644
index 0000000000..48c489c57b
--- /dev/null
+++ b/thirdparty/oidn/common/thread.cpp
@@ -0,0 +1,297 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#if defined(_MSC_VER)
+  #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
+#endif
+
+#if defined(__APPLE__)
+  #include <mach/thread_act.h>
+  #include <mach/mach_init.h>
+#endif
+
+#include "thread.h"
+#include <fstream>
+
+namespace oidn {
+
+#if defined(_WIN32)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - Windows
+  // --------------------------------------------------------------------------
+
+  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
+    : Verbose(verbose)
+  {
+    HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
+    pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
+    pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
+
+    if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
+    {
+      // Get logical processor information
+      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
+      DWORD bufferSize = 0;
+
+      // First call the function with an empty buffer to get the required buffer size
+      BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
+      if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+      {
+        OIDN_WARNING("GetLogicalProcessorInformationEx failed");
+        return;
+      }
+
+      // Allocate the buffer
+      buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
+      if (!buffer)
+      {
+        OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
+        return;
+      }
+
+      // Call again the function but now with the properly sized buffer
+      result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
+      if (!result)
+      {
+        OIDN_WARNING("GetLogicalProcessorInformationEx failed");
+        free(buffer);
+        return;
+      }
+
+      // Iterate over the logical processor information structures
+      // There should be one structure for each physical core
+      char* ptr = (char*)buffer;
+      while (ptr < (char*)buffer + bufferSize)
+      {
+        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
+        if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
+        {
+          // Iterate over the groups
+          int numThreads = 0;
+          for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
+          {
+            GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
+            while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
+            {
+              // Extract the next set bit/thread from the mask
+              GROUP_AFFINITY threadAffinity = coreAffinity;
+              threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
+
+              // Push the affinity for this thread
+              affinities.push_back(threadAffinity);
+              oldAffinities.push_back(threadAffinity);
+              numThreads++;
+
+              // Remove this bit/thread from the mask
+              coreAffinity.Mask ^= threadAffinity.Mask;
+            }
+          }
+        }
+
+        // Next structure
+        ptr += item->Size;
+      }
+
+      // Free the buffer
+      free(buffer);
+    }
+  }
+
+  void ThreadAffinity::set(int threadIndex)
+  {
+    if (threadIndex >= (int)affinities.size())
+      return;
+
+    // Save the current affinity and set the new one
+    const HANDLE thread = GetCurrentThread();
+    if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
+      OIDN_WARNING("SetThreadGroupAffinity failed");
+  }
+
+  void ThreadAffinity::restore(int threadIndex)
+  {
+    if (threadIndex >= (int)affinities.size())
+      return;
+
+    // Restore the original affinity
+    const HANDLE thread = GetCurrentThread();
+    if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
+      OIDN_WARNING("SetThreadGroupAffinity failed");
+  }
+
+#elif defined(__linux__)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - Linux
+  // --------------------------------------------------------------------------
+
+  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
+    : Verbose(verbose)
+  {
+    std::vector<int> threadIds;
+
+    // Parse the thread/CPU topology
+    for (int cpuId = 0; ; cpuId++)
+    {
+      std::fstream fs;
+      std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
+      fs.open(cpu.c_str(), std::fstream::in);
+      if (fs.fail()) break;
+
+      int i;
+      int j = 0;
+      while ((j < numThreadsPerCore) && (fs >> i))
+      {
+        if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
+          threadIds.push_back(i);
+
+        if (fs.peek() == ',')
+          fs.ignore();
+        j++;
+      }
+
+      fs.close();
+    }
+
+  #if 0
+    for (size_t i = 0; i < thread_ids.size(); ++i)
+      std::cout << "thread " << i << " -> " << thread_ids[i] << std::endl;
+  #endif
+
+    // Create the affinity structures
+    affinities.resize(threadIds.size());
+    oldAffinities.resize(threadIds.size());
+
+    for (size_t i = 0; i < threadIds.size(); ++i)
+    {
+      cpu_set_t affinity;
+      CPU_ZERO(&affinity);
+      CPU_SET(threadIds[i], &affinity);
+
+      affinities[i] = affinity;
+      oldAffinities[i] = affinity;
+    }
+  }
+
+  void ThreadAffinity::set(int threadIndex)
+  {
+    if (threadIndex >= (int)affinities.size())
+      return;
+
+    const pthread_t thread = pthread_self();
+
+    // Save the current affinity
+    if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
+    {
+      OIDN_WARNING("pthread_getaffinity_np failed");
+      oldAffinities[threadIndex] = affinities[threadIndex];
+      return;
+    }
+
+    // Set the new affinity
+    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
+      OIDN_WARNING("pthread_setaffinity_np failed");
+  }
+
+  void ThreadAffinity::restore(int threadIndex)
+  {
+    if (threadIndex >= (int)affinities.size())
+      return;
+
+    const pthread_t thread = pthread_self();
+
+    // Restore the original affinity
+    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
+      OIDN_WARNING("pthread_setaffinity_np failed");
+  }
+
+#elif defined(__APPLE__)
+
+  // --------------------------------------------------------------------------
+  // ThreadAffinity - macOS
+  // --------------------------------------------------------------------------
+
+  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
+    : Verbose(verbose)
+  {
+    // Query the thread/CPU topology
+    int numPhysicalCpus;
+    int numLogicalCpus;
+
+    if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
+    {
+      OIDN_WARNING("sysctlbyname failed");
+      return;
+    }
+
+    if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
+      return; // this shouldn't happen
+    const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
+
+    // Create the affinity structures
+    // macOS doesn't support binding a thread to a specific core, but we can at least group threads which
+    // should be on the same core together
+    for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
+    {
+      thread_affinity_policy affinity;
+      affinity.affinity_tag = core;
+
+      for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
+      {
+        affinities.push_back(affinity);
+        oldAffinities.push_back(affinity);
+      }
+    }
+  }
+
+  void ThreadAffinity::set(int threadIndex)
+  {
+    if (threadIndex >= (int)affinities.size())
+      return;
+
+    const auto thread = mach_thread_self();
+
+    // Save the current affinity
+    mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
+    boolean_t getDefault = FALSE;
+    if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
+    {
+      OIDN_WARNING("thread_policy_get failed");
+      oldAffinities[threadIndex] = affinities[threadIndex];
+      return;
+    }
+
+    // Set the new affinity
+    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
+      OIDN_WARNING("thread_policy_set failed");
+  }
+
+  void ThreadAffinity::restore(int threadIndex)
+  {
+    if (threadIndex >= (int)affinities.size())
+      return;
+
+    const auto thread = mach_thread_self();
+
+    // Restore the original affinity
+    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
+      OIDN_WARNING("thread_policy_set failed");
+  }
+
+#endif
+
+} // namespace oidn