diff options
Diffstat (limited to 'thirdparty/oidn/common/thread.cpp')
| -rw-r--r-- | thirdparty/oidn/common/thread.cpp | 297 | 
1 files changed, 297 insertions, 0 deletions
diff --git a/thirdparty/oidn/common/thread.cpp b/thirdparty/oidn/common/thread.cpp new file mode 100644 index 0000000000..48c489c57b --- /dev/null +++ b/thirdparty/oidn/common/thread.cpp @@ -0,0 +1,297 @@ +// ======================================================================== // +// Copyright 2009-2019 Intel Corporation                                    // +//                                                                          // +// Licensed under the Apache License, Version 2.0 (the "License");          // +// you may not use this file except in compliance with the License.         // +// You may obtain a copy of the License at                                  // +//                                                                          // +//     http://www.apache.org/licenses/LICENSE-2.0                           // +//                                                                          // +// Unless required by applicable law or agreed to in writing, software      // +// distributed under the License is distributed on an "AS IS" BASIS,        // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and      // +// limitations under the License.                                           // +// ======================================================================== // + +#if defined(_MSC_VER) +  #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned +#endif + +#if defined(__APPLE__) +  #include <mach/thread_act.h> +  #include <mach/mach_init.h> +#endif + +#include "thread.h" +#include <fstream> + +namespace oidn { + +#if defined(_WIN32) + +  // -------------------------------------------------------------------------- +  // ThreadAffinity - Windows +  // -------------------------------------------------------------------------- + +  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) +    : Verbose(verbose) +  { +    HMODULE hLib = GetModuleHandle(TEXT("kernel32")); +    pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx"); +    pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity"); + +    if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity) +    { +      // Get logical processor information +      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr; +      DWORD bufferSize = 0; + +      // First call the function with an empty buffer to get the required buffer size +      BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize); +      if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER) +      { +        OIDN_WARNING("GetLogicalProcessorInformationEx failed"); +        return; +      } + +      // Allocate the buffer +      buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize); +      if (!buffer) +      { +        OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed"); +        return; +      } + +      // Call again the function but now with the properly sized buffer +      result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize); +      if (!result) +      { +        OIDN_WARNING("GetLogicalProcessorInformationEx failed"); +        free(buffer); +        return; +      } + +      // Iterate over the logical processor information structures +      // There should be one structure for each physical core +      char* ptr = (char*)buffer; +      while (ptr < (char*)buffer + bufferSize) +      { +        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr; +        if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0) +        { +          // Iterate over the groups +          int numThreads = 0; +          for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group) +          { +            GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group]; +            while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore)) +            { +              // Extract the next set bit/thread from the mask +              GROUP_AFFINITY threadAffinity = coreAffinity; +              threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask; + +              // Push the affinity for this thread +              affinities.push_back(threadAffinity); +              oldAffinities.push_back(threadAffinity); +              numThreads++; + +              // Remove this bit/thread from the mask +              coreAffinity.Mask ^= threadAffinity.Mask; +            } +          } +        } + +        // Next structure +        ptr += item->Size; +      } + +      // Free the buffer +      free(buffer); +    } +  } + +  void ThreadAffinity::set(int threadIndex) +  { +    if (threadIndex >= (int)affinities.size()) +      return; + +    // Save the current affinity and set the new one +    const HANDLE thread = GetCurrentThread(); +    if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex])) +      OIDN_WARNING("SetThreadGroupAffinity failed"); +  } + +  void ThreadAffinity::restore(int threadIndex) +  { +    if (threadIndex >= (int)affinities.size()) +      return; + +    // Restore the original affinity +    const HANDLE thread = GetCurrentThread(); +    if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr)) +      OIDN_WARNING("SetThreadGroupAffinity failed"); +  } + +#elif defined(__linux__) + +  // -------------------------------------------------------------------------- +  // ThreadAffinity - Linux +  // -------------------------------------------------------------------------- + +  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) +    : Verbose(verbose) +  { +    std::vector<int> threadIds; + +    // Parse the thread/CPU topology +    for (int cpuId = 0; ; cpuId++) +    { +      std::fstream fs; +      std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list"); +      fs.open(cpu.c_str(), std::fstream::in); +      if (fs.fail()) break; + +      int i; +      int j = 0; +      while ((j < numThreadsPerCore) && (fs >> i)) +      { +        if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; })) +          threadIds.push_back(i); + +        if (fs.peek() == ',') +          fs.ignore(); +        j++; +      } + +      fs.close(); +    } + +  #if 0 +    for (size_t i = 0; i < thread_ids.size(); ++i) +      std::cout << "thread " << i << " -> " << thread_ids[i] << std::endl; +  #endif + +    // Create the affinity structures +    affinities.resize(threadIds.size()); +    oldAffinities.resize(threadIds.size()); + +    for (size_t i = 0; i < threadIds.size(); ++i) +    { +      cpu_set_t affinity; +      CPU_ZERO(&affinity); +      CPU_SET(threadIds[i], &affinity); + +      affinities[i] = affinity; +      oldAffinities[i] = affinity; +    } +  } + +  void ThreadAffinity::set(int threadIndex) +  { +    if (threadIndex >= (int)affinities.size()) +      return; + +    const pthread_t thread = pthread_self(); + +    // Save the current affinity +    if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0) +    { +      OIDN_WARNING("pthread_getaffinity_np failed"); +      oldAffinities[threadIndex] = affinities[threadIndex]; +      return; +    } + +    // Set the new affinity +    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0) +      OIDN_WARNING("pthread_setaffinity_np failed"); +  } + +  void ThreadAffinity::restore(int threadIndex) +  { +    if (threadIndex >= (int)affinities.size()) +      return; + +    const pthread_t thread = pthread_self(); + +    // Restore the original affinity +    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0) +      OIDN_WARNING("pthread_setaffinity_np failed"); +  } + +#elif defined(__APPLE__) + +  // -------------------------------------------------------------------------- +  // ThreadAffinity - macOS +  // -------------------------------------------------------------------------- + +  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) +    : Verbose(verbose) +  { +    // Query the thread/CPU topology +    int numPhysicalCpus; +    int numLogicalCpus; + +    if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus)) +    { +      OIDN_WARNING("sysctlbyname failed"); +      return; +    } + +    if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1)) +      return; // this shouldn't happen +    const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus; + +    // Create the affinity structures +    // macOS doesn't support binding a thread to a specific core, but we can at least group threads which +    // should be on the same core together +    for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1! +    { +      thread_affinity_policy affinity; +      affinity.affinity_tag = core; + +      for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread) +      { +        affinities.push_back(affinity); +        oldAffinities.push_back(affinity); +      } +    } +  } + +  void ThreadAffinity::set(int threadIndex) +  { +    if (threadIndex >= (int)affinities.size()) +      return; + +    const auto thread = mach_thread_self(); + +    // Save the current affinity +    mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT; +    boolean_t getDefault = FALSE; +    if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS) +    { +      OIDN_WARNING("thread_policy_get failed"); +      oldAffinities[threadIndex] = affinities[threadIndex]; +      return; +    } + +    // Set the new affinity +    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) +      OIDN_WARNING("thread_policy_set failed"); +  } + +  void ThreadAffinity::restore(int threadIndex) +  { +    if (threadIndex >= (int)affinities.size()) +      return; + +    const auto thread = mach_thread_self(); + +    // Restore the original affinity +    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) +      OIDN_WARNING("thread_policy_set failed"); +  } + +#endif + +} // namespace oidn  |