// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "thread.h" #include "sysinfo.h" #include "string.h" #include <iostream> #if defined(__ARM_NEON) #include "../simd/arm/emulation.h" #else #include <xmmintrin.h> #endif #if defined(PTHREADS_WIN32) #pragma comment (lib, "pthreadVC.lib") #endif //////////////////////////////////////////////////////////////////////////////// /// Windows Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__WIN32__) #define WIN32_LEAN_AND_MEAN #include <windows.h> namespace embree { /*! set the affinity of a given thread */ void setAffinity(HANDLE thread, ssize_t affinity) { typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)(); typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD); typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY); typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER); HMODULE hlib = LoadLibrary("Kernel32"); GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount"); GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount"); SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity"); SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx"); if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) { int groups = pGetActiveProcessorGroupCount(); int totalProcessors = 0, group = 0, number = 0; for (int i = 0; i<groups; i++) { int processors = pGetActiveProcessorCount(i); if (totalProcessors + processors > affinity) { group = i; number = (int)affinity - totalProcessors; break; } totalProcessors += processors; } GROUP_AFFINITY groupAffinity; groupAffinity.Group = (WORD)group; groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number); groupAffinity.Reserved[0] = 0; groupAffinity.Reserved[1] = 0; groupAffinity.Reserved[2] = 0; if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr)) WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning PROCESSOR_NUMBER processorNumber; processorNumber.Group = group; processorNumber.Number = number; processorNumber.Reserved = 0; if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr)) WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning } else { if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity))) WARNING("SetThreadAffinityMask failed"); // on purpose only a warning if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1) WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning } } /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { setAffinity(GetCurrentThread(), affinity); } struct ThreadStartupData { public: ThreadStartupData (thread_func f, void* arg) : f(f), arg(arg) {} public: thread_func f; void* arg; }; DWORD WINAPI threadStartup(LPVOID ptr) { ThreadStartupData* parg = (ThreadStartupData*) ptr; _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); parg->f(parg->arg); delete parg; return 0; } #if !defined(PTHREADS_WIN32) /*! creates a hardware thread running on specific core */ thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) { HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr); if (thread == nullptr) FATAL("CreateThread failed"); if (threadID >= 0) setAffinity(thread, threadID); return thread_t(thread); } /*! the thread calling this function gets yielded */ void yield() { SwitchToThread(); } /*! waits until the given thread has terminated */ void join(thread_t tid) { WaitForSingleObject(HANDLE(tid), INFINITE); CloseHandle(HANDLE(tid)); } /*! destroy a hardware thread by its handle */ void destroyThread(thread_t tid) { TerminateThread(HANDLE(tid),0); CloseHandle(HANDLE(tid)); } /*! creates thread local storage */ tls_t createTls() { return tls_t(size_t(TlsAlloc())); } /*! set the thread local storage pointer */ void setTls(tls_t tls, void* const ptr) { TlsSetValue(DWORD(size_t(tls)), ptr); } /*! return the thread local storage pointer */ void* getTls(tls_t tls) { return TlsGetValue(DWORD(size_t(tls))); } /*! destroys thread local storage identifier */ void destroyTls(tls_t tls) { TlsFree(DWORD(size_t(tls))); } #endif } #endif //////////////////////////////////////////////////////////////////////////////// /// Linux Platform //////////////////////////////////////////////////////////////////////////////// // -- GODOT start -- #if defined(__LINUX__) && !defined(__ANDROID__) // -- GODOT end -- #include <fstream> #include <sstream> #include <algorithm> namespace embree { static MutexSys mutex; static std::vector<size_t> threadIDs; /* changes thread ID mapping such that we first fill up all thread on one core */ size_t mapThreadID(size_t threadID) { Lock<MutexSys> lock(mutex); if (threadIDs.size() == 0) { /* parse thread/CPU topology */ for (size_t cpuID=0;;cpuID++) { std::fstream fs; std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list"); fs.open (cpu.c_str(), std::fstream::in); if (fs.fail()) break; int i; while (fs >> i) { if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; })) threadIDs.push_back(i); if (fs.peek() == ',') fs.ignore(); } fs.close(); } #if 0 for (size_t i=0;i<threadIDs.size();i++) std::cout << i << " -> " << threadIDs[i] << std::endl; #endif /* verify the mapping and do not use it if the mapping has errors */ for (size_t i=0;i<threadIDs.size();i++) { for (size_t j=0;j<threadIDs.size();j++) { if (i != j && threadIDs[i] == threadIDs[j]) { threadIDs.clear(); } } } } /* re-map threadIDs if mapping is available */ size_t ID = threadID; if (threadID < threadIDs.size()) ID = threadIDs[threadID]; /* find correct thread to affinitize to */ cpu_set_t set; if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) { for (int i=0, j=0; i<CPU_SETSIZE; i++) { if (!CPU_ISSET(i,&set)) continue; if (j == ID) { ID = i; break; } j++; } } return ID; } /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { cpu_set_t cset; CPU_ZERO(&cset); size_t threadID = mapThreadID(affinity); CPU_SET(threadID, &cset); pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); } } #endif // -- GODOT start -- //////////////////////////////////////////////////////////////////////////////// /// Android Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__ANDROID__) namespace embree { /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { cpu_set_t cset; CPU_ZERO(&cset); CPU_SET(affinity, &cset); sched_setaffinity(0, sizeof(cset), &cset); } } #endif // -- GODOT end -- //////////////////////////////////////////////////////////////////////////////// /// FreeBSD Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__FreeBSD__) #include <pthread_np.h> namespace embree { /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { cpuset_t cset; CPU_ZERO(&cset); CPU_SET(affinity, &cset); pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); } } #endif //////////////////////////////////////////////////////////////////////////////// /// MacOSX Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__MACOSX__) #include <mach/thread_act.h> #include <mach/thread_policy.h> #include <mach/mach_init.h> namespace embree { /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip thread_affinity_policy ap; ap.affinity_tag = affinity; if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) WARNING("setting thread affinity failed"); // on purpose only a warning #endif } } #endif //////////////////////////////////////////////////////////////////////////////// /// Unix Platform //////////////////////////////////////////////////////////////////////////////// #if defined(__UNIX__) || defined(PTHREADS_WIN32) #include <pthread.h> #include <sched.h> #if defined(__USE_NUMA__) #include <numa.h> #endif namespace embree { struct ThreadStartupData { public: ThreadStartupData (thread_func f, void* arg, int affinity) : f(f), arg(arg), affinity(affinity) {} public: thread_func f; void* arg; ssize_t affinity; }; static void* threadStartup(ThreadStartupData* parg) { _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); /*! Mac OS X does not support setting affinity at thread creation time */ #if defined(__MACOSX__) if (parg->affinity >= 0) setAffinity(parg->affinity); #endif parg->f(parg->arg); delete parg; return nullptr; } /*! creates a hardware thread running on specific core */ thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) { /* set stack size */ pthread_attr_t attr; pthread_attr_init(&attr); if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size); /* create thread */ pthread_t* tid = new pthread_t; if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) { pthread_attr_destroy(&attr); delete tid; FATAL("pthread_create failed"); } pthread_attr_destroy(&attr); /* set affinity */ // -- GODOT start -- #if defined(__LINUX__) && !defined(__ANDROID__) // -- GODOT end -- if (threadID >= 0) { cpu_set_t cset; CPU_ZERO(&cset); threadID = mapThreadID(threadID); CPU_SET(threadID, &cset); pthread_setaffinity_np(*tid, sizeof(cset), &cset); } #elif defined(__FreeBSD__) if (threadID >= 0) { cpuset_t cset; CPU_ZERO(&cset); CPU_SET(threadID, &cset); pthread_setaffinity_np(*tid, sizeof(cset), &cset); } // -- GODOT start -- #elif defined(__ANDROID__) if (threadID >= 0) { cpu_set_t cset; CPU_ZERO(&cset); CPU_SET(threadID, &cset); sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset); } #endif // -- GODOT end -- return thread_t(tid); } /*! the thread calling this function gets yielded */ void yield() { sched_yield(); } /*! waits until the given thread has terminated */ void join(thread_t tid) { if (pthread_join(*(pthread_t*)tid, nullptr) != 0) FATAL("pthread_join failed"); delete (pthread_t*)tid; } /*! destroy a hardware thread by its handle */ void destroyThread(thread_t tid) { // -- GODOT start -- #if defined(__ANDROID__) FATAL("Can't destroy threads on Android."); #else pthread_cancel(*(pthread_t*)tid); delete (pthread_t*)tid; #endif // -- GODOT end -- } /*! creates thread local storage */ tls_t createTls() { pthread_key_t* key = new pthread_key_t; if (pthread_key_create(key,nullptr) != 0) { delete key; FATAL("pthread_key_create failed"); } return tls_t(key); } /*! return the thread local storage pointer */ void* getTls(tls_t tls) { assert(tls); return pthread_getspecific(*(pthread_key_t*)tls); } /*! set the thread local storage pointer */ void setTls(tls_t tls, void* const ptr) { assert(tls); if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0) FATAL("pthread_setspecific failed"); } /*! destroys thread local storage identifier */ void destroyTls(tls_t tls) { assert(tls); if (pthread_key_delete(*(pthread_key_t*)tls) != 0) FATAL("pthread_key_delete failed"); delete (pthread_key_t*)tls; } } #endif