// AimRT/_deps/tbb-src/test/common/utils_concurrency_limit.h
/*
Copyright (c) 2020-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_test_common_utils_concurrency_limit_H
#define __TBB_test_common_utils_concurrency_limit_H
#include "config.h"

#include "utils_assert.h"
#include "utils_report.h"

#include "oneapi/tbb/task_arena.h"
#include "oneapi/tbb/task_scheduler_observer.h"
#include "oneapi/tbb/enumerable_thread_specific.h"

#include <algorithm>
#include <atomic>
#include <cerrno>
#include <climits>
#include <cstddef>
#include <thread>
#include <utility>
#include <vector>

#if _WIN32 || _WIN64
#include <windows.h>
#elif __unix__
#include <unistd.h>
#if __linux__
#include <sys/sysinfo.h>
#endif
#include <string.h>
#include <sched.h>
#if __FreeBSD__
#include <errno.h>
#include <sys/param.h>
#include <sys/cpuset.h>
#endif
#endif
namespace utils {
using thread_num_type = std::size_t;
// Returns the platform's maximal concurrency as reported by TBB.
// The value is queried once and cached for the lifetime of the process.
inline thread_num_type get_platform_max_threads() {
    static const thread_num_type cached_limit =
        static_cast<thread_num_type>(tbb::this_task_arena::max_concurrency());
    return cached_limit;
}
// Builds a set of thread counts in [1, max_threads] used to exercise tests
// under varying concurrency.
//
// Counts grow with an increasing stride (1, 2, 4, 7, 11, ...) so large
// machines are sampled sparsely; max_threads itself is always included.
// The result is rotated by half so the sequence is not monotonically
// increasing, which shakes out order-dependent assumptions in tests.
//
// Returns an empty vector when max_threads == 0.  (Previously that case
// called back() on an empty vector — undefined behavior.)
inline std::vector<thread_num_type> concurrency_range(thread_num_type max_threads) {
    std::vector<thread_num_type> threads_range;

    thread_num_type step = 1;
    for (thread_num_type thread_num = 1; thread_num <= max_threads; thread_num += step++)
        threads_range.push_back(thread_num);

    // Guarantee the upper bound itself is part of the range; guard against
    // the empty vector produced by max_threads == 0.
    if (!threads_range.empty() && threads_range.back() != max_threads)
        threads_range.push_back(max_threads);

    // rotate in order to make threads_range non-monotonic
    std::rotate(threads_range.begin(), threads_range.begin() + threads_range.size()/2, threads_range.end());
    return threads_range;
}
// Default overload: the concurrency range for this platform's maximal
// thread count, computed once and cached.
inline std::vector<thread_num_type> concurrency_range() {
    static const std::vector<thread_num_type> cached_range =
        concurrency_range(get_platform_max_threads());
    return cached_range;
}
#if !__TBB_TEST_SKIP_AFFINITY
// Cached result of get_max_procs(); 0 means "not computed yet".
static int maxProcs = 0;
// Returns the number of logical CPUs the current process may run on
// (derived from the process affinity mask where available), computed once
// and cached in maxProcs.
static int get_max_procs() {
if (!maxProcs) {
#if _WIN32||_WIN64
// Count the bits set in the process affinity mask.
DWORD_PTR pam, sam, m = 1;
GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
int nproc = 0;
for ( std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
if ( pam & m )
++nproc;
}
maxProcs = nproc;
#elif __linux__
// Count the CPUs present in the process affinity mask, scanning the
// first _SC_NPROCESSORS_ONLN bit positions.
cpu_set_t mask;
int result = 0;
sched_getaffinity(0, sizeof(cpu_set_t), &mask);
int nproc = sysconf(_SC_NPROCESSORS_ONLN);
for (int i = 0; i < nproc; ++i) {
if (CPU_ISSET(i, &mask)) ++result;
}
maxProcs = result;
#else // FreeBSD
// No affinity query here; fall back to the online processor count.
maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
return maxProcs;
}
// Returns the index of the first CPU present in the current process affinity
// mask (the lowest CPU this process may run on); 0 on platforms without
// affinity support.  Declared inline because this header is included from
// multiple test translation units (a non-inline definition would violate the
// one-definition rule and cause multiple-definition link errors).
inline int get_start_affinity_process() {
#if __linux__
    cpu_set_t mask;
    sched_getaffinity(0, sizeof(cpu_set_t), &mask);

    int result = -1;
    int nproc = sysconf(_SC_NPROCESSORS_ONLN);
    for (int i = 0; i < nproc; ++i) {
        if (CPU_ISSET(i, &mask)) {
            result = i;
            break;
        }
    }
    // The mask of a running process always contains at least one CPU.
    ASSERT(result != -1, nullptr);
    return result;
#else
    // TODO: add affinity support for Windows and FreeBSD
    return 0;
#endif
}
// Restricts the process affinity mask so that at most max_threads logical
// CPUs can be used (starting from the first CPU already available to the
// process) and returns the number of threads actually granted, which may be
// smaller when the machine has fewer CPUs.  Declared inline: this header is
// included from multiple test translation units.
inline int limit_number_of_threads( int max_threads ) {
    ASSERT(max_threads >= 1,"The limited number of threads should be positive");
    maxProcs = get_max_procs();
    if (maxProcs < max_threads) {
        // Suppose that process mask is not set so the number of available threads equals maxProcs
        return maxProcs;
    }
#if _WIN32 || _WIN64
    ASSERT(max_threads <= 64, "LimitNumberOfThreads doesn't support max_threads to be more than 64 on Windows");
    // Build a mask with the low max_threads bits set.
    DWORD_PTR mask = 1;
    for (int i = 1; i < max_threads; ++i) {
        mask |= mask << 1;
    }
    bool err = !SetProcessAffinityMask(GetCurrentProcess(), mask);
#else
#if __linux__
    using mask_t = cpu_set_t;
#define setaffinity(mask) sched_setaffinity(getpid(), sizeof(mask_t), &mask)
#else /*__FreeBSD*/
    using mask_t = cpuset_t;
#define setaffinity(mask) cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(mask_t), &mask)
#endif
    mask_t new_mask;
    CPU_ZERO(&new_mask);

    int mask_size = int(sizeof(mask_t) * CHAR_BIT);
    if ( mask_size < maxProcs ) {
        REPORT("The mask size doesn't seem to be big enough to call setaffinity. The call may return an error.");
    }
    ASSERT(max_threads <= int(sizeof(mask_t) * CHAR_BIT), "The mask size is not enough to set the requested number of threads.");

    // Pin to max_threads consecutive CPUs beginning at the first CPU already
    // present in the current affinity mask.
    int st = get_start_affinity_process();
    for (int i = st; i < st + max_threads; ++i) {
        CPU_SET(i, &new_mask);
    }
    int err = setaffinity(new_mask);
#undef setaffinity // keep the helper macro from leaking out of this header
#endif
    ASSERT(!err, "Setting process affinity failed");
    return max_threads;
}
#endif // __TBB_TEST_SKIP_AFFINITY
// TODO: consider using cpuset_setaffinity/sched_getaffinity on FreeBSD to enable the functionality
#define OS_AFFINITY_SYSCALL_PRESENT (__linux__ && !__ANDROID__)
#if OS_AFFINITY_SYSCALL_PRESENT
// Retrieves the calling thread's affinity mask.  On return, ncpus holds the
// bit-size of the dynamically allocated mask that the kernel accepted, and
// free_indexes holds the indexes of every CPU present in the mask.
// Declared inline: this header is included from multiple translation units.
inline void get_thread_affinity_mask(std::size_t& ncpus, std::vector<int>& free_indexes) {
    cpu_set_t* mask = nullptr;
    ncpus = sizeof(cpu_set_t) * CHAR_BIT;
    // Grow the mask until sched_getaffinity accepts its size: EINVAL means
    // the buffer was too small for the kernel's internal cpumask.
    do {
        mask = CPU_ALLOC(ncpus);
        if (!mask) break;
        const size_t size = CPU_ALLOC_SIZE(ncpus);
        CPU_ZERO_S(size, mask);
        const int err = sched_getaffinity(0, size, mask);
        if (!err) break;

        CPU_FREE(mask);
        mask = nullptr;
        if (errno != EINVAL) break;
        ncpus <<= 1;
    } while (ncpus < 16 * 1024 /* some reasonable limit */ );
    ASSERT(mask, "Failed to obtain process affinity mask.");

    const size_t size = CPU_ALLOC_SIZE(ncpus);
    const int num_cpus = CPU_COUNT_S(size, mask);
    // Scan every bit position of the mask, stopping early once all num_cpus
    // set bits have been collected.  Iterating only the first num_cpus
    // positions (the previous behavior) missed CPUs whenever the affinity
    // mask was not a prefix of [0, num_cpus) -- e.g. a process pinned to
    // CPUs {4,5,6,7} would have found no free indexes at all.
    for (std::size_t i = 0; i < ncpus && int(free_indexes.size()) < num_cpus; ++i) {
        if (CPU_ISSET_S(int(i), size, mask)) {
            free_indexes.push_back(int(i));
        }
    }
    CPU_FREE(mask);
}
// Pins the calling thread to a single CPU chosen round-robin from
// free_indexes.  curr_idx is a shared atomic counter so that concurrently
// entering threads are assigned distinct CPUs.  Declared inline: this header
// is included from multiple translation units.
inline void pin_thread_imp(std::size_t ncpus, std::vector<int>& free_indexes, std::atomic<int>& curr_idx) {
    const size_t size = CPU_ALLOC_SIZE(ncpus);

    ASSERT(free_indexes.size() > 0, nullptr);
    // Atomic fetch-and-increment keeps the round-robin distribution even
    // across concurrent callers.
    int mapped_idx = free_indexes[curr_idx++ % free_indexes.size()];

    cpu_set_t *target_mask = CPU_ALLOC(ncpus);
    ASSERT(target_mask, nullptr);
    CPU_ZERO_S(size, target_mask);
    CPU_SET_S(mapped_idx, size, target_mask);

    const int err = sched_setaffinity(0, size, target_mask);
    // Release the mask before the assertion so a non-aborting ASSERT
    // implementation does not leak it.
    CPU_FREE(target_mask);
    ASSERT(err == 0, "Failed to set thread affinity");
}
#endif
// Computes the process affinity set once at construction, then pins each
// thread that calls pin_thread() to its own CPU, round-robin over that set.
// On platforms without the affinity syscalls pin_thread() is a no-op.
class thread_pinner {
public:
    thread_pinner() {
        // thread_index is otherwise unused on platforms without affinity
        // syscalls; silence the warning there.
        tbb::detail::suppress_unused_warning(thread_index);
#if OS_AFFINITY_SYSCALL_PRESENT
        get_thread_affinity_mask(ncpus, free_indexes);
#endif
    }

    // Pins the calling thread to the next CPU in the round-robin sequence.
    void pin_thread() {
#if OS_AFFINITY_SYSCALL_PRESENT
        pin_thread_imp(ncpus, free_indexes, thread_index);
#endif
    }

private:
#if OS_AFFINITY_SYSCALL_PRESENT
    std::size_t ncpus{0};          // bit-size of the affinity mask; set by the ctor
    std::vector<int> free_indexes{}; // CPUs available to this process
#endif
    std::atomic<int> thread_index{}; // round-robin counter shared by all callers
};
// Task-scheduler observer that pins every worker thread entering the
// observed arena to its own CPU (round-robin over the CPUs available to the
// process).
class pinning_observer : public tbb::task_scheduler_observer {
thread_pinner pinner;
// Thread-local flag: true once the calling thread has been pinned, so a
// thread re-entering the arena is not pinned a second time.
tbb::enumerable_thread_specific<bool> register_threads;
public:
// Starts observing the given arena immediately.
pinning_observer(tbb::task_arena& arena) : tbb::task_scheduler_observer(arena), pinner() {
observe(true);
}
// Invoked by TBB whenever a thread joins the arena; pins it at most once.
void on_scheduler_entry( bool ) override {
bool& is_pinned = register_threads.local();
if (is_pinned) return;
pinner.pin_thread();
is_pinned = true;
}
~pinning_observer() {
// Stop observing before the members used by the callback are destroyed.
observe(false);
}
};
#if __unix__
#include <sched.h>
#endif
// Checks whether the current user is allowed to raise thread scheduling
// priority (on Linux this typically requires root or CAP_SYS_NICE).  The
// thread's original scheduling policy and parameters are restored before
// returning.  Always returns false on platforms without pthread scheduling
// support.  Declared inline: this header is included from multiple test
// translation units.
inline bool can_change_thread_priority() {
#if __unix__
    pthread_t this_thread = pthread_self();

    // Save the current policy/priority so they can be restored afterwards.
    sched_param old_params;
    int old_policy;
    int err = pthread_getschedparam(this_thread, &old_policy, &old_params);
    ASSERT(err == 0, nullptr);

    // Probe: try switching to SCHED_FIFO at the maximal priority.
    sched_param params;
    params.sched_priority = sched_get_priority_max(SCHED_FIFO);
    ASSERT(params.sched_priority != -1, nullptr);
    err = pthread_setschedparam(this_thread, SCHED_FIFO, &params);
    if (err == 0) {
        // Probe succeeded; put the original parameters back.
        err = pthread_setschedparam(this_thread, old_policy, &old_params);
        ASSERT(err == 0, nullptr);
    }
    return err == 0;
#endif
    return false;
}
#if __unix__
// RAII guard that raises the calling thread to the maximal SCHED_FIFO
// priority for its lifetime and restores the previous scheduling policy and
// parameters on destruction.  Construction asserts if the priority cannot be
// raised; call can_change_thread_priority() first to verify permissions.
class increased_priority_guard {
public:
increased_priority_guard() : m_backup(get_current_schedparam()) {
increase_thread_priority();
}
~increased_priority_guard() {
// restore priority on destruction
pthread_t this_thread = pthread_self();
int err = pthread_setschedparam(this_thread,
/*policy*/ m_backup.first, /*sched_param*/ &m_backup.second);
ASSERT(err == 0, nullptr);
}
private:
// Captures the thread's current scheduling policy and parameters.
std::pair<int, sched_param> get_current_schedparam() {
pthread_t this_thread = pthread_self();
sched_param params;
int policy = 0;
int err = pthread_getschedparam(this_thread, &policy, &params);
ASSERT(err == 0, nullptr);
return std::make_pair(policy, params);
}
// Switches the thread to SCHED_FIFO at maximal priority (usually requires
// elevated privileges; asserts on failure).
void increase_thread_priority() {
pthread_t this_thread = pthread_self();
sched_param params;
params.sched_priority = sched_get_priority_max(SCHED_FIFO);
ASSERT(params.sched_priority != -1, nullptr);
int err = pthread_setschedparam(this_thread, SCHED_FIFO, &params);
ASSERT(err == 0, "Can not change thread priority.");
}
// Saved (policy, sched_param) pair restored by the destructor.
std::pair<int, sched_param> m_backup;
};
#else
// Stub for platforms without pthread scheduling: the guard is a no-op so
// call sites compile unchanged.
class increased_priority_guard{};
#endif
} // namespace utils
#endif // __TBB_test_common_utils_concurrency_limit_H