#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#include <limits>

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    Mask(const Mask &other) = delete;
    Mask &operator=(const Mask &other) = delete;
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    bool empty() const override { return hwloc_bitmap_iszero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    bool is_equal(const KMPAffinity::Mask *rhs) const override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      return hwloc_bitmap_isequal(mask, convert->mask);
    }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
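    // Note on iteration (editorial): with hwloc, end() is -1 because
    // hwloc_bitmap_next() returns -1 once the bitmap is exhausted, so a walk
    // over a mask m looks like:
    //
    //   for (int cpu = m->begin(); cpu != m->end(); cpu = m->next(cpu)) {
    //     // cpu is a set bit in the mask
    //   }
    //
    // The KMP_CPU_SET_ITERATE() macro used elsewhere in the runtime expands
    // to essentially this begin()/end()/next() loop.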
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                      KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
        }
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
        }
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Is topology discovery of processing units possible? And were there no
    // errors when calling the hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||    \
    KMP_OS_AIX
#if KMP_OS_LINUX
// The affinity syscall numbers must match the kernel's values for each
// architecture; if <sys/syscall.h> already defines them, verify they agree.
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_VE
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_S390X
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 239
#elif __NR_sched_setaffinity != 239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 240
#elif __NR_sched_getaffinity != 240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_SPARC
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 261
#elif __NR_sched_setaffinity != 261
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 260
#elif __NR_sched_getaffinity != 260
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD || KMP_OS_DRAGONFLY
#include <pthread.h>
#include <pthread_np.h>
#elif KMP_OS_NETBSD
#include <pthread.h>
#include <sched.h>
#elif KMP_OS_AIX
#include <sys/dr.h>
#include <sys/rset.h>
#define VMI_MAXRADS 64 // Maximum number of RADs allowed by AIX.
#define GET_NUMBER_SMT_SETS 0x0004
extern "C" int syssmt(int flags, int, int, int *);
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    bool empty() const override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        if (mask[i] != (mask_t)0)
          return false;
      return true;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    bool is_equal(const KMPAffinity::Mask *rhs) const override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        if (mask[i] != convert->mask[i])
          return false;
      return true;
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
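    // Worked example (editorial, not in the original source): with a 64-bit
    // mask_t, CPU 70 lives in word 70 / 64 = 1 at bit 70 % 64 = 6, so set(70)
    // performs mask[1] |= (ONE << 6). __kmp_affin_mask_size is the byte
    // length of the flat mask array, so get_num_mask_types() above is the
    // number of mask_t words it holds.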
#if KMP_OS_AIX
    // On AIX there is no way to query which CPUs a thread is bound to, so
    // this routine only returns the full mask of available CPUs.
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");

      (void)abort_on_error;

      // Set the mask with all CPUs that are available.
      for (int i = 0; i < __kmp_xproc; ++i)
        KMP_CPU_SET(i, this);
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");

      int location;
      int gtid = __kmp_entry_gtid();
      int tid = thread_self();

      // Unbind the thread if it was bound to any processors before so that
      // we can bind the thread to CPUs specified by the mask not others.
      int retval = bindprocessor(BINDTHREAD, tid, PROCESSOR_CLASS_ANY);

      // On AIX, we can only bind to one CPU at a time with the
      // bindprocessor() system call.
      KMP_CPU_SET_ITERATE(location, this) {
        if (KMP_CPU_ISSET(location, this)) {
          retval = bindprocessor(BINDTHREAD, tid, location);
          if (retval == -1 && errno == 1) {
            rsid_t rsid;
            rsethandle_t rsh;
            // Put something in rsh to prevent compiler warning
            // about uninitialized use
            rsh = rs_alloc(RS_EMPTY);
            rsid.at_pid = getpid();
            if (RS_DEFAULT_RSET != ra_getrset(R_PROCESS, rsid, 0, rsh)) {
              retval = ra_detachrset(R_PROCESS, rsid, 0);
              retval = bindprocessor(BINDTHREAD, tid, location);
            }
          }
          if (retval == 0) {
            KA_TRACE(10, ("__kmp_set_system_affinity: Done binding "
                          "T#%d to cpu=%d.\n",
                          gtid, location));
            continue;
          }
          int error = errno;
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "bindprocessor()"),
                        KMP_ERR(error), __kmp_msg_null);
            KA_TRACE(10, ("__kmp_set_system_affinity: Error binding "
                          "T#%d to cpu=%d, errno=%d.\n",
                          gtid, location, error));
            return error;
          }
        }
      }
      return 0;
    }
#else // !KMP_OS_AIX
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif // KMP_OS_AIX
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY \
          || KMP_OS_AIX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] &
              ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    bool empty() const override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        if (mask[i])
          return false;
      return true;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    bool is_equal(const KMPAffinity::Mask *rhs) const override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        if (mask[i] != convert->mask[i])
          return false;
      return true;
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
#if KMP_AFFINITY_SUPPORTED
  bool contains(const kmp_affinity_attrs_t &attr) const {
    if (!valid && !attr.valid)
      return true;
    if (valid && attr.valid) {
      if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
        return (is_core_type_valid() &&
                (get_core_type() == (kmp_hw_core_type_t)attr.core_type));
      if (attr.core_eff != UNKNOWN_CORE_EFF)
        return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
      return true;
    }
    return false;
  }
#endif
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
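
// Editorial sketch (not in the original source): contains() is a subset test
// on attributes; every field the argument asks for must match in *this.
//
//   kmp_hw_attr_t have, want;
//   have.set_core_type(KMP_HW_CORE_TYPE_CORE); // x86 hybrid enumerator
//   have.set_core_eff(1);
//   want.set_core_type(KMP_HW_CORE_TYPE_CORE);
//   bool a = have.contains(want); // true: requested core type matches
//   want.set_core_type(KMP_HW_CORE_TYPE_ATOM);
//   bool b = have.contains(want); // false: core types differ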
#if KMP_AFFINITY_SUPPORTED
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif
class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static const int MULTIPLE_ID = -2;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};
class kmp_topology_t {

  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are all 'depth' long.
  // Ordered array of the types in the topology
  kmp_hw_t *types;
  // Quick topology ratios: the max number of items of one layer per item of
  // the layer above it (e.g., threads per core)
  int *ratio;
  // Absolute number of items in each topology layer
  int *count;

  // The number of core efficiencies and core types (hybrid topologies only)
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array, num_hw_threads long
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence map from a hardware topology type to its equivalent type in
  // the types[] array; KMP_HW_UNKNOWN means there is no equivalent type
  kmp_hw_t equivalent[KMP_HW_LAST];

  // Flags describing the topology
  flags_t flags;

  // Compact value used during sort_compact()
  int compact;

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif
  // Count each item & get the num x's per y (e.g., threads per core)
  void _gather_enumeration_information();
  // Remove layers that don't add information to the topology
  void _remove_radix1_layers();
  // Find out if the topology is uniform
  void _discover_uniformity();
  // Set all the sub_ids for each hardware thread
  void _set_sub_ids();
  // Set global affinity variables describing the topology
  void _set_globals();
  // Set the last level cache equivalent type
  void _set_last_level_cache();
  // Return the number of cores with attribute 'attr'; if 'find_all' is true,
  // count all cores on the machine, otherwise count per the layer 'above'
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth,
                                  const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }

  // Insert a new topology layer after allocation
  void insert_layer(kmp_hw_t type, const int *ids);

  // Check if the hardware ids are unique; return true if so, otherwise false
  bool check_ids() const;

  // Functions to call after the create_map() routine
  void canonicalize();
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

// Functions used after canonicalize() is called
#if KMP_AFFINITY_SUPPORTED
  // Set the granularity for affinity settings
  void set_granularity(kmp_affinity_t &stgs) const;
  bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
  bool restrict_to_mask(const kmp_affin_mask_t *mask);
  bool filter_hw_subset();
#endif
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology;
  // returns KMP_HW_UNKNOWN when there is no equivalent type
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
    if (type == KMP_HW_UNKNOWN)
      return KMP_HW_UNKNOWN;
    return equivalent[type];
  }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // This loop is required since any of the types may have been set to be
    // equivalent to type1. They all must be checked and reset to type2.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate number of types corresponding to level1 per types corresponding
  // to level2 (e.g., number of threads per core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute 'attr' per topology level
  // 'above'
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }
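  // Editorial example (not in the original source): with types[] =
  // {socket, core, thread} and ratio[] = {4, 6, 2} (4 sockets, 6 cores per
  // socket, 2 threads per core), calculate_ratio(get_level(KMP_HW_THREAD),
  // get_level(KMP_HW_SOCKET)) multiplies ratio[2] * ratio[1] = 2 * 6 = 12,
  // i.e. 12 hardware threads per socket.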
#if KMP_AFFINITY_SUPPORTED
  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
  void sort_compact(kmp_affinity_t &affinity) {
    compact = affinity.compact;
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
  void dump() const;
};
extern kmp_topology_t *__kmp_topology;
class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Put parenthesis around max to avoid accidental use of Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sorting the KMP_HW_SUBSET items to follow topology order
  // All unknown topology types will be at the beginning of the subset
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    // If an item for this layer type already exists, add the new num, offset,
    // and attr to that item
    for (int i = 0; i < depth; ++i) {
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    // Otherwise append a new item, growing the array if necessary
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }

  // Canonicalize the KMP_HW_SUBSET value if it is not an absolute subset:
  // put each of {sockets, cores, threads} into the subset if it was not
  // specified, e.g., 1s,2c => 1s,2c,*t | 2c,1t => *s,2c,1t
  void canonicalize(const kmp_topology_t *top) {
    // Layers to target for KMP_HW_SUBSET canonicalization
    kmp_hw_t targeted[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};

    // Do not target-layer-canonicalize absolute KMP_HW_SUBSETs
    if (is_absolute())
      return;

    // Do not target-layer-canonicalize KMP_HW_SUBSETs when the
    // topology doesn't have these layers
    for (kmp_hw_t type : targeted)
      if (top->get_level(type) == KMP_HW_UNKNOWN)
        return;

    // Put targeted layers in the subset if they do not exist
    for (kmp_hw_t type : targeted) {
      bool found = false;
      for (int i = 0; i < get_depth(); ++i) {
        if (top->get_equivalent_type(items[i].type) == type) {
          found = true;
          break;
        }
      }
      if (!found) {
        push_back(USE_ALL, type, 0, kmp_hw_attr_t{});
      }
    }
    sort();
  }
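
  // Editorial sketch (not part of the original header): KMP_HW_SUBSET parsing
  // in the runtime records one item per layer with push_back(). For example,
  // a hypothetical subset "2s,4c,2t" would be stored roughly as:
  //
  //   __kmp_hw_subset->push_back(2, KMP_HW_SOCKET, 0, kmp_hw_attr_t{});
  //   __kmp_hw_subset->push_back(4, KMP_HW_CORE, 0, kmp_hw_attr_t{});
  //   __kmp_hw_subset->push_back(2, KMP_HW_THREAD, 0, kmp_hw_attr_t{});
  //
  // canonicalize() above then fills in any of {socket, core, thread} the user
  // left out with USE_ALL, and sort() puts the items in topology order.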
  // Only use this in debug mode
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf("  type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf("    num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;
/* Machine-specific hierarchy info, computed once at init. It maps threads to
   the actual machine hierarchy (or our best guess at it) for the purpose of
   performing an efficient barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Number of levels in the hierarchy; oversubscription adds levels, doubling
  // the thread capacity of the hierarchy with each added level.
  kmp_uint32 maxLevels;
  // Depth of the machine configuration hierarchy: the number of levels along
  // the longest path from root to any leaf.
  kmp_uint32 depth;
  kmp_uint32 base_num_threads = 0;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
  // Level 0 corresponds to leaves; numPerLevel[i] is the number of children
  // the parent of a node at level i has. All empty levels are set to 1.
  kmp_uint32 *numPerLevel = nullptr;
  kmp_uint32 *skipPerLevel = nullptr;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }
  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    // Explicitly re-initialize the data fields to prevent use of dirty values
    // when a static library is re-initialized multiple times.
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) {
      // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
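
  // Worked example (editorial note, not in the original source): for a
  // machine with 4 packages, 4 cores per package and 2 threads per core,
  // deriveLevels() yields numPerLevel = {2, 4, 4, 1, ...}. The loops above
  // then give skipPerLevel = {1, 2, 8, 32, 64, ...}: skipPerLevel[i] is the
  // number of leaves (threads) spanned by one node at level i, with levels
  // past 'depth' doubling to absorb oversubscription.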
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize
    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;
      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);
      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }
      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }
      // Free old arrays
      __kmp_free(old_numPerLevel);
    }
    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H