#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need
  // to init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
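// Illustrative note (not from the original sources): for a machine modeled
// with numPerLevel = {4, 2} (4 threads per leaf group, 2 groups), a thread's
// barrier state gets depth = 2, base_leaf_kids = 3, and skip_per_level strides
// it can use to locate its parent at each level without scanning the team.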
#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use hwloc if the user asked for the hwloc topology method and
  // affinity is not explicitly disabled.
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}
void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
#define KMP_ADVANCE_SCAN(scan)                                                 \
  while (*scan != '\0') {                                                      \
    scan++;                                                                    \
  }
// Print the affinity mask to the character array in a pretty format.
// The format is a comma separated list of non-negative integers or ranges,
// e.g. 1,2,3-5,7,9-15, or "{<empty>}" if no bits are set in the mask.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf_len >= 40);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Check for an empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }
  first_range = true;
  start = mask->begin();
  while (1) {
    // Find the next range: [start, previous] is an inclusive range of
    // contiguous bits in the mask.
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // Every range after the first is preceded by a comma.
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the mask
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
                   static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the mask
      KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with a new start point.
    start = finish;
    if (start == mask->end())
      break;
    if (end - scan < 2) // check for buffer overflow
      break;
  }

  KMP_ASSERT(scan <= end);
  return buf;
}
#undef KMP_ADVANCE_SCAN
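// Illustrative usage sketch (not part of the original file, deliberately
// guarded out of the build): format the runtime's full affinity mask for a
// diagnostic message. Output is a comma separated list of ranges such as
// "0-3,8,10-11", or "{<empty>}" when no bits are set.
#if 0
static void __kmp_example_show_full_mask() {
  char buf[KMP_AFFIN_MASK_PRINT_LEN];
  __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
  KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
}
#endif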
// Internal print routine using kmp_str_buf_t; same format as above.
kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
                                           kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(mask);

  __kmp_str_buf_clear(buf);

  // Check for an empty set.
  if (mask->begin() == mask->end()) {
    __kmp_str_buf_print(buf, "%s", "{<empty>}");
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find the next range: [start, previous] is an inclusive range of
    // contiguous bits in the mask.
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // Every range after the first is preceded by a comma.
    if (!first_range) {
      __kmp_str_buf_print(buf, "%s", ",");
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the mask
    if (previous - start > 1) {
      __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
                          static_cast<int>(previous));
    } else {
      // Range with one or two contiguous bits in the mask
      __kmp_str_buf_print(buf, "%d", static_cast<int>(start));
      if (previous - start > 0) {
        __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
      }
    }
    // Start over with a new start point.
    start = finish;
    if (start == mask->end())
      break;
  }
  return buf;
}
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else
#endif /* KMP_GROUP_AFFINITY */
  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to assign a child number to each address.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}
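// Worked example (illustrative): for four addresses labeled (package, core)
//   {0,0} {0,1} {1,0} {1,1}
// the loop above assigns childNums {0,0} {0,1} {1,0} {1,1}: the core counter
// resets whenever the package label changes, so every label becomes a sibling
// index relative to its parent rather than a global index.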
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from places that
// support arbitrarily many levels in the machine topology map, i.e. the
// non-default cases in __kmp_affinity_create_cpuinfo_map().
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
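// Example (illustrative): 2 packages x 4 cores x 2 hw threads = 16 procs; the
// topology is uniform when __kmp_avail_proc is 16, but if the initial affinity
// mask hides one core, 14 != 16 and the map is reported as non-uniform.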
// Print out the detailed machine topology map, i.e. the physical location of
// each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        addrP[proc].first.labels[0]);
    int label = 1; // package label was already printed
    int level = 0; // iterate over levels[] (node, tile, core, thread)
    if (__kmp_numa_detected)
      // node level follows package
      if (levels[level++] > 0)
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                            addrP[proc].first.labels[label++]);
    if (__kmp_tile_depth > 0)
      // tile level follows node if any, or package
      if (levels[level++] > 0)
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                            addrP[proc].first.labels[label++]);
    if (levels[level++] > 0)
      // core level follows tile if any, or node, or package
      __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                          addrP[proc].first.labels[label++]);
    if (levels[level++] > 0)
      // thread level is the innermost
      __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                          addrP[proc].first.labels[label++]);
    KMP_DEBUG_ASSERT(label == depth);
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}
static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;

// Remove topology levels that are radix 1, i.e. carry no distinguishing
// labels. Returns the new depth of addrP.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, int *levels) {
  int level, i, j, radix1_detected;
  int new_depth = depth;
  for (level = depth - 1; level > 0; --level) {
    // Detect if this level is radix 1.
    radix1_detected = 1;
    for (i = 1; i < nTh; ++i) {
      if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected: drop the level.
    --new_depth;
    levels[level] = -1; // mark the level as absent in the address2os array
    if (level == new_depth) {
      // Deepest level: just decrement the depth of each address.
      for (i = 0; i < nTh; ++i)
        addrP[i].first.depth--;
    } else {
      // Otherwise shift the deeper labels up and reduce the depth.
      for (j = level; j < new_depth; ++j) {
        for (i = 0; i < nTh; ++i) {
          addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
          addrP[i].first.depth--;
        }
      }
    }
  }
  return new_depth;
}
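// Example (illustrative): with one hw thread per core, every address carries
// the same thread-level label, so that level is radix 1; a depth-3 map
// (package, core, thread) collapses to depth 2 (package, core) and the
// corresponding levels[] entry is marked -1.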
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology,
                                                       obj->type, first) == obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first))
    ++retval;
  return retval;
}

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum;
}

static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum;
}
// Visit all cores and PUs below a given hwloc object, recording one address
// per active PU and updating the per-core / per-socket counters.
static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
                                           int &nActiveThreads,
                                           int &num_active_cores,
                                           hwloc_obj_t obj, int depth,
                                           int *labels) {
  hwloc_obj_t core = NULL;
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
  for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
    hwloc_obj_t pu = NULL;
    KMP_DEBUG_ASSERT(core != NULL);
    int num_active_threads = 0;
    int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
      KMP_DEBUG_ASSERT(pu != NULL);
      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
        continue; // skip inactive (inaccessible) unit
      Address addr(depth + 2);
      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    obj->os_index, obj->logical_index, core->os_index,
                    core->logical_index, pu->os_index, pu->logical_index));
      for (int i = 0; i < depth; ++i)
        addr.labels[i] = labels[i]; // package, etc.
      addr.labels[depth] = core_id; // core
      addr.labels[depth + 1] = pu_id; // pu
      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
      nActiveThreads++; // count active threads
      ++num_active_threads; // count active threads per core
    }
    if (num_active_threads) { // were there any active threads on the core?
      ++__kmp_ncores; // count total active cores
      ++num_active_cores; // count active cores per socket
      if (num_active_threads > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
    }
  }
  return 0;
}
// Check whether a NUMA node exists below the package level and whether a tile
// (cores sharing an L2) level exists; record __kmp_tile_depth if so.
static int __kmp_hwloc_check_numa() {
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
  int depth;

  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
  if (hT == NULL) // something has gone wrong
    return 1;

  // Check NUMA node below PACKAGE
  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
  KMP_DEBUG_ASSERT(hS != NULL);
  if (hN != NULL && hN->depth > hS->depth) {
    __kmp_numa_detected = TRUE; // socket includes node(s)
    if (__kmp_affinity_gran == affinity_gran_node) {
      __kmp_affinity_gran = affinity_gran_numa;
    }
  }

  // Check tile; get object by depth because multiple caches are possible
  depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
  hC = NULL; // not used, but reset it just in case
  if (hL != NULL &&
      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
    __kmp_tile_depth = depth; // tile consists of multiple cores
  return 0;
}
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
563 hwloc_topology_t &tp = __kmp_hwloc_topology;
565 *msg_id = kmp_i18n_null;
568 kmp_affin_mask_t *oldMask;
569 KMP_CPU_ALLOC(oldMask);
570 __kmp_get_system_affinity(oldMask, TRUE);
571 __kmp_hwloc_check_numa();
573 if (!KMP_AFFINITY_CAPABLE()) {
576 KMP_ASSERT(__kmp_affinity_type == affinity_none);
578 nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
579 hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
580 __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
581 hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
582 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
583 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
584 if (__kmp_affinity_verbose) {
585 KMP_INFORM(AffNotCapableUseLocCpuidL11,
"KMP_AFFINITY");
586 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
587 if (__kmp_affinity_uniform_topology()) {
588 KMP_INFORM(Uniform,
"KMP_AFFINITY");
590 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
592 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
593 __kmp_nThreadsPerCore, __kmp_ncores);
595 KMP_CPU_FREE(oldMask);
600 int levels[5] = {0, 1, 2, 3, 4};
602 if (__kmp_numa_detected)
604 if (__kmp_tile_depth)
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
618 hwloc_obj_t socket, node, tile;
619 int nActiveThreads = 0;
622 __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
623 nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
624 for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
625 socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
627 labels[0] = socket_id;
628 if (__kmp_numa_detected) {
630 int n_active_nodes = 0;
632 NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
636 if (__kmp_tile_depth) {
639 int n_active_tiles = 0;
641 NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
645 int n_active_cores = 0;
646 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
647 n_active_cores, tile, 3, labels);
648 if (n_active_cores) {
650 if (n_active_cores > nCorePerTile)
651 nCorePerTile = n_active_cores;
654 if (n_active_tiles) {
656 if (n_active_tiles > nTilePerNode)
657 nTilePerNode = n_active_tiles;
661 int n_active_cores = 0;
662 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
663 n_active_cores, node, 2, labels);
664 if (n_active_cores) {
666 if (n_active_cores > nCorePerNode)
667 nCorePerNode = n_active_cores;
671 if (n_active_nodes) {
673 if (n_active_nodes > nNodePerPkg)
674 nNodePerPkg = n_active_nodes;
677 if (__kmp_tile_depth) {
680 int n_active_tiles = 0;
682 NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
      for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
686 int n_active_cores = 0;
687 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
688 n_active_cores, tile, 2, labels);
689 if (n_active_cores) {
691 if (n_active_cores > nCorePerTile)
692 nCorePerTile = n_active_cores;
695 if (n_active_tiles) {
697 if (n_active_tiles > nTilePerPkg)
698 nTilePerPkg = n_active_tiles;
702 int n_active_cores = 0;
703 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
705 if (n_active_cores) {
707 if (n_active_cores > nCoresPerPkg)
708 nCoresPerPkg = n_active_cores;
715 KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
716 KMP_ASSERT(nActiveThreads > 0);
717 if (nActiveThreads == 1) {
718 __kmp_ncores = nPackages = 1;
719 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
720 if (__kmp_affinity_verbose) {
721 char buf[KMP_AFFIN_MASK_PRINT_LEN];
722 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
724 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
725 if (__kmp_affinity_respect_mask) {
726 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
728 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
730 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
731 KMP_INFORM(Uniform,
"KMP_AFFINITY");
732 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
733 __kmp_nThreadsPerCore, __kmp_ncores);
736 if (__kmp_affinity_type == affinity_none) {
738 KMP_CPU_FREE(oldMask);
744 addr.labels[0] = retval[0].first.labels[0];
745 retval[0].first = addr;
747 if (__kmp_affinity_gran_levels < 0) {
748 __kmp_affinity_gran_levels = 0;
751 if (__kmp_affinity_verbose) {
752 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
755 *address2os = retval;
756 KMP_CPU_FREE(oldMask);
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);
765 int nPUs = nPackages * __kmp_nThreadsPerCore;
766 if (__kmp_numa_detected) {
767 if (__kmp_tile_depth) {
768 nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
770 nPUs *= (nNodePerPkg * nCorePerNode);
773 if (__kmp_tile_depth) {
774 nPUs *= (nTilePerPkg * nCorePerTile);
776 nPUs *= nCoresPerPkg;
779 unsigned uniform = (nPUs == nActiveThreads);
782 if (__kmp_affinity_verbose) {
783 char mask[KMP_AFFIN_MASK_PRINT_LEN];
784 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
785 if (__kmp_affinity_respect_mask) {
786 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", mask);
788 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", mask);
790 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
792 KMP_INFORM(Uniform,
"KMP_AFFINITY");
794 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
796 if (__kmp_numa_detected) {
797 if (__kmp_tile_depth) {
798 KMP_INFORM(TopologyExtraNoTi,
"KMP_AFFINITY", nPackages, nNodePerPkg,
799 nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
802 KMP_INFORM(TopologyExtraNode,
"KMP_AFFINITY", nPackages, nNodePerPkg,
803 nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
804 nPUs *= (nNodePerPkg * nCorePerNode);
807 if (__kmp_tile_depth) {
808 KMP_INFORM(TopologyExtraTile,
"KMP_AFFINITY", nPackages, nTilePerPkg,
809 nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
812 __kmp_str_buf_init(&buf);
813 __kmp_str_buf_print(&buf,
"%d", nPackages);
814 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, nCoresPerPkg,
815 __kmp_nThreadsPerCore, __kmp_ncores);
816 __kmp_str_buf_free(&buf);
821 if (__kmp_affinity_type == affinity_none) {
823 KMP_CPU_FREE(oldMask);
827 int depth_full = depth;
830 depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
832 KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
833 if (__kmp_affinity_gran_levels < 0) {
836 __kmp_affinity_gran_levels = 0;
837 if (__kmp_affinity_gran > affinity_gran_thread) {
      for (int i = 1; i <= depth_full; ++i) {
839 if (__kmp_affinity_gran <= i)
841 if (levels[depth_full - i] > 0)
842 __kmp_affinity_gran_levels++;
845 if (__kmp_affinity_gran > affinity_gran_package)
846 __kmp_affinity_gran_levels++;
849 if (__kmp_affinity_verbose)
850 __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);
852 KMP_CPU_FREE(oldMask);
853 *address2os = retval;
856 #endif // KMP_USE_HWLOC
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
864 *msg_id = kmp_i18n_null;
869 if (!KMP_AFFINITY_CAPABLE()) {
870 KMP_ASSERT(__kmp_affinity_type == affinity_none);
871 __kmp_ncores = nPackages = __kmp_xproc;
872 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
873 if (__kmp_affinity_verbose) {
874 KMP_INFORM(AffFlatTopology,
"KMP_AFFINITY");
875 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
876 KMP_INFORM(Uniform,
"KMP_AFFINITY");
877 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
878 __kmp_nThreadsPerCore, __kmp_ncores);
887 __kmp_ncores = nPackages = __kmp_avail_proc;
888 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
889 if (__kmp_affinity_verbose) {
890 char buf[KMP_AFFIN_MASK_PRINT_LEN];
891 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
892 __kmp_affin_fullMask);
894 KMP_INFORM(AffCapableUseFlat,
"KMP_AFFINITY");
895 if (__kmp_affinity_respect_mask) {
896 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
898 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
900 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
901 KMP_INFORM(Uniform,
"KMP_AFFINITY");
902 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
903 __kmp_nThreadsPerCore, __kmp_ncores);
905 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
907 if (__kmp_affinity_type == affinity_none) {
910 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
911 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
913 __kmp_pu_os_idx[avail_ct++] = i;
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
923 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
925 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
928 __kmp_pu_os_idx[avail_ct] = i;
931 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
933 if (__kmp_affinity_verbose) {
934 KMP_INFORM(OSProcToPackage,
"KMP_AFFINITY");
937 if (__kmp_affinity_gran_levels < 0) {
940 if (__kmp_affinity_gran > affinity_gran_package) {
941 __kmp_affinity_gran_levels = 1;
943 __kmp_affinity_gran_levels = 0;
949 #if KMP_GROUP_AFFINITY
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
958 *msg_id = kmp_i18n_null;
962 if (!KMP_AFFINITY_CAPABLE()) {
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
974 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
976 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
979 __kmp_pu_os_idx[avail_ct] = i;
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
983 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
985 if (__kmp_affinity_verbose) {
986 KMP_INFORM(AffOSProcToGroup,
"KMP_AFFINITY", i, addr.labels[0],
  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // Unsupported granularity with the group topology method: fall back to
      // thread granularity.
      __kmp_affinity_gran_levels = 0;
    }
  }
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;
  while ((1 << r) < count)
    ++r;
  return r;
}

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; //      ""
  unsigned maxThreadsPerPkg; //      ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; //      ""
  unsigned threadId; //      ""
};

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
1044 if (aa->pkgId < bb->pkgId)
1046 if (aa->pkgId > bb->pkgId)
1048 if (aa->coreId < bb->coreId)
1050 if (aa->coreId > bb->coreId)
1052 if (aa->threadId < bb->threadId)
1054 if (aa->threadId > bb->threadId)
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
1067 *msg_id = kmp_i18n_null;
1070 __kmp_x86_cpuid(0, 0, &buf);
1072 *msg_id = kmp_i18n_str_NoLeaf4Support;
1081 if (!KMP_AFFINITY_CAPABLE()) {
1084 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1090 __kmp_x86_cpuid(1, 0, &buf);
1091 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1092 if (maxThreadsPerPkg == 0) {
1093 maxThreadsPerPkg = 1;
1107 __kmp_x86_cpuid(0, 0, &buf);
1109 __kmp_x86_cpuid(4, 0, &buf);
1110 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1128 __kmp_ncores = __kmp_xproc;
1129 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1130 __kmp_nThreadsPerCore = 1;
1131 if (__kmp_affinity_verbose) {
1132 KMP_INFORM(AffNotCapableUseLocCpuid,
"KMP_AFFINITY");
1133 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1134 if (__kmp_affinity_uniform_topology()) {
1135 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1137 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1139 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1140 __kmp_nThreadsPerCore, __kmp_ncores);
1150 kmp_affin_mask_t *oldMask;
1151 KMP_CPU_ALLOC(oldMask);
1152 KMP_ASSERT(oldMask != NULL);
1153 __kmp_get_system_affinity(oldMask, TRUE);
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
1183 unsigned nApics = 0;
1184 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1186 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
1191 __kmp_affinity_dispatch->bind_thread(i);
1192 threadInfo[nApics].osId = i;
1195 __kmp_x86_cpuid(1, 0, &buf);
1196 if (((buf.edx >> 9) & 1) == 0) {
1197 __kmp_set_system_affinity(oldMask, TRUE);
1198 __kmp_free(threadInfo);
1199 KMP_CPU_FREE(oldMask);
1200 *msg_id = kmp_i18n_str_ApicNotPresent;
1203 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1204 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1205 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1206 threadInfo[nApics].maxThreadsPerPkg = 1;
1215 __kmp_x86_cpuid(0, 0, &buf);
1217 __kmp_x86_cpuid(4, 0, &buf);
1218 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1220 threadInfo[nApics].maxCoresPerPkg = 1;
1224 int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
1225 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1227 int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
1228 int widthT = widthCT - widthC;
1233 __kmp_set_system_affinity(oldMask, TRUE);
1234 __kmp_free(threadInfo);
1235 KMP_CPU_FREE(oldMask);
1236 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1240 int maskC = (1 << widthC) - 1;
1241 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
1243 int maskT = (1 << widthT) - 1;
1244 threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
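    // Worked example (illustrative): with maxThreadsPerPkg = 8 and
    // maxCoresPerPkg = 4, widthCT = 3, widthC = 2 and widthT = 1. For
    // apicId = 0b101101: pkgId = apicId >> 3 = 5, coreId = (apicId >> 1) & 3
    // = 2, threadId = apicId & 1 = 1.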
1251 __kmp_set_system_affinity(oldMask, TRUE);
1260 KMP_ASSERT(nApics > 0);
1262 __kmp_ncores = nPackages = 1;
1263 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1264 if (__kmp_affinity_verbose) {
1265 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1266 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1268 KMP_INFORM(AffUseGlobCpuid,
"KMP_AFFINITY");
1269 if (__kmp_affinity_respect_mask) {
1270 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
1272 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
1274 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1275 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1276 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1277 __kmp_nThreadsPerCore, __kmp_ncores);
1280 if (__kmp_affinity_type == affinity_none) {
1281 __kmp_free(threadInfo);
1282 KMP_CPU_FREE(oldMask);
    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
1288 addr.labels[0] = threadInfo[0].pkgId;
1289 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1291 if (__kmp_affinity_gran_levels < 0) {
1292 __kmp_affinity_gran_levels = 0;
1295 if (__kmp_affinity_verbose) {
1296 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1299 __kmp_free(threadInfo);
1300 KMP_CPU_FREE(oldMask);
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);
1323 __kmp_nThreadsPerCore = 1;
1324 unsigned nCores = 1;
1327 unsigned lastPkgId = threadInfo[0].pkgId;
1328 unsigned coreCt = 1;
1329 unsigned lastCoreId = threadInfo[0].coreId;
1330 unsigned threadCt = 1;
1331 unsigned lastThreadId = threadInfo[0].threadId;
1334 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1335 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      nPackages++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars, too.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check for consistency of the cpuid info reported on each hw thread.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;
1396 __kmp_ncores = nCores;
1397 if (__kmp_affinity_verbose) {
1398 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1399 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1401 KMP_INFORM(AffUseGlobCpuid,
"KMP_AFFINITY");
1402 if (__kmp_affinity_respect_mask) {
1403 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
1405 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
1407 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1408 if (__kmp_affinity_uniform_topology()) {
1409 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1411 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1413 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1414 __kmp_nThreadsPerCore, __kmp_ncores);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1419 for (i = 0; i < nApics; ++i) {
1420 __kmp_pu_os_idx[i] = threadInfo[i].osId;
1422 if (__kmp_affinity_type == affinity_none) {
1423 __kmp_free(threadInfo);
1424 KMP_CPU_FREE(oldMask);
1432 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1434 (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1435 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1437 KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1440 for (i = 0; i < nApics; ++i) {
1441 Address addr(depth);
1442 unsigned os = threadInfo[i].osId;
1445 if (pkgLevel >= 0) {
1446 addr.labels[d++] = threadInfo[i].pkgId;
1448 if (coreLevel >= 0) {
1449 addr.labels[d++] = threadInfo[i].coreId;
1451 if (threadLevel >= 0) {
1452 addr.labels[d++] = threadInfo[i].threadId;
1454 (*address2os)[i] = AddrUnsPair(addr, os);
1457 if (__kmp_affinity_gran_levels < 0) {
1460 __kmp_affinity_gran_levels = 0;
1461 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1462 __kmp_affinity_gran_levels++;
1464 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1465 __kmp_affinity_gran_levels++;
1467 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1468 __kmp_affinity_gran_levels++;
1472 if (__kmp_affinity_verbose) {
1473 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1474 coreLevel, threadLevel);
1477 __kmp_free(threadInfo);
1478 KMP_CPU_FREE(oldMask);
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
1489 *msg_id = kmp_i18n_null;
1492 __kmp_x86_cpuid(0, 0, &buf);
1494 *msg_id = kmp_i18n_str_NoLeaf11Support;
1497 __kmp_x86_cpuid(11, 0, &buf);
1499 *msg_id = kmp_i18n_str_NoLeaf11Support;
1508 int threadLevel = -1;
1511 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1513 for (level = 0;; level++) {
1524 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1527 __kmp_x86_cpuid(11, level, &buf);
1536 int kind = (buf.ecx >> 8) & 0xff;
1539 threadLevel = level;
1542 __kmp_nThreadsPerCore = buf.ebx & 0xffff;
1543 if (__kmp_nThreadsPerCore == 0) {
1544 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
    } else if (kind == 2) {
1551 nCoresPerPkg = buf.ebx & 0xffff;
1552 if (nCoresPerPkg == 0) {
1553 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1558 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1561 if (pkgLevel >= 0) {
1565 nPackages = buf.ebx & 0xffff;
1566 if (nPackages == 0) {
1567 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1578 if (threadLevel >= 0) {
1579 threadLevel = depth - threadLevel - 1;
1581 if (coreLevel >= 0) {
1582 coreLevel = depth - coreLevel - 1;
1584 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1585 pkgLevel = depth - pkgLevel - 1;
1592 if (!KMP_AFFINITY_CAPABLE()) {
1595 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1597 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1598 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1599 if (__kmp_affinity_verbose) {
1600 KMP_INFORM(AffNotCapableUseLocCpuidL11,
"KMP_AFFINITY");
1601 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1602 if (__kmp_affinity_uniform_topology()) {
1603 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1605 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1607 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1608 __kmp_nThreadsPerCore, __kmp_ncores);
1618 kmp_affin_mask_t *oldMask;
1619 KMP_CPU_ALLOC(oldMask);
1620 __kmp_get_system_affinity(oldMask, TRUE);
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1630 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
1632 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
1635 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1637 __kmp_affinity_dispatch->bind_thread(proc);
1640 Address addr(depth);
1643 for (level = 0; level < depth; level++) {
1644 __kmp_x86_cpuid(11, level, &buf);
1645 unsigned apicId = buf.edx;
1647 if (level != depth - 1) {
1648 KMP_CPU_FREE(oldMask);
1649 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1652 addr.labels[depth - level - 1] = apicId >> prev_shift;
1656 int shift = buf.eax & 0x1f;
1657 int mask = (1 << shift) - 1;
1658 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1661 if (level != depth) {
1662 KMP_CPU_FREE(oldMask);
1663 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1667 retval[nApics] = AddrUnsPair(addr, proc);
1673 __kmp_set_system_affinity(oldMask, TRUE);
1676 KMP_ASSERT(nApics > 0);
1678 __kmp_ncores = nPackages = 1;
1679 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1680 if (__kmp_affinity_verbose) {
1681 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1682 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1684 KMP_INFORM(AffUseGlobCpuidL11,
"KMP_AFFINITY");
1685 if (__kmp_affinity_respect_mask) {
1686 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
1688 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
1690 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1691 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1692 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1693 __kmp_nThreadsPerCore, __kmp_ncores);
1696 if (__kmp_affinity_type == affinity_none) {
1698 KMP_CPU_FREE(oldMask);
1704 addr.labels[0] = retval[0].first.labels[pkgLevel];
1705 retval[0].first = addr;
1707 if (__kmp_affinity_gran_levels < 0) {
1708 __kmp_affinity_gran_levels = 0;
1711 if (__kmp_affinity_verbose) {
1712 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1715 *address2os = retval;
1716 KMP_CPU_FREE(oldMask);
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1728 for (level = 0; level < depth; level++) {
1732 last[level] = retval[0].first.labels[level];
1739 for (proc = 1; (int)proc < nApics; proc++) {
1741 for (level = 0; level < depth; level++) {
1742 if (retval[proc].first.labels[level] != last[level]) {
1744 for (j = level + 1; j < depth; j++) {
1755 last[j] = retval[proc].first.labels[j];
1759 if (counts[level] > maxCt[level]) {
1760 maxCt[level] = counts[level];
1762 last[level] = retval[proc].first.labels[level];
    } else if (level == depth - 1) {
1770 KMP_CPU_FREE(oldMask);
1771 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1781 if (threadLevel >= 0) {
1782 __kmp_nThreadsPerCore = maxCt[threadLevel];
1784 __kmp_nThreadsPerCore = 1;
1786 nPackages = totals[pkgLevel];
1788 if (coreLevel >= 0) {
1789 __kmp_ncores = totals[coreLevel];
1790 nCoresPerPkg = maxCt[coreLevel];
1792 __kmp_ncores = nPackages;
1797 unsigned prod = maxCt[0];
1798 for (level = 1; level < depth; level++) {
1799 prod *= maxCt[level];
1801 bool uniform = (prod == totals[level - 1]);
1804 if (__kmp_affinity_verbose) {
1805 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1806 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1808 KMP_INFORM(AffUseGlobCpuidL11,
"KMP_AFFINITY");
1809 if (__kmp_affinity_respect_mask) {
1810 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", mask);
1812 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", mask);
1814 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1816 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1818 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1822 __kmp_str_buf_init(&buf);
1824 __kmp_str_buf_print(&buf,
"%d", totals[0]);
1825 for (level = 1; level <= pkgLevel; level++) {
1826 __kmp_str_buf_print(&buf,
" x %d", maxCt[level]);
1828 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, nCoresPerPkg,
1829 __kmp_nThreadsPerCore, __kmp_ncores);
1831 __kmp_str_buf_free(&buf);
1833 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1834 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1836 for (proc = 0; (int)proc < nApics; ++proc) {
1837 __kmp_pu_os_idx[proc] = retval[proc].second;
1839 if (__kmp_affinity_type == affinity_none) {
1845 KMP_CPU_FREE(oldMask);
1852 for (level = 0; level < depth; level++) {
1853 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1861 if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1864 for (proc = 0; (int)proc < nApics; proc++) {
1865 Address addr(new_depth);
1866 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1869 int newPkgLevel = -1;
1870 int newCoreLevel = -1;
1871 int newThreadLevel = -1;
1872 for (level = 0; level < depth; level++) {
1873 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1877 if (level == pkgLevel) {
1878 newPkgLevel = new_level;
1880 if (level == coreLevel) {
1881 newCoreLevel = new_level;
1883 if (level == threadLevel) {
1884 newThreadLevel = new_level;
1886 for (proc = 0; (int)proc < nApics; proc++) {
1887 new_retval[proc].first.labels[new_level] =
1888 retval[proc].first.labels[level];
1894 retval = new_retval;
1896 pkgLevel = newPkgLevel;
1897 coreLevel = newCoreLevel;
1898 threadLevel = newThreadLevel;
1901 if (__kmp_affinity_gran_levels < 0) {
1904 __kmp_affinity_gran_levels = 0;
1905 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1906 __kmp_affinity_gran_levels++;
1908 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1909 __kmp_affinity_gran_levels++;
1911 if (__kmp_affinity_gran > affinity_gran_package) {
1912 __kmp_affinity_gran_levels++;
1916 if (__kmp_affinity_verbose) {
1917 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
1925 KMP_CPU_FREE(oldMask);
1926 *address2os = retval;
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
1957 #if KMP_USE_HIER_SCHED
1959 static void __kmp_dispatch_set_hierarchy_values() {
1965 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
1966 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
1967 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
1968 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1969 if (__kmp_mic_type >= mic3)
1970 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
1972 #endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1973 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
1974 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
1975 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
1976 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
1979 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
1980 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
1981 __kmp_nThreadsPerCore;
1982 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1983 if (__kmp_mic_type >= mic3)
1984 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
1985 2 * __kmp_nThreadsPerCore;
1987 #endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
1988 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
1989 __kmp_nThreadsPerCore;
1990 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
1991 nCoresPerPkg * __kmp_nThreadsPerCore;
1992 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
1993 nCoresPerPkg * __kmp_nThreadsPerCore;
1994 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
1995 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
2001 int index = type + 1;
2002 int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
2003 KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
2004 if (type == kmp_hier_layer_e::LAYER_THREAD)
2006 else if (type == kmp_hier_layer_e::LAYER_LOOP)
2008 KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
2009 if (tid >= num_hw_threads)
2010 tid = tid % num_hw_threads;
2011 return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
2015 int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
2018 KMP_DEBUG_ASSERT(i1 <= i2);
2019 KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
2020 KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
2021 KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
2023 return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
2025 #endif // KMP_USE_HIER_SCHED
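// Example (illustrative): with 2 hw threads per core, the tables above give
// __kmp_hier_threads_per[LAYER_L1 + 1] = 2, so
// __kmp_dispatch_get_index(5, kmp_hier_layer_e::LAYER_L1) returns
// (5 / 2) % __kmp_ncores = 2, i.e. hw thread 5 belongs to the third L1 (core)
// unit.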
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Scan the file once, counting the number of "processor" (osId) fields and
  // finding the highest level <n> of any node_<n> id field.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF
      break;
    }
    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
    }
  }
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }
2081 if (fseek(f, 0, SEEK_SET) != 0) {
2083 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }
#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  unsigned __index;
#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }
2111 for (i = 0; i <= num_records; i++) {
2112 INIT_PROC_INFO(threadInfo[i]);
2115 unsigned num_avail = 0;
    buf[sizeof(buf) - 1] = 1;
    bool long_line = false;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read error, presumably EOF. If there is valid data pending in
      // threadInfo[num_avail], fake a blank line so it still gets parsed.
      bool valid = false;
      for (i = 0; i <= maxIndex; i++) {
        if (threadInfo[num_avail][i] != UINT_MAX) {
          valid = true;
        }
      }
      if (!valid) {
        break;
      }
      buf[0] = 0;
    } else if (!buf[sizeof(buf) - 1]) {
      // The line is longer than the buffer. Set a flag and don't emit an
      // error if we were going to ignore the line anyway.
      long_line = true;
    }
#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
          // Older AArch64 /proc/cpuinfo layouts list every 'processor' entry
          // in a single section, so the usual duplicate check does not apply.
          num_avail++;
#else
          goto dup_field;
#endif
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }
      if ((*buf != 0) && (*buf != '\n')) {
        // Skip unrecognized lines; drain an overly long line to its newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A blank line ends the processor record; check there aren't too many.
      if ((int)num_avail == __kmp_xproc) {
2253 CLEANUP_THREAD_INFO;
2254 *msg_id = kmp_i18n_str_TooManyEntries;
2260 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2261 CLEANUP_THREAD_INFO;
2262 *msg_id = kmp_i18n_str_MissingProcField;
2265 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2266 CLEANUP_THREAD_INFO;
2267 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2272 if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
2273 __kmp_affin_fullMask)) {
2274 INIT_PROC_INFO(threadInfo[num_avail]);
2281 KMP_ASSERT(num_avail <= num_records);
2282 INIT_PROC_INFO(threadInfo[num_avail]);
2287 CLEANUP_THREAD_INFO;
2288 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2292 CLEANUP_THREAD_INFO;
2293 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2298 #if KMP_MIC && REDUCE_TEAM_SIZE
2299 unsigned teamSize = 0;
2300 #endif // KMP_MIC && REDUCE_TEAM_SIZE
2311 KMP_ASSERT(num_avail > 0);
2312 KMP_ASSERT(num_avail <= num_records);
2313 if (num_avail == 1) {
2315 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2316 if (__kmp_affinity_verbose) {
2317 if (!KMP_AFFINITY_CAPABLE()) {
2318 KMP_INFORM(AffNotCapableUseCpuinfo,
"KMP_AFFINITY");
2319 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2320 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2322 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2323 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2324 __kmp_affin_fullMask);
2325 KMP_INFORM(AffCapableUseCpuinfo,
"KMP_AFFINITY");
2326 if (__kmp_affinity_respect_mask) {
2327 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
2329 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
2331 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2332 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2336 __kmp_str_buf_init(&buf);
2337 __kmp_str_buf_print(&buf,
"1");
2338 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2339 __kmp_str_buf_print(&buf,
" x 1");
2341 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, 1, 1, 1);
2342 __kmp_str_buf_free(&buf);
2345 if (__kmp_affinity_type == affinity_none) {
2346 CLEANUP_THREAD_INFO;
    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
2352 addr.labels[0] = threadInfo[0][pkgIdIndex];
2353 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2355 if (__kmp_affinity_gran_levels < 0) {
2356 __kmp_affinity_gran_levels = 0;
2359 if (__kmp_affinity_verbose) {
2360 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2363 CLEANUP_THREAD_INFO;
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
2390 unsigned threadIdCt;
2393 restart_radix_check:
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
2404 for (index = 0; index <= maxIndex; index++) {
2408 lastId[index] = threadInfo[0][index];
2413 for (i = 1; i < num_avail; i++) {
2416 for (index = maxIndex; index >= threadIdIndex; index--) {
2417 if (assign_thread_ids && (index == threadIdIndex)) {
2419 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2420 threadInfo[i][threadIdIndex] = threadIdCt++;
2424 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2425 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2428 if (threadInfo[i][index] != lastId[index]) {
2433 for (index2 = threadIdIndex; index2 < index; index2++) {
2435 if (counts[index2] > maxCt[index2]) {
2436 maxCt[index2] = counts[index2];
2439 lastId[index2] = threadInfo[i][index2];
2443 lastId[index] = threadInfo[i][index];
2445 if (assign_thread_ids && (index > threadIdIndex)) {
2447 #if KMP_MIC && REDUCE_TEAM_SIZE
2450 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2451 #endif // KMP_MIC && REDUCE_TEAM_SIZE
2457 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2458 threadInfo[i][threadIdIndex] = threadIdCt++;
2464 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2465 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2471 if (index < threadIdIndex) {
2475 if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
2480 CLEANUP_THREAD_INFO;
2481 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
      assign_thread_ids = true;
      goto restart_radix_check;
2492 #if KMP_MIC && REDUCE_TEAM_SIZE
2495 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2496 #endif // KMP_MIC && REDUCE_TEAM_SIZE
2498 for (index = threadIdIndex; index <= maxIndex; index++) {
2499 if (counts[index] > maxCt[index]) {
2500 maxCt[index] = counts[index];
2504 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2505 nCoresPerPkg = maxCt[coreIdIndex];
2506 nPackages = totals[pkgIdIndex];
2509 unsigned prod = totals[maxIndex];
2510 for (index = threadIdIndex; index < maxIndex; index++) {
2511 prod *= maxCt[index];
2513 bool uniform = (prod == totals[threadIdIndex]);
2519 __kmp_ncores = totals[coreIdIndex];
2521 if (__kmp_affinity_verbose) {
2522 if (!KMP_AFFINITY_CAPABLE()) {
2523 KMP_INFORM(AffNotCapableUseCpuinfo,
"KMP_AFFINITY");
2524 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2526 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2528 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
2531 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2532 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2533 __kmp_affin_fullMask);
2534 KMP_INFORM(AffCapableUseCpuinfo,
"KMP_AFFINITY");
2535 if (__kmp_affinity_respect_mask) {
2536 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
2538 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
2540 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2542 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2544 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
2548 __kmp_str_buf_init(&buf);
2550 __kmp_str_buf_print(&buf,
"%d", totals[maxIndex]);
2551 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2552 __kmp_str_buf_print(&buf,
" x %d", maxCt[index]);
2554 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2555 maxCt[threadIdIndex], __kmp_ncores);
2557 __kmp_str_buf_free(&buf);
2560 #if KMP_MIC && REDUCE_TEAM_SIZE
2562 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2563 __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
2568 #endif // KMP_MIC && REDUCE_TEAM_SIZE
2570 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
2573 for (i = 0; i < num_avail; ++i) {
2574 __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2577 if (__kmp_affinity_type == affinity_none) {
2582 CLEANUP_THREAD_INFO;
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;
2600 for (index = threadIdIndex; index <= maxIndex; index++) {
2605 KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2611 int threadLevel = -1;
2613 for (i = 0; i < num_avail; ++i) {
2614 Address addr(depth);
2615 unsigned os = threadInfo[i][osIdIndex];
2619 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2620 if (!inMap[src_index]) {
2623 addr.labels[dst_index] = threadInfo[i][src_index];
2624 if (src_index == pkgIdIndex) {
2625 pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
2633 (*address2os)[i] = AddrUnsPair(addr, os);
2636 if (__kmp_affinity_gran_levels < 0) {
2640 __kmp_affinity_gran_levels = 0;
2641 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2642 if (!inMap[src_index]) {
2645 switch (src_index) {
2647 if (__kmp_affinity_gran > affinity_gran_thread) {
2648 __kmp_affinity_gran_levels++;
2653 if (__kmp_affinity_gran > affinity_gran_core) {
2654 __kmp_affinity_gran_levels++;
2659 if (__kmp_affinity_gran > affinity_gran_package) {
2660 __kmp_affinity_gran_levels++;
2667 if (__kmp_affinity_verbose) {
2668 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2669 coreLevel, threadLevel);
2677 CLEANUP_THREAD_INFO;
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
2693 KMP_ASSERT(numAddrs > 0);
2694 depth = address2os[0].first.depth;
2697 for (i = numAddrs - 1;; --i) {
2698 unsigned osId = address2os[i].second;
2699 if (osId > maxOsId) {
2705 kmp_affin_mask_t *osId2Mask;
2706 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);
2713 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2714 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2715 KMP_INFORM(ThreadsMigrate,
"KMP_AFFINITY", __kmp_affinity_gran_levels);
  if (__kmp_affinity_gran_levels >= (int)depth) {
2718 if (__kmp_affinity_verbose ||
2719 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
2720 KMP_WARNING(AffThreadsMayMigrate);
2728 unsigned unique = 0;
2730 unsigned leader = 0;
2731 Address *leaderAddr = &(address2os[0].first);
2732 kmp_affin_mask_t *sum;
2733 KMP_CPU_ALLOC_ON_STACK(sum);
2735 KMP_CPU_SET(address2os[0].second, sum);
2736 for (i = 1; i < numAddrs; i++) {
2740 if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
2741 KMP_CPU_SET(address2os[i].second, sum);
2747 for (; j < i; j++) {
2748 unsigned osId = address2os[j].second;
2749 KMP_DEBUG_ASSERT(osId <= maxOsId);
2750 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2751 KMP_CPU_COPY(mask, sum);
2752 address2os[j].first.leader = (j == leader);
2758 leaderAddr = &(address2os[i].first);
2760 KMP_CPU_SET(address2os[i].second, sum);
2765 for (; j < i; j++) {
2766 unsigned osId = address2os[j].second;
2767 KMP_DEBUG_ASSERT(osId <= maxOsId);
2768 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2769 KMP_CPU_COPY(mask, sum);
2770 address2os[j].first.leader = (j == leader);
2773 KMP_CPU_FREE_FROM_STACK(sum);
  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
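// Note (illustrative): __kmp_create_masks() groups OS procs whose addresses
// agree on all but the last __kmp_affinity_gran_levels levels. With
// granularity "core" on a 2-way SMT machine, each resulting mask holds the two
// sibling hw threads of one core, and *numUnique is the number of cores.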
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                \
  {                                                                    \
    if (nextNewMask >= numNewMasks) {                                  \
      int i;                                                           \
      numNewMasks *= 2;                                                \
      kmp_affin_mask_t *temp;                                          \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                 \
      for (i = 0; i < numNewMasks / 2; i++) {                          \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);            \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);               \
        KMP_CPU_COPY(dest, src);                                       \
      }                                                                \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);          \
      newMasks = temp;                                                 \
    }                                                                  \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));       \
    nextNewMask++;                                                     \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                     \
  {                                                                    \
    if (((_osId) > _maxOsId) ||                                        \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
      if (__kmp_affinity_verbose ||                                    \
          (__kmp_affinity_warnings &&                                  \
           (__kmp_affinity_type != affinity_none))) {                  \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                    \
      }                                                                \
    } else {                                                           \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                    \
    }                                                                  \
  }
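// Parse an explicit KMP_AFFINITY proclist (e.g. "0,3-5,7"), producing the
// array of affinity masks in *out_masks / *out_numMasks. Single OS proc ids,
// brace-enclosed groups (accumulated into sumMask), and ranges with an
// optional stride are accepted; ids not present in the osId2Mask table are
// ignored with a warning.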
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  const char *scan = proclist;
  const char *next = proclist;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int start, end, stride;

  if (*next == '\0') {
  // Read the first integer of a brace-enclosed group.
  KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
  num = __kmp_str_to_int(scan, *next);
  KMP_ASSERT2(num >= 0, "bad explicit proc list");
  // Copy the mask for that OS proc id into sumMask.
  if ((num > maxOsId) ||
      (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings &&
         (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffIgnoreInvalidProcID, num);
    }
    KMP_CPU_ZERO(sumMask);
  } else {
    KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
  }
  // Read the next integer of the group.
  KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
  num = __kmp_str_to_int(scan, *next);
  KMP_ASSERT2(num >= 0, "bad explicit proc list");
  // Add the mask for this OS proc id to sumMask.
  if ((num > maxOsId) ||
      (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings &&
         (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffIgnoreInvalidProcID, num);
    }
  } else {
    KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
  }
  // Not a brace-enclosed group: read the start of a range.
  KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
  start = __kmp_str_to_int(scan, *next);
  KMP_ASSERT2(start >= 0, "bad explicit proc list");
  ADD_MASK_OSID(start, osId2Mask, maxOsId);
  // Read the end of the range.
  KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
  end = __kmp_str_to_int(scan, *next);
  KMP_ASSERT2(end >= 0, "bad explicit proc list");
  // Read an optional stride.
  KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
  stride = __kmp_str_to_int(scan, *next);
  KMP_ASSERT2(stride >= 0, "bad explicit proc list");
  // Range sanity checks (the two ordering checks below apply to the
  // positive- and negative-stride cases respectively).
  KMP_ASSERT2(stride != 0, "bad explicit proc list");
  KMP_ASSERT2(start <= end, "bad explicit proc list");
  KMP_ASSERT2(start >= end, "bad explicit proc list");
  KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
  // Add every OS proc id in the range, walking up or down by the stride.
  do {
    ADD_MASK_OSID(start, osId2Mask, maxOsId);
  } while (start <= end);
  do {
    ADD_MASK_OSID(start, osId2Mask, maxOsId);
  } while (start >= end);

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_FREE(sumMask);
}
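// Helper for OMP_PLACES-style parsing: process one subplace of the form
// "<lower>", "<lower>:<count>", or "<lower>:<count>:<stride>" appearing
// inside braces, OR-ing the masks of the selected OS procs into tempMask.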
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  int start, count, stride, i;
  // Read the starting OS proc id.
  KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
  start = __kmp_str_to_int(*scan, *next);
  KMP_ASSERT(start >= 0);
  // Valid follow sets are ',' ':' and '}'.
  if (**scan == '}' || **scan == ',') {
    if ((start > maxOsId) ||
        (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings &&
           (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, start);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
    }
    if (**scan == '}') {
  KMP_ASSERT2(**scan == ':', "bad explicit places list");
  // Read the count.
  KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
  count = __kmp_str_to_int(*scan, *next);
  KMP_ASSERT(count >= 0);
  // Valid follow sets are ',' ':' and '}'.
  if (**scan == '}' || **scan == ',') {
    for (i = 0; i < count; i++) {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
      }
    }
    if (**scan == '}') {
  KMP_ASSERT2(**scan == ':', "bad explicit places list");
  // Read the optional stride sign and value.
  if (**scan == '+') {
  if (**scan == '-') {
  KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
  stride = __kmp_str_to_int(*scan, *next);
  KMP_ASSERT(stride >= 0);
  // Valid follow sets are ',' and '}'.
  if (**scan == '}' || **scan == ',') {
    for (i = 0; i < count; i++) {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
      }
    }
    if (**scan == '}') {
  KMP_ASSERT2(0, "bad explicit places list");
}
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
    }
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  // tempMask accumulates the place currently being built; previousMask holds
  // the place most recently appended, for stride handling.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
  // Valid follow sets are ',' ':' and EOL.
  if (*scan == '\0' || *scan == ',') {
    KMP_CPU_ZERO(tempMask);
    if (*scan == '\0') {
  KMP_ASSERT2(*scan == ':', "bad explicit places list");
  // Read the count.
  KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
  count = __kmp_str_to_int(scan, *next);
  KMP_ASSERT(count >= 0);
  // Valid follow sets are ',' ':' and EOL.
  if (*scan == '\0' || *scan == ',') {
  KMP_ASSERT2(*scan == ':', "bad explicit places list");
  // Read the stride.
  KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
  stride = __kmp_str_to_int(scan, *next);
  KMP_DEBUG_ASSERT(stride >= 0);
  // Append the current place, then derive the next one by shifting it.
  for (i = 0; i < count; i++) {
    KMP_CPU_COPY(previousMask, tempMask);
    ADD_MASK(previousMask);
    KMP_CPU_ZERO(tempMask);
    KMP_CPU_SET_ITERATE(j, previousMask) {
      if (!KMP_CPU_ISSET(j, previousMask)) {
      if ((j + stride > maxOsId) || (j + stride < 0) ||
          (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
          (!KMP_CPU_ISSET(j + stride,
                          KMP_CPU_INDEX(osId2Mask, j + stride)))) {
        if ((__kmp_affinity_verbose ||
             (__kmp_affinity_warnings &&
              (__kmp_affinity_type != affinity_none))) &&
            i < count - 1) {
          KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
        }
      } else {
        KMP_CPU_SET(j + stride, tempMask);
      }
    }
  }
  KMP_CPU_ZERO(tempMask);
  // Valid follow sets are ',' and EOL.
  if (*scan == '\0') {
  KMP_ASSERT2(0, "bad explicit places list");

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
#undef ADD_MASK_OSID
#if KMP_USE_HWLOC
// Clear every PU under object o out of the full mask (they are excluded by
// KMP_HW_SUBSET); returns the number of PUs skipped.
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  int skipped = 0;
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
      ++skipped;
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return skipped;
}

// Check whether object o still has at least one PU left in the full mask.
static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
      return 1;
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0;
}
#endif // KMP_USE_HWLOC
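// Trim the detected topology according to KMP_HW_SUBSET (the __kmp_hws_*
// descriptors): keep only the requested number of sockets, optionally NUMA
// nodes and L2 tiles when hwloc is in use, cores per socket, and threads per
// core; clear all other PUs from __kmp_affin_fullMask; and compact the
// address2os array accordingly.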
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
3400 AddrUnsPair *newAddr;
3401 if (__kmp_hws_requested == 0)
3404 if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3408 hwloc_topology_t tp = __kmp_hwloc_topology;
3409 int nS = 0, nN = 0, nL = 0, nC = 0,
3411 int nCr = 0, nTr = 0;
3412 int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
3413 hwloc_obj_t hT, hC, hL, hN, hS;
3417 int numa_support = 0, tile_support = 0;
3418 if (__kmp_pu_os_idx)
3419 hT = hwloc_get_pu_obj_by_os_index(tp,
3420 __kmp_pu_os_idx[__kmp_avail_proc - 1]);
3422 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
3424 KMP_WARNING(AffHWSubsetUnsupported);
3428 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
3429 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
3430 if (hN != NULL && hN->depth > hS->depth) {
    } else if (__kmp_hws_node.num > 0) {
3434 KMP_WARNING(AffHWSubsetUnsupported);
3438 L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
3439 hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
3441 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
    } else if (__kmp_hws_tile.num > 0) {
3444 if (__kmp_hws_core.num == 0) {
3445 __kmp_hws_core = __kmp_hws_tile;
3446 __kmp_hws_tile.num = 0;
3449 KMP_WARNING(AffHWSubsetInvalid);
3456 if (__kmp_hws_socket.num == 0)
3457 __kmp_hws_socket.num = nPackages;
3458 if (__kmp_hws_socket.offset >= nPackages) {
3459 KMP_WARNING(AffHWSubsetManySockets);
3464 int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
3466 if (__kmp_hws_node.num == 0)
3467 __kmp_hws_node.num = NN;
3468 if (__kmp_hws_node.offset >= NN) {
3469 KMP_WARNING(AffHWSubsetManyNodes);
3474 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3475 if (__kmp_hws_tile.num == 0) {
3476 __kmp_hws_tile.num = NL + 1;
3478 if (__kmp_hws_tile.offset >= NL) {
3479 KMP_WARNING(AffHWSubsetManyTiles);
3482 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3484 if (__kmp_hws_core.num == 0)
3485 __kmp_hws_core.num = NC;
3486 if (__kmp_hws_core.offset >= NC) {
3487 KMP_WARNING(AffHWSubsetManyCores);
3491 int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
3493 if (__kmp_hws_core.num == 0)
3494 __kmp_hws_core.num = NC;
3495 if (__kmp_hws_core.offset >= NC) {
3496 KMP_WARNING(AffHWSubsetManyCores);
3503 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3504 if (__kmp_hws_tile.num == 0)
3505 __kmp_hws_tile.num = NL;
3506 if (__kmp_hws_tile.offset >= NL) {
3507 KMP_WARNING(AffHWSubsetManyTiles);
3510 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3512 if (__kmp_hws_core.num == 0)
3513 __kmp_hws_core.num = NC;
3514 if (__kmp_hws_core.offset >= NC) {
3515 KMP_WARNING(AffHWSubsetManyCores);
3519 int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
3521 if (__kmp_hws_core.num == 0)
3522 __kmp_hws_core.num = NC;
3523 if (__kmp_hws_core.offset >= NC) {
3524 KMP_WARNING(AffHWSubsetManyCores);
3529 if (__kmp_hws_proc.num == 0)
3530 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3531 if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
3532 KMP_WARNING(AffHWSubsetManyProcs);
    newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                            __kmp_avail_proc); // max size
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
3545 hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
3546 if (!__kmp_hwloc_obj_has_PUs(tp, hS))
3549 if (nS <= __kmp_hws_socket.offset ||
3550 nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
3551 n_old += __kmp_hwloc_skip_PUs_obj(tp, hS);
3562 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
3563 for (
int n = 0; n < NN; ++n) {
3565 if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
3566 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3570 if (nN <= __kmp_hws_node.offset ||
3571 nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
3573 n_old += __kmp_hwloc_skip_PUs_obj(tp, hN);
3574 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3581 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3582 for (
int l = 0; l < NL; ++l) {
3584 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3585 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3589 if (nL <= __kmp_hws_tile.offset ||
3590 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3592 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3593 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3600 int NC = __kmp_hwloc_count_children_by_type(tp, hL,
3601 HWLOC_OBJ_CORE, &hC);
3602 for (
int c = 0; c < NC; ++c) {
3604 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3605 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3609 if (nC <= __kmp_hws_core.offset ||
3610 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3612 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3613 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
              int NT = __kmp_hwloc_count_children_by_type(tp, hC,
                                                          HWLOC_OBJ_PU, &hT);
              for (int t = 0; t < NT; ++t) {
3626 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3627 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3631 if (nT <= __kmp_hws_proc.offset ||
3632 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3634 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                  KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3637 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3642 newAddr[n_new] = (*pAddr)[n_old];
3645 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3653 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3655 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3663 __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
3664 for (
int c = 0; c < NC; ++c) {
3666 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3667 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3671 if (nC <= __kmp_hws_core.offset ||
3672 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3674 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3675 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3683 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3684 for (
int t = 0; t < NT; ++t) {
3687 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3688 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3692 if (nT <= __kmp_hws_proc.offset ||
3693 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3695 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3697 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3698 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3703 newAddr[n_new] = (*pAddr)[n_old];
3706 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3714 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3717 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3725 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3726 for (
int l = 0; l < NL; ++l) {
3728 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3729 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3733 if (nL <= __kmp_hws_tile.offset ||
3734 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3736 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3737 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3745 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
3746 for (
int c = 0; c < NC; ++c) {
3748 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3749 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3753 if (nC <= __kmp_hws_core.offset ||
3754 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3756 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3757 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3766 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3767 for (
int t = 0; t < NT; ++t) {
3770 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3771 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3775 if (nT <= __kmp_hws_proc.offset ||
3776 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3778 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3780 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3781 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3786 newAddr[n_new] = (*pAddr)[n_old];
3789 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3797 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3799 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3807 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
3808 for (
int c = 0; c < NC; ++c) {
3810 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3811 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3815 if (nC <= __kmp_hws_core.offset ||
3816 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3818 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3819 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3828 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3829 for (
int t = 0; t < NT; ++t) {
3832 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3833 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3837 if (nT <= __kmp_hws_proc.offset ||
3838 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3840 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3842 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3843 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3848 newAddr[n_new] = (*pAddr)[n_old];
3851 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3859 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3871 KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
3872 KMP_DEBUG_ASSERT(nPkg > 0);
3873 KMP_DEBUG_ASSERT(nCpP > 0);
3874 KMP_DEBUG_ASSERT(nTpC > 0);
3875 KMP_DEBUG_ASSERT(nCo > 0);
3876 KMP_DEBUG_ASSERT(nPkg <= nPackages);
3877 KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
3878 KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
3879 KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
3882 nCoresPerPkg = nCpP;
3883 __kmp_nThreadsPerCore = nTpC;
3884 __kmp_avail_proc = n_new;
3888 #endif // KMP_USE_HWLOC
3890 int n_old = 0, n_new = 0, proc_num = 0;
3891 if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
3892 KMP_WARNING(AffHWSubsetNoHWLOC);
3895 if (__kmp_hws_socket.num == 0)
3896 __kmp_hws_socket.num = nPackages;
3897 if (__kmp_hws_core.num == 0)
3898 __kmp_hws_core.num = nCoresPerPkg;
3899 if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
3900 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3901 if (!__kmp_affinity_uniform_topology()) {
3902 KMP_WARNING(AffHWSubsetNonUniform);
3906 KMP_WARNING(AffHWSubsetNonThreeLevel);
3909 if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
3910 KMP_WARNING(AffHWSubsetManySockets);
3913 if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
3914 KMP_WARNING(AffHWSubsetManyCores);
  newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
                                          __kmp_hws_socket.num *
                                          __kmp_hws_core.num *
                                          __kmp_hws_proc.num);
  for (int i = 0; i < nPackages; ++i) {
3923 if (i < __kmp_hws_socket.offset ||
3924 i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
3926 n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
3927 if (__kmp_pu_os_idx != NULL) {
        for (int j = 0; j < nCoresPerPkg; ++j) {
          for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3931 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3938 for (
int j = 0; j < nCoresPerPkg; ++j) {
3939 if (j < __kmp_hws_core.offset ||
3940 j >= __kmp_hws_core.offset +
3941 __kmp_hws_core.num) {
3942 n_old += __kmp_nThreadsPerCore;
3943 if (__kmp_pu_os_idx != NULL) {
3944 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3945 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3951 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3952 if (k < __kmp_hws_proc.num) {
3954 newAddr[n_new] = (*pAddr)[n_old];
3957 if (__kmp_pu_os_idx != NULL)
3958 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3967 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
  KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
                                __kmp_hws_proc.num);
3971 nPackages = __kmp_hws_socket.num;
3972 nCoresPerPkg = __kmp_hws_core.num;
3973 __kmp_nThreadsPerCore = __kmp_hws_proc.num;
3974 __kmp_avail_proc = n_new;
3975 __kmp_ncores = nPackages * __kmp_hws_core.num;
  if (__kmp_affinity_verbose) {
    char m[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
    }
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
  }
3999 if (__kmp_pu_os_idx != NULL) {
4000 __kmp_free(__kmp_pu_os_idx);
4001 __kmp_pu_os_idx = NULL;
// Find the deepest level at or above bottom_level at which distinct cores can
// be told apart in the address2os table.
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  int core_level = 0;
  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}

// Count the number of cores (clusters at core_level) described by the first
// nprocs entries of the address2os table.
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
  int ncores = 0;
  int i, j;
  for (i = 0; i < nprocs; i++) {
    for (j = bottom_level; j > core_level; j--) {
      if ((i + 1) < nprocs) {
        if (address2os[i + 1].first.labels[j] > 0) {
          break;
        }
      }
    }
    if (j == core_level) {
      ncores++;
    }
  }
  if (j > core_level) {
    // A partial core may be left over at the end of the table.
    ncores++;
  }
  return ncores;
}

// Return the (zero-based) core number to which processor 'proc' belongs.
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
                                       core_level) -
         1;
}

// Return the maximal number of processing units bound to a single core over
// all cores described by the address2os table.
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
  int maxprocpercore = 0;
  if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
      int percore = address2os[i].first.labels[core_level + 1] + 1;
      if (percore > maxprocpercore) {
        maxprocpercore = percore;
      }
    }
  } else {
    maxprocpercore = 1;
  }
  return maxprocpercore;
}
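// The topology map, the per-core processor array used by balanced affinity,
// and the depth of the map are kept in the following file-scope statics.
// KMP_EXIT_AFF_NONE is used by the initialization code below to bail out of
// topology construction when affinity ends up as affinity_none, while still
// creating the single "all processors" place.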
static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;

#if KMP_USE_HIER_SCHED
#define KMP_EXIT_AFF_NONE                                              \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                    \
  KMP_ASSERT(address2os == NULL);                                      \
  __kmp_apply_thread_places(NULL, 0);                                  \
  __kmp_create_affinity_none_places();                                 \
  __kmp_dispatch_set_hierarchy_values();                               \
  return;
#else
#define KMP_EXIT_AFF_NONE                                              \
  KMP_ASSERT(__kmp_affinity_type == affinity_none);                    \
  KMP_ASSERT(address2os == NULL);                                      \
  __kmp_apply_thread_places(NULL, 0);                                  \
  __kmp_create_affinity_none_places();                                 \
  return;
#endif
static void __kmp_create_affinity_none_places() {
  KMP_ASSERT(__kmp_affin_fullMask != NULL);
  KMP_ASSERT(__kmp_affinity_type == affinity_none);
  __kmp_affinity_num_masks = 1;
  KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
  KMP_CPU_COPY(dest, __kmp_affin_fullMask);
}
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
  KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
  for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
    int j = depth - i - 1;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  for (; i < depth; i++) {
    int j = i - __kmp_affinity_compact;
    if (aa->childNums[j] < bb->childNums[j])
      return -1;
    if (aa->childNums[j] > bb->childNums[j])
      return 1;
  }
  return 0;
}
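// __kmp_aux_affinity_initialize() is the main driver: it establishes the full
// system mask, builds the machine topology map with whichever method is
// selected (hwloc, x2APIC / legacy APIC ids, /proc/cpuinfo, Windows processor
// groups, or a flat map), applies KMP_HW_SUBSET, and then constructs the
// table of affinity masks (__kmp_affinity_masks) according to the requested
// affinity type.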
static void __kmp_aux_affinity_initialize(void) {
4138 if (__kmp_affinity_masks != NULL) {
4139 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4147 if (__kmp_affin_fullMask == NULL) {
4148 KMP_CPU_ALLOC(__kmp_affin_fullMask);
4150 if (KMP_AFFINITY_CAPABLE()) {
4151 if (__kmp_affinity_respect_mask) {
4152 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
4156 __kmp_avail_proc = 0;
4157 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
4158 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
4163 if (__kmp_avail_proc > __kmp_xproc) {
4164 if (__kmp_affinity_verbose ||
4165 (__kmp_affinity_warnings &&
4166 (__kmp_affinity_type != affinity_none))) {
4167 KMP_WARNING(ErrorInitializeAffinity);
4169 __kmp_affinity_type = affinity_none;
4170 KMP_AFFINITY_DISABLE();
4174 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4175 __kmp_avail_proc = __kmp_xproc;
4179 if (__kmp_affinity_gran == affinity_gran_tile &&
4181 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
    KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
4183 __kmp_affinity_gran = affinity_gran_package;
4187 kmp_i18n_id_t msg_id = kmp_i18n_null;
4191 if ((__kmp_cpuinfo_file != NULL) &&
4192 (__kmp_affinity_top_method == affinity_top_method_all)) {
4193 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
4196 if (__kmp_affinity_top_method == affinity_top_method_all) {
4200 const char *file_name = NULL;
4204 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4205 if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4208 if (!__kmp_hwloc_error) {
4209 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
      } else if (depth < 0 && __kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4221 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4224 if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4229 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4235 if (__kmp_affinity_verbose) {
4236 if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                     KMP_I18N_STR(DecodingLegacyAPIC));
        } else {
          KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                     KMP_I18N_STR(DecodingLegacyAPIC));
4247 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4259 if (__kmp_affinity_verbose) {
4260 if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
        } else {
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
      FILE *f = fopen("/proc/cpuinfo", "r");
      if (f == NULL) {
        msg_id = kmp_i18n_str_CantOpenCpuinfo;
      } else {
        file_name = "/proc/cpuinfo";
4274 __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4284 #if KMP_GROUP_AFFINITY
4286 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
4287 if (__kmp_affinity_verbose) {
4288 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
4291 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4292 KMP_ASSERT(depth != 0);
4298 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
4299 if (file_name == NULL) {
4300 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
4301 }
else if (line == 0) {
4302 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
4304 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
4305 __kmp_i18n_catgets(msg_id));
4311 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4315 KMP_ASSERT(depth > 0);
4316 KMP_ASSERT(address2os != NULL);
4321 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4322 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4323 if (__kmp_affinity_verbose) {
4324 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
4326 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4331 #endif // KMP_USE_HWLOC
4337 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4339 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
4340 if (__kmp_affinity_verbose) {
4341 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4344 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4349 KMP_ASSERT(msg_id != kmp_i18n_null);
4350 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4353 if (__kmp_affinity_verbose) {
4354 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4357 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4362 KMP_ASSERT(msg_id != kmp_i18n_null);
4363 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4369 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4370 const char *filename;
4371 if (__kmp_cpuinfo_file != NULL) {
4372 filename = __kmp_cpuinfo_file;
4374 filename =
"/proc/cpuinfo";
4377 if (__kmp_affinity_verbose) {
4378 KMP_INFORM(AffParseFilename,
"KMP_AFFINITY", filename);
    FILE *f = fopen(filename, "r");
4384 if (__kmp_cpuinfo_file != NULL) {
4385 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4386 KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
4388 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4393 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4396 KMP_ASSERT(msg_id != kmp_i18n_null);
4398 KMP_FATAL(FileLineMsgExiting, filename, line,
4399 __kmp_i18n_catgets(msg_id));
4401 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4404 if (__kmp_affinity_type == affinity_none) {
4405 KMP_ASSERT(depth == 0);
4410 #if KMP_GROUP_AFFINITY
4412 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4413 if (__kmp_affinity_verbose) {
4414 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
4417 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4418 KMP_ASSERT(depth != 0);
4420 KMP_ASSERT(msg_id != kmp_i18n_null);
4421 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4427 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4428 if (__kmp_affinity_verbose) {
4429 KMP_INFORM(AffUsingFlatOS,
"KMP_AFFINITY");
4432 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4437 KMP_ASSERT(depth > 0);
4438 KMP_ASSERT(address2os != NULL);
4441 #if KMP_USE_HIER_SCHED
4442 __kmp_dispatch_set_hierarchy_values();
4445 if (address2os == NULL) {
4446 if (KMP_AFFINITY_CAPABLE() &&
4447 (__kmp_affinity_verbose ||
4448 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4449 KMP_WARNING(ErrorInitializeAffinity);
4451 __kmp_affinity_type = affinity_none;
4452 __kmp_create_affinity_none_places();
4453 KMP_AFFINITY_DISABLE();
4457 if (__kmp_affinity_gran == affinity_gran_tile
4459 && __kmp_tile_depth == 0
4463 KMP_WARNING(AffTilesNoTiles,
"KMP_AFFINITY");
4466 __kmp_apply_thread_places(&address2os, depth);
4471 kmp_affin_mask_t *osId2Mask =
4472 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4473 if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
4480 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
4482 switch (__kmp_affinity_type) {
4484 case affinity_explicit:
4485 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4487 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4490 __kmp_affinity_process_proclist(
4491 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4492 __kmp_affinity_proclist, osId2Mask, maxIndex);
4496 __kmp_affinity_process_placelist(
4497 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4498 __kmp_affinity_proclist, osId2Mask, maxIndex);
4501 if (__kmp_affinity_num_masks == 0) {
4502 if (__kmp_affinity_verbose ||
4503 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4504 KMP_WARNING(AffNoValidProcID);
4506 __kmp_affinity_type = affinity_none;
4507 __kmp_create_affinity_none_places();
4517 case affinity_logical:
4518 __kmp_affinity_compact = 0;
4519 if (__kmp_affinity_offset) {
4520 __kmp_affinity_offset =
4521 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4525 case affinity_physical:
4526 if (__kmp_nThreadsPerCore > 1) {
4527 __kmp_affinity_compact = 1;
4528 if (__kmp_affinity_compact >= depth) {
4529 __kmp_affinity_compact = 0;
4532 __kmp_affinity_compact = 0;
4534 if (__kmp_affinity_offset) {
4535 __kmp_affinity_offset =
4536 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4540 case affinity_scatter:
4541 if (__kmp_affinity_compact >= depth) {
4542 __kmp_affinity_compact = 0;
4544 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4548 case affinity_compact:
4549 if (__kmp_affinity_compact >= depth) {
4550 __kmp_affinity_compact = depth - 1;
4554 case affinity_balanced:
4556 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4557 KMP_WARNING(AffBalancedNotAvail,
"KMP_AFFINITY");
4559 __kmp_affinity_type = affinity_none;
4560 __kmp_create_affinity_none_places();
    } else if (!__kmp_affinity_uniform_topology()) {
4564 __kmp_aff_depth = depth;
4566 int core_level = __kmp_affinity_find_core_level(
4567 address2os, __kmp_avail_proc, depth - 1);
4568 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4569 depth - 1, core_level);
4570 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4571 address2os, __kmp_avail_proc, depth - 1, core_level);
4573 int nproc = ncores * maxprocpercore;
4574 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4575 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4576 KMP_WARNING(AffBalancedNotAvail,
"KMP_AFFINITY");
4578 __kmp_affinity_type = affinity_none;
      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
      for (int i = 0; i < __kmp_avail_proc; i++) {
4590 int proc = address2os[i].second;
4592 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4594 if (core == lastcore) {
4601 procarr[core * maxprocpercore + inlastcore] = proc;
4604 if (__kmp_affinity_compact >= depth) {
4605 __kmp_affinity_compact = depth - 1;
4610 if (__kmp_affinity_dups) {
4611 __kmp_affinity_num_masks = __kmp_avail_proc;
4613 __kmp_affinity_num_masks = numUnique;
4617 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4618 (__kmp_affinity_num_places > 0) &&
4619 ((
unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4620 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4624 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
        __kmp_affinity_cmp_Address_child_num);
4633 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4634 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4637 unsigned osId = address2os[i].second;
4638 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4639 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4640 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4641 KMP_CPU_COPY(dest, src);
4642 if (++j >= __kmp_affinity_num_masks) {
4646 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    KMP_ASSERT2(0, "Unexpected affinity setting");
4654 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4655 machine_hierarchy.init(address2os, __kmp_avail_proc);
4657 #undef KMP_EXIT_AFF_NONE
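// Public entry point: affinity_disabled is temporarily treated as
// affinity_none so that the shared initialization path above (which checks
// for affinity_none throughout) can run, then the disabled setting is
// restored afterwards.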
void __kmp_affinity_initialize(void) {
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled)
    __kmp_affinity_type = affinity_none;
  __kmp_aux_affinity_initialize();
  if (disabled)
    __kmp_affinity_type = affinity_disabled;
}
void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
  __kmp_affinity_num_places = 0;
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (address2os != NULL) {
    __kmp_free(address2os);
    address2os = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  KMPAffinity::destroy_api();
}
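// Bind thread gtid to its initial place: the mask is chosen from
// __kmp_affinity_masks based on the affinity type and the thread's global
// id, and the chosen place numbers are recorded in the thread descriptor.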
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4717 if (!KMP_AFFINITY_CAPABLE()) {
4721 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4722 if (th->th.th_affin_mask == NULL) {
4723 KMP_CPU_ALLOC(th->th.th_affin_mask);
4725 KMP_CPU_ZERO(th->th.th_affin_mask);
4732 kmp_affin_mask_t *mask;
4736 if (KMP_AFFINITY_NON_PROC_BIND)
4739 if ((__kmp_affinity_type == affinity_none) ||
4740 (__kmp_affinity_type == affinity_balanced)) {
4741 #if KMP_GROUP_AFFINITY
4742 if (__kmp_num_proc_groups > 1) {
4746 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4748 mask = __kmp_affin_fullMask;
4750 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4751 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4752 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4758 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4759 #if KMP_GROUP_AFFINITY
4760 if (__kmp_num_proc_groups > 1) {
4764 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4766 mask = __kmp_affin_fullMask;
4770 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4771 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4772 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4778 th->th.th_current_place = i;
4780 th->th.th_new_place = i;
4781 th->th.th_first_place = 0;
4782 th->th.th_last_place = __kmp_affinity_num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
4786 th->th.th_first_place = 0;
4787 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4790 if (i == KMP_PLACE_ALL) {
4791 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4794 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4801 (
"__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
4804 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4809 KMP_CPU_COPY(th->th.th_affin_mask, mask);
  if (__kmp_affinity_verbose &&
      (__kmp_affinity_type == affinity_none ||
       (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
4815 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4816 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4817 th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
4826 if (__kmp_affinity_type == affinity_none) {
4827 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4830 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
void __kmp_affinity_set_place(int gtid) {
4836 if (!KMP_AFFINITY_CAPABLE()) {
4840 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4842 KA_TRACE(100, (
"__kmp_affinity_set_place: binding T#%d to place %d (current "
4844 gtid, th->th.th_new_place, th->th.th_current_place));
4847 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4848 KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4850 if (th->th.th_first_place <= th->th.th_last_place) {
4851 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4852 (th->th.th_new_place <= th->th.th_last_place));
4854 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4855 (th->th.th_new_place >= th->th.th_last_place));
4860 kmp_affin_mask_t *mask =
4861 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4862 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4863 th->th.th_current_place = th->th.th_new_place;
4865 if (__kmp_affinity_verbose) {
4866 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4867 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4868 th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
4872 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
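// The routines below back the user-visible kmp_set_affinity, kmp_get_affinity
// and kmp_*_affinity_mask_proc API entry points; they validate the supplied
// mask (and individual proc ids) against the full system mask before touching
// the system affinity.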
int __kmp_aux_set_affinity(void **mask) {
4882 if (!KMP_AFFINITY_CAPABLE()) {
4886 gtid = __kmp_entry_gtid();
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf(
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
        buf);
  });
4896 if (__kmp_env_consistency_check) {
4897 if ((mask == NULL) || (*mask == NULL)) {
4898 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4903 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4904 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4905 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4907 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4912 if (num_procs == 0) {
4913 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4916 #if KMP_GROUP_AFFINITY
4917 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4918 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4924 th = __kmp_threads[gtid];
4925 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4926 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4928 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4932 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4933 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4934 th->th.th_first_place = 0;
4935 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4938 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
int __kmp_aux_get_affinity(void **mask) {
4949 if (!KMP_AFFINITY_CAPABLE()) {
4953 gtid = __kmp_entry_gtid();
4954 th = __kmp_threads[gtid];
4955 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf(
        "kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,
        buf);
  });
4965 if (__kmp_env_consistency_check) {
4966 if ((mask == NULL) || (*mask == NULL)) {
4967 KMP_FATAL(AffinityInvalidMask,
"kmp_get_affinity");
4973 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4974 KA_TRACE(1000, (
""); {
4975 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4976 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4977 (kmp_affin_mask_t *)(*mask));
4978 __kmp_printf(
"kmp_get_affinity: system affinity mask for thread %d = %s\n",
4985 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
5004 if (!KMP_AFFINITY_CAPABLE()) {
5008 KA_TRACE(1000, (
""); {
5009 int gtid = __kmp_entry_gtid();
5010 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5011 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5012 (kmp_affin_mask_t *)(*mask));
5013 __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in "
5014 "affinity mask for thread %d = %s\n",
5018 if (__kmp_env_consistency_check) {
5019 if ((mask == NULL) || (*mask == NULL)) {
5020 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity_mask_proc");
5024 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5027 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5031 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
5036 if (!KMP_AFFINITY_CAPABLE()) {
5040 KA_TRACE(1000, (
""); {
5041 int gtid = __kmp_entry_gtid();
5042 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5043 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5044 (kmp_affin_mask_t *)(*mask));
5045 __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in "
5046 "affinity mask for thread %d = %s\n",
5050 if (__kmp_env_consistency_check) {
5051 if ((mask == NULL) || (*mask == NULL)) {
5052 KMP_FATAL(AffinityInvalidMask,
"kmp_unset_affinity_mask_proc");
5056 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5059 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5063 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
5068 if (!KMP_AFFINITY_CAPABLE()) {
5072 KA_TRACE(1000, (
""); {
5073 int gtid = __kmp_entry_gtid();
5074 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5075 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5076 (kmp_affin_mask_t *)(*mask));
5077 __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in "
5078 "affinity mask for thread %d = %s\n",
5082 if (__kmp_env_consistency_check) {
5083 if ((mask == NULL) || (*mask == NULL)) {
5084 KMP_FATAL(AffinityInvalidMask,
"kmp_get_affinity_mask_proc");
5088 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5091 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5095 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
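// Balanced affinity: distribute the nthreads threads of a team evenly over
// cores, either by direct index arithmetic when the topology is uniform, or
// using procarr[] (built during initialization) for non-uniform machines.
// With fine/thread granularity a single PU is selected per thread; with
// coarser granularity the whole core's mask is used.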
void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
5100 KMP_DEBUG_ASSERT(th);
  bool fine_gran = true;
5102 int tid = th->th.th_info.ds.ds_tid;
5104 switch (__kmp_affinity_gran) {
5105 case affinity_gran_fine:
5106 case affinity_gran_thread:
5108 case affinity_gran_core:
5109 if (__kmp_nThreadsPerCore > 1) {
5113 case affinity_gran_package:
5114 if (nCoresPerPkg > 1) {
5122 if (__kmp_affinity_uniform_topology()) {
5126 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
5128 int ncores = __kmp_ncores;
5129 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
5130 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
5134 int chunk = nthreads / ncores;
5136 int big_cores = nthreads % ncores;
5138 int big_nth = (chunk + 1) * big_cores;
5139 if (tid < big_nth) {
5140 coreID = tid / (chunk + 1);
5141 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5143 coreID = (tid - big_cores) / chunk;
5144 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
5147 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
5148 "Illegal set affinity operation when not capable");
5150 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5154 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
5155 KMP_CPU_SET(osID, mask);
      for (int i = 0; i < __kmp_nth_per_core; i++) {
5159 osID = address2os[coreID * __kmp_nth_per_core + i].second;
5160 KMP_CPU_SET(osID, mask);
5163 if (__kmp_affinity_verbose) {
5164 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5165 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
5169 __kmp_set_system_affinity(mask, TRUE);
5172 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5175 int core_level = __kmp_affinity_find_core_level(
5176 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
5177 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
5178 __kmp_aff_depth - 1, core_level);
5179 int nth_per_core = __kmp_affinity_max_proc_per_core(
5180 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5184 if (nthreads == __kmp_avail_proc) {
5186 int osID = address2os[tid].second;
5187 KMP_CPU_SET(osID, mask);
5189 int core = __kmp_affinity_find_core(address2os, tid,
5190 __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
5192 int osID = address2os[i].second;
5193 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
5194 core_level) == core) {
5195 KMP_CPU_SET(osID, mask);
    } else if (nthreads <= ncores) {
      for (int i = 0; i < ncores; i++) {
        for (int j = 0; j < nth_per_core; j++) {
5206 if (procarr[i * nth_per_core + j] != -1) {
          for (int j = 0; j < nth_per_core; j++) {
5214 int osID = procarr[i * nth_per_core + j];
5216 KMP_CPU_SET(osID, mask);
      // Array holding the number of available processors at each core.
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Array holding the number of cores with exactly "x" available procs.
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Array holding the number of cores with at least "x" available procs.
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      for (int i = 0; i <= nth_per_core; i++) {
5241 ncores_with_x_procs[i] = 0;
5242 ncores_with_x_to_max_procs[i] = 0;
5245 for (
int i = 0; i < ncores; i++) {
5247 for (
int j = 0; j < nth_per_core; j++) {
5248 if (procarr[i * nth_per_core + j] != -1) {
5252 nproc_at_core[i] = cnt;
5253 ncores_with_x_procs[cnt]++;
      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
5258 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5263 int nproc = nth_per_core * ncores;
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
5273 for (
int j = 1; j <= nth_per_core; j++) {
5274 int cnt = ncores_with_x_to_max_procs[j];
5275 for (
int i = 0; i < ncores; i++) {
5277 if (nproc_at_core[i] == 0) {
5280 for (
int k = 0; k < nth_per_core; k++) {
5281 if (procarr[i * nth_per_core + k] != -1) {
5282 if (newarr[i * nth_per_core + k] == 0) {
5283 newarr[i * nth_per_core + k] = 1;
5289 newarr[i * nth_per_core + k]++;
5297 if (cnt == 0 || nth == 0) {
      for (int i = 0; i < nproc; i++) {
5312 int osID = procarr[i];
5313 KMP_CPU_SET(osID, mask);
5315 int coreID = i / nth_per_core;
          for (int ii = 0; ii < nth_per_core; ii++) {
5317 int osID = procarr[coreID * nth_per_core + ii];
5319 KMP_CPU_SET(osID, mask);
5329 if (__kmp_affinity_verbose) {
5330 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5331 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
5335 __kmp_set_system_affinity(mask, TRUE);
// Restore the calling thread's affinity to the full initial mask (intended
// for use around non-OpenMP parallel regions); returns 0 on success, -1 if
// the thread cannot be bound.
int kmp_set_thread_affinity_mask_initial() {
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-OpenMP threads.
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}

#endif // KMP_AFFINITY_SUPPORTED