14 #include "kmp_affinity.h"
18 #include "kmp_wrapper_getpid.h"
19 #if KMP_USE_HIER_SCHED
20 #include "kmp_dispatch_hier.h"
24 #define HWLOC_GROUP_KIND_INTEL_MODULE 102
25 #define HWLOC_GROUP_KIND_INTEL_TILE 103
26 #define HWLOC_GROUP_KIND_INTEL_DIE 104
27 #define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220
32 kmp_topology_t *__kmp_topology =
nullptr;
34 kmp_hw_subset_t *__kmp_hw_subset =
nullptr;
37 static hierarchy_info machine_hierarchy;
39 void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
41 void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
45 if (TCR_1(machine_hierarchy.uninitialized))
46 machine_hierarchy.init(nproc);
49 if (nproc > machine_hierarchy.base_num_threads)
50 machine_hierarchy.resize(nproc);
52 depth = machine_hierarchy.depth;
53 KMP_DEBUG_ASSERT(depth > 0);
55 thr_bar->depth = depth;
56 __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
57 &(thr_bar->base_leaf_kids));
58 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
61 static int nCoresPerPkg, nPackages;
62 static int __kmp_nThreadsPerCore;
63 #ifndef KMP_DFLT_NTH_CORES
64 static int __kmp_ncores;
67 const char *__kmp_hw_get_catalog_string(kmp_hw_t type,
bool plural) {
70 return ((plural) ? KMP_I18N_STR(Sockets) : KMP_I18N_STR(Socket));
72 return ((plural) ? KMP_I18N_STR(Dice) : KMP_I18N_STR(Die));
74 return ((plural) ? KMP_I18N_STR(Modules) : KMP_I18N_STR(Module));
76 return ((plural) ? KMP_I18N_STR(Tiles) : KMP_I18N_STR(Tile));
78 return ((plural) ? KMP_I18N_STR(NumaDomains) : KMP_I18N_STR(NumaDomain));
80 return ((plural) ? KMP_I18N_STR(L3Caches) : KMP_I18N_STR(L3Cache));
82 return ((plural) ? KMP_I18N_STR(L2Caches) : KMP_I18N_STR(L2Cache));
84 return ((plural) ? KMP_I18N_STR(L1Caches) : KMP_I18N_STR(L1Cache));
86 return ((plural) ? KMP_I18N_STR(LLCaches) : KMP_I18N_STR(LLCache));
88 return ((plural) ? KMP_I18N_STR(Cores) : KMP_I18N_STR(Core));
90 return ((plural) ? KMP_I18N_STR(Threads) : KMP_I18N_STR(Thread));
91 case KMP_HW_PROC_GROUP:
92 return ((plural) ? KMP_I18N_STR(ProcGroups) : KMP_I18N_STR(ProcGroup));
94 return KMP_I18N_STR(Unknown);
97 const char *__kmp_hw_get_keyword(kmp_hw_t type,
bool plural) {
100 return ((plural) ?
"sockets" :
"socket");
102 return ((plural) ?
"dice" :
"die");
104 return ((plural) ?
"modules" :
"module");
106 return ((plural) ?
"tiles" :
"tile");
108 return ((plural) ?
"numa_domains" :
"numa_domain");
110 return ((plural) ?
"l3_caches" :
"l3_cache");
112 return ((plural) ?
"l2_caches" :
"l2_cache");
114 return ((plural) ?
"l1_caches" :
"l1_cache");
116 return ((plural) ?
"ll_caches" :
"ll_cache");
118 return ((plural) ?
"cores" :
"core");
120 return ((plural) ?
"threads" :
"thread");
121 case KMP_HW_PROC_GROUP:
122 return ((plural) ?
"proc_groups" :
"proc_group");
124 return ((plural) ?
"unknowns" :
"unknown");
127 const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
129 case KMP_HW_CORE_TYPE_UNKNOWN:
131 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
132 case KMP_HW_CORE_TYPE_ATOM:
133 return "Intel Atom(R) processor";
134 case KMP_HW_CORE_TYPE_CORE:
135 return "Intel(R) Core(TM) processor";
143 int kmp_hw_thread_t::compare_ids(
const void *a,
const void *b) {
144 const kmp_hw_thread_t *ahwthread = (
const kmp_hw_thread_t *)a;
145 const kmp_hw_thread_t *bhwthread = (
const kmp_hw_thread_t *)b;
146 int depth = __kmp_topology->get_depth();
147 for (
int level = 0; level < depth; ++level) {
148 if (ahwthread->ids[level] < bhwthread->ids[level])
150 else if (ahwthread->ids[level] > bhwthread->ids[level])
153 if (ahwthread->os_id < bhwthread->os_id)
155 else if (ahwthread->os_id > bhwthread->os_id)
160 #if KMP_AFFINITY_SUPPORTED
161 int kmp_hw_thread_t::compare_compact(
const void *a,
const void *b) {
163 const kmp_hw_thread_t *aa = (
const kmp_hw_thread_t *)a;
164 const kmp_hw_thread_t *bb = (
const kmp_hw_thread_t *)b;
165 int depth = __kmp_topology->get_depth();
166 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
167 KMP_DEBUG_ASSERT(__kmp_affinity_compact <= depth);
168 for (i = 0; i < __kmp_affinity_compact; i++) {
169 int j = depth - i - 1;
170 if (aa->sub_ids[j] < bb->sub_ids[j])
172 if (aa->sub_ids[j] > bb->sub_ids[j])
175 for (; i < depth; i++) {
176 int j = i - __kmp_affinity_compact;
177 if (aa->sub_ids[j] < bb->sub_ids[j])
179 if (aa->sub_ids[j] > bb->sub_ids[j])
186 void kmp_hw_thread_t::print()
const {
187 int depth = __kmp_topology->get_depth();
188 printf(
"%4d ", os_id);
189 for (
int i = 0; i < depth; ++i) {
190 printf(
"%4d ", ids[i]);
192 if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
193 printf(
" (%s)", __kmp_hw_get_core_type_string(core_type));
203 void kmp_topology_t::_insert_layer(kmp_hw_t type,
const int *ids) {
207 int previous_id = kmp_hw_thread_t::UNKNOWN_ID;
208 int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID;
212 for (target_layer = 0; target_layer < depth; ++target_layer) {
213 bool layers_equal =
true;
214 bool strictly_above_target_layer =
false;
215 for (
int i = 0; i < num_hw_threads; ++i) {
216 int id = hw_threads[i].ids[target_layer];
218 if (
id != previous_id && new_id == previous_new_id) {
220 strictly_above_target_layer =
true;
221 layers_equal =
false;
223 }
else if (
id == previous_id && new_id != previous_new_id) {
225 layers_equal =
false;
229 previous_new_id = new_id;
231 if (strictly_above_target_layer || layers_equal)
237 for (
int i = depth - 1, j = depth; i >= target_layer; --i, --j)
239 types[target_layer] = type;
240 for (
int k = 0; k < num_hw_threads; ++k) {
241 for (
int i = depth - 1, j = depth; i >= target_layer; --i, --j)
242 hw_threads[k].ids[j] = hw_threads[k].ids[i];
243 hw_threads[k].ids[target_layer] = ids[k];
245 equivalent[type] = type;
249 #if KMP_GROUP_AFFINITY
251 void kmp_topology_t::_insert_windows_proc_groups() {
253 if (__kmp_num_proc_groups == 1)
255 kmp_affin_mask_t *mask;
256 int *ids = (
int *)__kmp_allocate(
sizeof(
int) * num_hw_threads);
258 for (
int i = 0; i < num_hw_threads; ++i) {
260 KMP_CPU_SET(hw_threads[i].os_id, mask);
261 ids[i] = __kmp_get_proc_group(mask);
264 _insert_layer(KMP_HW_PROC_GROUP, ids);
271 void kmp_topology_t::_remove_radix1_layers() {
272 int preference[KMP_HW_LAST];
273 int top_index1, top_index2;
275 preference[KMP_HW_SOCKET] = 110;
276 preference[KMP_HW_PROC_GROUP] = 100;
277 preference[KMP_HW_CORE] = 95;
278 preference[KMP_HW_THREAD] = 90;
279 preference[KMP_HW_NUMA] = 85;
280 preference[KMP_HW_DIE] = 80;
281 preference[KMP_HW_TILE] = 75;
282 preference[KMP_HW_MODULE] = 73;
283 preference[KMP_HW_L3] = 70;
284 preference[KMP_HW_L2] = 65;
285 preference[KMP_HW_L1] = 60;
286 preference[KMP_HW_LLC] = 5;
289 while (top_index1 < depth - 1 && top_index2 < depth) {
290 kmp_hw_t type1 = types[top_index1];
291 kmp_hw_t type2 = types[top_index2];
292 KMP_ASSERT_VALID_HW_TYPE(type1);
293 KMP_ASSERT_VALID_HW_TYPE(type2);
296 if ((type1 == KMP_HW_THREAD || type1 == KMP_HW_CORE ||
297 type1 == KMP_HW_SOCKET) &&
298 (type2 == KMP_HW_THREAD || type2 == KMP_HW_CORE ||
299 type2 == KMP_HW_SOCKET)) {
300 top_index1 = top_index2++;
304 bool all_same =
true;
305 int id1 = hw_threads[0].ids[top_index1];
306 int id2 = hw_threads[0].ids[top_index2];
307 int pref1 = preference[type1];
308 int pref2 = preference[type2];
309 for (
int hwidx = 1; hwidx < num_hw_threads; ++hwidx) {
310 if (hw_threads[hwidx].ids[top_index1] == id1 &&
311 hw_threads[hwidx].ids[top_index2] != id2) {
315 if (hw_threads[hwidx].ids[top_index2] != id2)
317 id1 = hw_threads[hwidx].ids[top_index1];
318 id2 = hw_threads[hwidx].ids[top_index2];
322 kmp_hw_t remove_type, keep_type;
323 int remove_layer, remove_layer_ids;
326 remove_layer = remove_layer_ids = top_index2;
330 remove_layer = remove_layer_ids = top_index1;
336 remove_layer_ids = top_index2;
339 set_equivalent_type(remove_type, keep_type);
340 for (
int idx = 0; idx < num_hw_threads; ++idx) {
341 kmp_hw_thread_t &hw_thread = hw_threads[idx];
342 for (
int d = remove_layer_ids; d < depth - 1; ++d)
343 hw_thread.ids[d] = hw_thread.ids[d + 1];
345 for (
int idx = remove_layer; idx < depth - 1; ++idx)
346 types[idx] = types[idx + 1];
349 top_index1 = top_index2++;
352 KMP_ASSERT(depth > 0);
355 void kmp_topology_t::_set_last_level_cache() {
356 if (get_equivalent_type(KMP_HW_L3) != KMP_HW_UNKNOWN)
357 set_equivalent_type(KMP_HW_LLC, KMP_HW_L3);
358 else if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
359 set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
360 #if KMP_MIC_SUPPORTED
361 else if (__kmp_mic_type == mic3) {
362 if (get_equivalent_type(KMP_HW_L2) != KMP_HW_UNKNOWN)
363 set_equivalent_type(KMP_HW_LLC, KMP_HW_L2);
364 else if (get_equivalent_type(KMP_HW_TILE) != KMP_HW_UNKNOWN)
365 set_equivalent_type(KMP_HW_LLC, KMP_HW_TILE);
368 set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
371 else if (get_equivalent_type(KMP_HW_L1) != KMP_HW_UNKNOWN)
372 set_equivalent_type(KMP_HW_LLC, KMP_HW_L1);
374 if (get_equivalent_type(KMP_HW_LLC) == KMP_HW_UNKNOWN) {
375 if (get_equivalent_type(KMP_HW_SOCKET) != KMP_HW_UNKNOWN)
376 set_equivalent_type(KMP_HW_LLC, KMP_HW_SOCKET);
377 else if (get_equivalent_type(KMP_HW_CORE) != KMP_HW_UNKNOWN)
378 set_equivalent_type(KMP_HW_LLC, KMP_HW_CORE);
380 KMP_ASSERT(get_equivalent_type(KMP_HW_LLC) != KMP_HW_UNKNOWN);
384 void kmp_topology_t::_gather_enumeration_information() {
385 int previous_id[KMP_HW_LAST];
386 int max[KMP_HW_LAST];
388 for (
int i = 0; i < depth; ++i) {
389 previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
394 if (__kmp_is_hybrid_cpu()) {
395 for (
int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
396 core_types_count[i] = 0;
397 core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
400 int core_level = get_level(KMP_HW_CORE);
401 for (
int i = 0; i < num_hw_threads; ++i) {
402 kmp_hw_thread_t &hw_thread = hw_threads[i];
403 for (
int layer = 0; layer < depth; ++layer) {
404 int id = hw_thread.ids[layer];
405 if (
id != previous_id[layer]) {
407 for (
int l = layer; l < depth; ++l)
411 for (
int l = layer + 1; l < depth; ++l) {
412 if (max[l] > ratio[l])
417 if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level)
418 _increment_core_type(hw_thread.core_type);
422 for (
int layer = 0; layer < depth; ++layer) {
423 previous_id[layer] = hw_thread.ids[layer];
426 for (
int layer = 0; layer < depth; ++layer) {
427 if (max[layer] > ratio[layer])
428 ratio[layer] = max[layer];
433 void kmp_topology_t::_discover_uniformity() {
435 for (
int level = 0; level < depth; ++level)
437 flags.uniform = (num == count[depth - 1]);
441 void kmp_topology_t::_set_sub_ids() {
442 int previous_id[KMP_HW_LAST];
443 int sub_id[KMP_HW_LAST];
445 for (
int i = 0; i < depth; ++i) {
449 for (
int i = 0; i < num_hw_threads; ++i) {
450 kmp_hw_thread_t &hw_thread = hw_threads[i];
452 for (
int j = 0; j < depth; ++j) {
453 if (hw_thread.ids[j] != previous_id[j]) {
455 for (
int k = j + 1; k < depth; ++k) {
462 for (
int j = 0; j < depth; ++j) {
463 previous_id[j] = hw_thread.ids[j];
466 for (
int j = 0; j < depth; ++j) {
467 hw_thread.sub_ids[j] = sub_id[j];
472 void kmp_topology_t::_set_globals() {
474 int core_level, thread_level, package_level;
475 package_level = get_level(KMP_HW_SOCKET);
476 #if KMP_GROUP_AFFINITY
477 if (package_level == -1)
478 package_level = get_level(KMP_HW_PROC_GROUP);
480 core_level = get_level(KMP_HW_CORE);
481 thread_level = get_level(KMP_HW_THREAD);
483 KMP_ASSERT(core_level != -1);
484 KMP_ASSERT(thread_level != -1);
486 __kmp_nThreadsPerCore = calculate_ratio(thread_level, core_level);
487 if (package_level != -1) {
488 nCoresPerPkg = calculate_ratio(core_level, package_level);
489 nPackages = get_count(package_level);
492 nCoresPerPkg = get_count(core_level);
495 #ifndef KMP_DFLT_NTH_CORES
496 __kmp_ncores = get_count(core_level);
500 kmp_topology_t *kmp_topology_t::allocate(
int nproc,
int ndepth,
501 const kmp_hw_t *types) {
502 kmp_topology_t *retval;
504 size_t size =
sizeof(kmp_topology_t) +
sizeof(kmp_hw_thread_t) * nproc +
505 sizeof(int) * (
size_t)KMP_HW_LAST * 3;
506 char *bytes = (
char *)__kmp_allocate(size);
507 retval = (kmp_topology_t *)bytes;
509 retval->hw_threads = (kmp_hw_thread_t *)(bytes +
sizeof(kmp_topology_t));
511 retval->hw_threads =
nullptr;
513 retval->num_hw_threads = nproc;
514 retval->depth = ndepth;
516 (
int *)(bytes +
sizeof(kmp_topology_t) +
sizeof(kmp_hw_thread_t) * nproc);
517 retval->types = (kmp_hw_t *)arr;
518 retval->ratio = arr + (size_t)KMP_HW_LAST;
519 retval->count = arr + 2 * (size_t)KMP_HW_LAST;
520 KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
521 for (
int i = 0; i < ndepth; ++i) {
522 retval->types[i] = types[i];
523 retval->equivalent[types[i]] = types[i];
528 void kmp_topology_t::deallocate(kmp_topology_t *topology) {
530 __kmp_free(topology);
533 bool kmp_topology_t::check_ids()
const {
535 if (num_hw_threads == 0)
537 for (
int i = 1; i < num_hw_threads; ++i) {
538 kmp_hw_thread_t ¤t_thread = hw_threads[i];
539 kmp_hw_thread_t &previous_thread = hw_threads[i - 1];
541 for (
int j = 0; j < depth; ++j) {
542 if (previous_thread.ids[j] != current_thread.ids[j]) {
554 void kmp_topology_t::dump()
const {
555 printf(
"***********************\n");
556 printf(
"*** __kmp_topology: ***\n");
557 printf(
"***********************\n");
558 printf(
"* depth: %d\n", depth);
561 for (
int i = 0; i < depth; ++i)
562 printf(
"%15s ", __kmp_hw_get_keyword(types[i]));
566 for (
int i = 0; i < depth; ++i) {
567 printf(
"%15d ", ratio[i]);
572 for (
int i = 0; i < depth; ++i) {
573 printf(
"%15d ", count[i]);
577 printf(
"* core_types:\n");
578 for (
int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
579 if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
580 printf(
" %d %s core%c\n", core_types_count[i],
581 __kmp_hw_get_core_type_string(core_types[i]),
582 ((core_types_count[i] > 1) ?
's' :
' '));
585 printf(
"No hybrid information available\n");
590 printf(
"* equivalent map:\n");
591 KMP_FOREACH_HW_TYPE(i) {
592 const char *key = __kmp_hw_get_keyword(i);
593 const char *value = __kmp_hw_get_keyword(equivalent[i]);
594 printf(
"%-15s -> %-15s\n", key, value);
597 printf(
"* uniform: %s\n", (is_uniform() ?
"Yes" :
"No"));
599 printf(
"* num_hw_threads: %d\n", num_hw_threads);
600 printf(
"* hw_threads:\n");
601 for (
int i = 0; i < num_hw_threads; ++i) {
602 hw_threads[i].print();
604 printf(
"***********************\n");
607 void kmp_topology_t::print(
const char *env_var)
const {
609 int print_types_depth;
610 __kmp_str_buf_init(&buf);
611 kmp_hw_t print_types[KMP_HW_LAST + 2];
614 KMP_INFORM(AvailableOSProc, env_var, num_hw_threads);
618 KMP_INFORM(Uniform, env_var);
620 KMP_INFORM(NonUniform, env_var);
624 KMP_FOREACH_HW_TYPE(type) {
625 kmp_hw_t eq_type = equivalent[type];
626 if (eq_type != KMP_HW_UNKNOWN && eq_type != type) {
627 KMP_INFORM(AffEqualTopologyTypes, env_var,
628 __kmp_hw_get_catalog_string(type),
629 __kmp_hw_get_catalog_string(eq_type));
634 KMP_ASSERT(depth > 0 && depth <= (
int)KMP_HW_LAST);
637 print_types_depth = 0;
638 for (
int level = 0; level < depth; ++level)
639 print_types[print_types_depth++] = types[level];
640 if (equivalent[KMP_HW_CORE] != KMP_HW_CORE) {
642 if (print_types[print_types_depth - 1] == KMP_HW_THREAD) {
645 print_types[print_types_depth - 1] = KMP_HW_CORE;
646 print_types[print_types_depth++] = KMP_HW_THREAD;
648 print_types[print_types_depth++] = KMP_HW_CORE;
652 if (equivalent[KMP_HW_THREAD] != KMP_HW_THREAD)
653 print_types[print_types_depth++] = KMP_HW_THREAD;
655 __kmp_str_buf_clear(&buf);
656 kmp_hw_t numerator_type;
657 kmp_hw_t denominator_type = KMP_HW_UNKNOWN;
658 int core_level = get_level(KMP_HW_CORE);
659 int ncores = get_count(core_level);
661 for (
int plevel = 0, level = 0; plevel < print_types_depth; ++plevel) {
664 numerator_type = print_types[plevel];
665 KMP_ASSERT_VALID_HW_TYPE(numerator_type);
666 if (equivalent[numerator_type] != numerator_type)
669 c = get_ratio(level++);
672 __kmp_str_buf_print(&buf,
"%d %s", c,
673 __kmp_hw_get_catalog_string(numerator_type, plural));
675 __kmp_str_buf_print(&buf,
" x %d %s/%s", c,
676 __kmp_hw_get_catalog_string(numerator_type, plural),
677 __kmp_hw_get_catalog_string(denominator_type));
679 denominator_type = numerator_type;
681 KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
683 if (__kmp_is_hybrid_cpu()) {
684 for (
int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
685 if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
687 KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
688 __kmp_hw_get_core_type_string(core_types[i]));
692 if (num_hw_threads <= 0) {
693 __kmp_str_buf_free(&buf);
698 KMP_INFORM(OSProcToPhysicalThreadMap, env_var);
699 for (
int i = 0; i < num_hw_threads; i++) {
700 __kmp_str_buf_clear(&buf);
701 for (
int level = 0; level < depth; ++level) {
702 kmp_hw_t type = types[level];
703 __kmp_str_buf_print(&buf,
"%s ", __kmp_hw_get_catalog_string(type));
704 __kmp_str_buf_print(&buf,
"%d ", hw_threads[i].ids[level]);
706 if (__kmp_is_hybrid_cpu())
708 &buf,
"(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
709 KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
712 __kmp_str_buf_free(&buf);
715 void kmp_topology_t::canonicalize() {
716 #if KMP_GROUP_AFFINITY
717 _insert_windows_proc_groups();
719 _remove_radix1_layers();
720 _gather_enumeration_information();
721 _discover_uniformity();
724 _set_last_level_cache();
726 #if KMP_MIC_SUPPORTED
728 if (__kmp_mic_type == mic3) {
729 if (get_level(KMP_HW_L2) != -1)
730 set_equivalent_type(KMP_HW_TILE, KMP_HW_L2);
731 else if (get_level(KMP_HW_TILE) != -1)
732 set_equivalent_type(KMP_HW_L2, KMP_HW_TILE);
737 KMP_ASSERT(depth > 0);
738 for (
int level = 0; level < depth; ++level) {
740 KMP_ASSERT(count[level] > 0 && ratio[level] > 0);
741 KMP_ASSERT_VALID_HW_TYPE(types[level]);
743 KMP_ASSERT(equivalent[types[level]] == types[level]);
746 #if KMP_AFFINITY_SUPPORTED
748 if (__kmp_affinity_gran_levels < 0) {
749 kmp_hw_t gran_type = get_equivalent_type(__kmp_affinity_gran);
751 if (gran_type == KMP_HW_UNKNOWN) {
753 kmp_hw_t gran_types[3] = {KMP_HW_CORE, KMP_HW_THREAD, KMP_HW_SOCKET};
754 for (
auto g : gran_types) {
755 if (__kmp_topology->get_equivalent_type(g) != KMP_HW_UNKNOWN) {
760 KMP_ASSERT(gran_type != KMP_HW_UNKNOWN);
762 KMP_WARNING(AffGranularityBad,
"KMP_AFFINITY",
763 __kmp_hw_get_catalog_string(__kmp_affinity_gran),
764 __kmp_hw_get_catalog_string(gran_type));
765 __kmp_affinity_gran = gran_type;
767 #if KMP_GROUP_AFFINITY
775 if (__kmp_num_proc_groups > 1) {
776 int gran_depth = __kmp_topology->get_level(gran_type);
777 int proc_group_depth = __kmp_topology->get_level(KMP_HW_PROC_GROUP);
778 if (gran_depth >= 0 && proc_group_depth >= 0 &&
779 gran_depth < proc_group_depth) {
780 KMP_WARNING(AffGranTooCoarseProcGroup,
"KMP_AFFINITY",
781 __kmp_hw_get_catalog_string(__kmp_affinity_gran));
782 __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP;
786 __kmp_affinity_gran_levels = 0;
787 for (
int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
788 __kmp_affinity_gran_levels++;
794 void kmp_topology_t::canonicalize(
int npackages,
int ncores_per_pkg,
795 int nthreads_per_core,
int ncores) {
798 KMP_FOREACH_HW_TYPE(i) { equivalent[i] = KMP_HW_UNKNOWN; }
799 for (
int level = 0; level < depth; ++level) {
803 count[0] = npackages;
805 count[2] = __kmp_xproc;
806 ratio[0] = npackages;
807 ratio[1] = ncores_per_pkg;
808 ratio[2] = nthreads_per_core;
809 equivalent[KMP_HW_SOCKET] = KMP_HW_SOCKET;
810 equivalent[KMP_HW_CORE] = KMP_HW_CORE;
811 equivalent[KMP_HW_THREAD] = KMP_HW_THREAD;
812 types[0] = KMP_HW_SOCKET;
813 types[1] = KMP_HW_CORE;
814 types[2] = KMP_HW_THREAD;
816 _discover_uniformity();
822 bool kmp_topology_t::filter_hw_subset() {
824 if (!__kmp_hw_subset)
828 __kmp_hw_subset->sort();
831 int hw_subset_depth = __kmp_hw_subset->get_depth();
832 kmp_hw_t specified[KMP_HW_LAST];
833 KMP_ASSERT(hw_subset_depth > 0);
834 KMP_FOREACH_HW_TYPE(i) { specified[i] = KMP_HW_UNKNOWN; }
835 for (
int i = 0; i < hw_subset_depth; ++i) {
837 int num = __kmp_hw_subset->at(i).num;
838 int offset = __kmp_hw_subset->at(i).offset;
839 kmp_hw_t type = __kmp_hw_subset->at(i).type;
840 kmp_hw_t equivalent_type = equivalent[type];
841 int level = get_level(type);
844 if (equivalent_type != KMP_HW_UNKNOWN) {
845 __kmp_hw_subset->at(i).type = equivalent_type;
847 KMP_WARNING(AffHWSubsetNotExistGeneric,
848 __kmp_hw_get_catalog_string(type));
854 if (specified[equivalent_type] != KMP_HW_UNKNOWN) {
855 KMP_WARNING(AffHWSubsetEqvLayers, __kmp_hw_get_catalog_string(type),
856 __kmp_hw_get_catalog_string(specified[equivalent_type]));
859 specified[equivalent_type] = type;
862 max_count = get_ratio(level);
863 if (max_count < 0 || num + offset > max_count) {
864 bool plural = (num > 1);
865 KMP_WARNING(AffHWSubsetManyGeneric,
866 __kmp_hw_get_catalog_string(type, plural));
873 for (
int i = 0; i < num_hw_threads; ++i) {
874 kmp_hw_thread_t &hw_thread = hw_threads[i];
876 bool should_be_filtered =
false;
877 for (
int level = 0, hw_subset_index = 0;
878 level < depth && hw_subset_index < hw_subset_depth; ++level) {
879 kmp_hw_t topology_type = types[level];
880 auto hw_subset_item = __kmp_hw_subset->at(hw_subset_index);
881 kmp_hw_t hw_subset_type = hw_subset_item.type;
882 if (topology_type != hw_subset_type)
884 int num = hw_subset_item.num;
885 int offset = hw_subset_item.offset;
887 if (hw_thread.sub_ids[level] < offset ||
888 hw_thread.sub_ids[level] >= offset + num) {
889 should_be_filtered =
true;
893 if (!should_be_filtered) {
895 hw_threads[new_index] = hw_thread;
898 #if KMP_AFFINITY_SUPPORTED
899 KMP_CPU_CLR(hw_thread.os_id, __kmp_affin_fullMask);
904 KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
905 num_hw_threads = new_index;
908 _gather_enumeration_information();
909 _discover_uniformity();
911 _set_last_level_cache();
915 bool kmp_topology_t::is_close(
int hwt1,
int hwt2,
int hw_level)
const {
916 if (hw_level >= depth)
919 const kmp_hw_thread_t &t1 = hw_threads[hwt1];
920 const kmp_hw_thread_t &t2 = hw_threads[hwt2];
921 for (
int i = 0; i < (depth - hw_level); ++i) {
922 if (t1.ids[i] != t2.ids[i])
930 #if KMP_AFFINITY_SUPPORTED
931 class kmp_affinity_raii_t {
932 kmp_affin_mask_t *mask;
936 kmp_affinity_raii_t() : restored(false) {
938 KMP_ASSERT(mask != NULL);
939 __kmp_get_system_affinity(mask, TRUE);
942 __kmp_set_system_affinity(mask, TRUE);
946 ~kmp_affinity_raii_t() {
948 __kmp_set_system_affinity(mask, TRUE);
954 bool KMPAffinity::picked_api =
false;
956 void *KMPAffinity::Mask::operator
new(
size_t n) {
return __kmp_allocate(n); }
957 void *KMPAffinity::Mask::operator
new[](
size_t n) {
return __kmp_allocate(n); }
958 void KMPAffinity::Mask::operator
delete(
void *p) { __kmp_free(p); }
959 void KMPAffinity::Mask::operator
delete[](
void *p) { __kmp_free(p); }
960 void *KMPAffinity::operator
new(
size_t n) {
return __kmp_allocate(n); }
961 void KMPAffinity::operator
delete(
void *p) { __kmp_free(p); }
963 void KMPAffinity::pick_api() {
964 KMPAffinity *affinity_dispatch;
970 if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
971 __kmp_affinity_type != affinity_disabled) {
972 affinity_dispatch =
new KMPHwlocAffinity();
976 affinity_dispatch =
new KMPNativeAffinity();
978 __kmp_affinity_dispatch = affinity_dispatch;
982 void KMPAffinity::destroy_api() {
983 if (__kmp_affinity_dispatch != NULL) {
984 delete __kmp_affinity_dispatch;
985 __kmp_affinity_dispatch = NULL;
990 #define KMP_ADVANCE_SCAN(scan) \
991 while (*scan != '\0') { \
999 char *__kmp_affinity_print_mask(
char *buf,
int buf_len,
1000 kmp_affin_mask_t *mask) {
1001 int start = 0, finish = 0, previous = 0;
1004 KMP_ASSERT(buf_len >= 40);
1007 char *end = buf + buf_len - 1;
1010 if (mask->begin() == mask->end()) {
1011 KMP_SNPRINTF(scan, end - scan + 1,
"{<empty>}");
1012 KMP_ADVANCE_SCAN(scan);
1013 KMP_ASSERT(scan <= end);
1018 start = mask->begin();
1022 for (finish = mask->next(start), previous = start;
1023 finish == previous + 1 && finish != mask->end();
1024 finish = mask->next(finish)) {
1031 KMP_SNPRINTF(scan, end - scan + 1,
"%s",
",");
1032 KMP_ADVANCE_SCAN(scan);
1034 first_range =
false;
1037 if (previous - start > 1) {
1038 KMP_SNPRINTF(scan, end - scan + 1,
"%u-%u", start, previous);
1041 KMP_SNPRINTF(scan, end - scan + 1,
"%u", start);
1042 KMP_ADVANCE_SCAN(scan);
1043 if (previous - start > 0) {
1044 KMP_SNPRINTF(scan, end - scan + 1,
",%u", previous);
1047 KMP_ADVANCE_SCAN(scan);
1050 if (start == mask->end())
1058 KMP_ASSERT(scan <= end);
1061 #undef KMP_ADVANCE_SCAN
1067 kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
1068 kmp_affin_mask_t *mask) {
1069 int start = 0, finish = 0, previous = 0;
1074 __kmp_str_buf_clear(buf);
1077 if (mask->begin() == mask->end()) {
1078 __kmp_str_buf_print(buf,
"%s",
"{<empty>}");
1083 start = mask->begin();
1087 for (finish = mask->next(start), previous = start;
1088 finish == previous + 1 && finish != mask->end();
1089 finish = mask->next(finish)) {
1096 __kmp_str_buf_print(buf,
"%s",
",");
1098 first_range =
false;
1101 if (previous - start > 1) {
1102 __kmp_str_buf_print(buf,
"%u-%u", start, previous);
1105 __kmp_str_buf_print(buf,
"%u", start);
1106 if (previous - start > 0) {
1107 __kmp_str_buf_print(buf,
",%u", previous);
1112 if (start == mask->end())
1120 kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
1121 kmp_affin_mask_t *offline;
1122 KMP_CPU_ALLOC(offline);
1123 KMP_CPU_ZERO(offline);
1125 int n, begin_cpu, end_cpu;
1127 auto skip_ws = [](FILE *f) {
1131 }
while (isspace(c));
1137 int status = offline_file.
try_open(
"/sys/devices/system/cpu/offline",
"r");
1140 while (!feof(offline_file)) {
1141 skip_ws(offline_file);
1142 n = fscanf(offline_file,
"%d", &begin_cpu);
1145 skip_ws(offline_file);
1146 int c = fgetc(offline_file);
1147 if (c == EOF || c ==
',') {
1149 end_cpu = begin_cpu;
1150 }
else if (c ==
'-') {
1152 skip_ws(offline_file);
1153 n = fscanf(offline_file,
"%d", &end_cpu);
1156 skip_ws(offline_file);
1157 c = fgetc(offline_file);
1163 if (begin_cpu < 0 || begin_cpu >= __kmp_xproc || end_cpu < 0 ||
1164 end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
1168 for (
int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
1169 KMP_CPU_SET(cpu, offline);
1177 int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
1181 #if KMP_GROUP_AFFINITY
1183 if (__kmp_num_proc_groups > 1) {
1185 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
1186 for (group = 0; group < __kmp_num_proc_groups; group++) {
1188 int num = __kmp_GetActiveProcessorCount(group);
1189 for (i = 0; i < num; i++) {
1190 KMP_CPU_SET(i + group * (CHAR_BIT *
sizeof(DWORD_PTR)), mask);
1200 kmp_affin_mask_t *offline_cpus = __kmp_affinity_get_offline_cpus();
1201 for (proc = 0; proc < __kmp_xproc; proc++) {
1203 if (KMP_CPU_ISSET(proc, offline_cpus))
1205 KMP_CPU_SET(proc, mask);
1208 KMP_CPU_FREE(offline_cpus);
1217 kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
1220 static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
1221 #if HWLOC_API_VERSION >= 0x00020000
1222 return hwloc_obj_type_is_cache(obj->type);
1224 return obj->type == HWLOC_OBJ_CACHE;
1229 static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {
1231 if (__kmp_hwloc_is_cache_type(obj)) {
1232 if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
1233 return KMP_HW_UNKNOWN;
1234 switch (obj->attr->cache.depth) {
1238 #if KMP_MIC_SUPPORTED
1239 if (__kmp_mic_type == mic3) {
1247 return KMP_HW_UNKNOWN;
1250 switch (obj->type) {
1251 case HWLOC_OBJ_PACKAGE:
1252 return KMP_HW_SOCKET;
1253 case HWLOC_OBJ_NUMANODE:
1255 case HWLOC_OBJ_CORE:
1258 return KMP_HW_THREAD;
1259 case HWLOC_OBJ_GROUP:
1260 if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE)
1262 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_TILE)
1264 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_MODULE)
1265 return KMP_HW_MODULE;
1266 else if (obj->attr->group.kind == HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP)
1267 return KMP_HW_PROC_GROUP;
1268 return KMP_HW_UNKNOWN;
1269 #if HWLOC_API_VERSION >= 0x00020100
1274 return KMP_HW_UNKNOWN;
1281 static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
1282 hwloc_obj_type_t type) {
1285 for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
1286 obj->logical_index, type, 0);
1287 first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology,
1288 obj->type, first) == obj;
1289 first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
1298 static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
1299 hwloc_obj_t lower) {
1301 hwloc_obj_type_t ltype = lower->type;
1302 int lindex = lower->logical_index - 1;
1305 obj = hwloc_get_obj_by_type(t, ltype, lindex);
1306 while (obj && lindex >= 0 &&
1307 hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
1308 if (obj->userdata) {
1309 sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
1314 obj = hwloc_get_obj_by_type(t, ltype, lindex);
1317 lower->userdata = RCAST(
void *, sub_id + 1);
1321 static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *
const msg_id) {
1323 int hw_thread_index, sub_id;
1325 hwloc_obj_t pu, obj, root, prev;
1326 kmp_hw_t types[KMP_HW_LAST];
1327 hwloc_obj_type_t hwloc_types[KMP_HW_LAST];
1329 hwloc_topology_t tp = __kmp_hwloc_topology;
1330 *msg_id = kmp_i18n_null;
1331 if (__kmp_affinity_verbose) {
1332 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
1335 if (!KMP_AFFINITY_CAPABLE()) {
1338 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1340 hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
1342 nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
1345 o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
1347 __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
1349 __kmp_nThreadsPerCore = 1;
1350 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1351 if (nCoresPerPkg == 0)
1353 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1358 int nr_cpu_kinds = hwloc_cpukinds_get_nr(tp, 0);
1360 typedef struct kmp_hwloc_cpukinds_info_t {
1362 kmp_hw_core_type_t core_type;
1363 hwloc_bitmap_t mask;
1364 } kmp_hwloc_cpukinds_info_t;
1365 kmp_hwloc_cpukinds_info_t *cpukinds =
nullptr;
1367 if (nr_cpu_kinds > 0) {
1369 struct hwloc_info_s *infos;
1370 cpukinds = (kmp_hwloc_cpukinds_info_t *)__kmp_allocate(
1371 sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds);
1372 for (
unsigned idx = 0; idx < (unsigned)nr_cpu_kinds; ++idx) {
1373 cpukinds[idx].efficiency = -1;
1374 cpukinds[idx].core_type = KMP_HW_CORE_TYPE_UNKNOWN;
1375 cpukinds[idx].mask = hwloc_bitmap_alloc();
1376 if (hwloc_cpukinds_get_info(tp, idx, cpukinds[idx].mask,
1377 &cpukinds[idx].efficiency, &nr_infos, &infos,
1379 for (
unsigned i = 0; i < nr_infos; ++i) {
1380 if (__kmp_str_match(
"CoreType", 8, infos[i].name)) {
1381 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1382 if (__kmp_str_match(
"IntelAtom", 9, infos[i].value)) {
1383 cpukinds[idx].core_type = KMP_HW_CORE_TYPE_ATOM;
1385 }
else if (__kmp_str_match(
"IntelCore", 9, infos[i].value)) {
1386 cpukinds[idx].core_type = KMP_HW_CORE_TYPE_CORE;
1396 root = hwloc_get_root_obj(tp);
1400 pu = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
1403 types[depth] = KMP_HW_THREAD;
1404 hwloc_types[depth] = obj->type;
1406 while (obj != root && obj != NULL) {
1408 #if HWLOC_API_VERSION >= 0x00020000
1409 if (obj->memory_arity) {
1411 for (memory = obj->memory_first_child; memory;
1412 memory = hwloc_get_next_child(tp, obj, memory)) {
1413 if (memory->type == HWLOC_OBJ_NUMANODE)
1416 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
1417 types[depth] = KMP_HW_NUMA;
1418 hwloc_types[depth] = memory->type;
1423 type = __kmp_hwloc_type_2_topology_type(obj);
1424 if (type != KMP_HW_UNKNOWN) {
1425 types[depth] = type;
1426 hwloc_types[depth] = obj->type;
1430 KMP_ASSERT(depth > 0);
1433 for (
int i = 0, j = depth - 1; i < j; ++i, --j) {
1434 hwloc_obj_type_t hwloc_temp = hwloc_types[i];
1435 kmp_hw_t temp = types[i];
1436 types[i] = types[j];
1438 hwloc_types[i] = hwloc_types[j];
1439 hwloc_types[j] = hwloc_temp;
1443 __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1445 hw_thread_index = 0;
1447 while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
1448 int index = depth - 1;
1449 bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
1450 kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
1453 hw_thread.ids[index] = pu->logical_index;
1454 hw_thread.os_id = pu->os_index;
1457 int cpukind_index = -1;
1458 for (
int i = 0; i < nr_cpu_kinds; ++i) {
1459 if (hwloc_bitmap_isset(cpukinds[i].mask, hw_thread.os_id)) {
1464 if (cpukind_index >= 0)
1465 hw_thread.core_type = cpukinds[cpukind_index].core_type;
1471 while (obj != root && obj != NULL) {
1473 #if HWLOC_API_VERSION >= 0x00020000
1477 if (obj->memory_arity) {
1479 for (memory = obj->memory_first_child; memory;
1480 memory = hwloc_get_next_child(tp, obj, memory)) {
1481 if (memory->type == HWLOC_OBJ_NUMANODE)
1484 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
1485 sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
1487 hw_thread.ids[index] = memory->logical_index;
1488 hw_thread.ids[index + 1] = sub_id;
1496 type = __kmp_hwloc_type_2_topology_type(obj);
1497 if (type != KMP_HW_UNKNOWN) {
1498 sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
1500 hw_thread.ids[index] = obj->logical_index;
1501 hw_thread.ids[index + 1] = sub_id;
1513 for (
int idx = 0; idx < nr_cpu_kinds; ++idx)
1514 hwloc_bitmap_free(cpukinds[idx].mask);
1515 __kmp_free(cpukinds);
1517 __kmp_topology->sort_ids();
1525 static bool __kmp_affinity_create_flat_map(kmp_i18n_id_t *
const msg_id) {
1526 *msg_id = kmp_i18n_null;
1528 kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};
1530 if (__kmp_affinity_verbose) {
1531 KMP_INFORM(UsingFlatOS,
"KMP_AFFINITY");
1537 if (!KMP_AFFINITY_CAPABLE()) {
1538 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1539 __kmp_ncores = nPackages = __kmp_xproc;
1540 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1548 __kmp_ncores = nPackages = __kmp_avail_proc;
1549 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1552 __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1555 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1557 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1560 kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct);
1562 hw_thread.os_id = i;
1563 hw_thread.ids[0] = i;
1564 hw_thread.ids[1] = 0;
1565 hw_thread.ids[2] = 0;
1568 if (__kmp_affinity_verbose) {
1569 KMP_INFORM(OSProcToPackage,
"KMP_AFFINITY");
1574 #if KMP_GROUP_AFFINITY
1579 static bool __kmp_affinity_create_proc_group_map(kmp_i18n_id_t *
const msg_id) {
1580 *msg_id = kmp_i18n_null;
1582 kmp_hw_t types[] = {KMP_HW_PROC_GROUP, KMP_HW_CORE, KMP_HW_THREAD};
1583 const static size_t BITS_PER_GROUP = CHAR_BIT *
sizeof(DWORD_PTR);
1585 if (__kmp_affinity_verbose) {
1586 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
1590 if (!KMP_AFFINITY_CAPABLE()) {
1591 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1592 nPackages = __kmp_num_proc_groups;
1593 __kmp_nThreadsPerCore = 1;
1594 __kmp_ncores = __kmp_xproc;
1595 nCoresPerPkg = nPackages / __kmp_ncores;
1600 __kmp_topology = kmp_topology_t::allocate(__kmp_avail_proc, depth, types);
1603 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1605 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1608 kmp_hw_thread_t &hw_thread = __kmp_topology->at(avail_ct++);
1610 hw_thread.os_id = i;
1611 hw_thread.ids[0] = i / BITS_PER_GROUP;
1612 hw_thread.ids[1] = hw_thread.ids[2] = i % BITS_PER_GROUP;
1618 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1620 template <kmp_u
int32 LSB, kmp_u
int32 MSB>
1621 static inline unsigned __kmp_extract_bits(kmp_uint32 v) {
1622 const kmp_uint32 SHIFT_LEFT =
sizeof(kmp_uint32) * 8 - 1 - MSB;
1623 const kmp_uint32 SHIFT_RIGHT = LSB;
1624 kmp_uint32 retval = v;
1625 retval <<= SHIFT_LEFT;
1626 retval >>= (SHIFT_LEFT + SHIFT_RIGHT);
1630 static int __kmp_cpuid_mask_width(
int count) {
1633 while ((1 << r) < count)
1638 class apicThreadInfo {
1642 unsigned maxCoresPerPkg;
1643 unsigned maxThreadsPerPkg;
1649 static int __kmp_affinity_cmp_apicThreadInfo_phys_id(
const void *a,
1651 const apicThreadInfo *aa = (
const apicThreadInfo *)a;
1652 const apicThreadInfo *bb = (
const apicThreadInfo *)b;
1653 if (aa->pkgId < bb->pkgId)
1655 if (aa->pkgId > bb->pkgId)
1657 if (aa->coreId < bb->coreId)
1659 if (aa->coreId > bb->coreId)
1661 if (aa->threadId < bb->threadId)
1663 if (aa->threadId > bb->threadId)
1668 class kmp_cache_info_t {
1671 unsigned level, mask;
1673 kmp_cache_info_t() : depth(0) { get_leaf4_levels(); }
1674 size_t get_depth()
const {
return depth; }
1675 info_t &operator[](
size_t index) {
return table[index]; }
1676 const info_t &operator[](
size_t index)
const {
return table[index]; }
1678 static kmp_hw_t get_topology_type(
unsigned level) {
1679 KMP_DEBUG_ASSERT(level >= 1 && level <= MAX_CACHE_LEVEL);
1688 return KMP_HW_UNKNOWN;
1692 static const int MAX_CACHE_LEVEL = 3;
1695 info_t table[MAX_CACHE_LEVEL];
1697 void get_leaf4_levels() {
1699 while (depth < MAX_CACHE_LEVEL) {
1700 unsigned cache_type, max_threads_sharing;
1701 unsigned cache_level, cache_mask_width;
1703 __kmp_x86_cpuid(4, level, &buf2);
1704 cache_type = __kmp_extract_bits<0, 4>(buf2.eax);
1708 if (cache_type == 2) {
1712 max_threads_sharing = __kmp_extract_bits<14, 25>(buf2.eax) + 1;
1713 cache_mask_width = __kmp_cpuid_mask_width(max_threads_sharing);
1714 cache_level = __kmp_extract_bits<5, 7>(buf2.eax);
1715 table[depth].level = cache_level;
1716 table[depth].mask = ((-1) << cache_mask_width);
1727 static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *
const msg_id) {
1729 *msg_id = kmp_i18n_null;
1731 if (__kmp_affinity_verbose) {
1732 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
1736 __kmp_x86_cpuid(0, 0, &buf);
1738 *msg_id = kmp_i18n_str_NoLeaf4Support;
1747 if (!KMP_AFFINITY_CAPABLE()) {
1750 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1756 __kmp_x86_cpuid(1, 0, &buf);
1757 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1758 if (maxThreadsPerPkg == 0) {
1759 maxThreadsPerPkg = 1;
1773 __kmp_x86_cpuid(0, 0, &buf);
1775 __kmp_x86_cpuid(4, 0, &buf);
1776 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1794 __kmp_ncores = __kmp_xproc;
1795 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1796 __kmp_nThreadsPerCore = 1;
1805 kmp_affinity_raii_t previous_affinity;
1833 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1834 __kmp_avail_proc *
sizeof(apicThreadInfo));
1835 unsigned nApics = 0;
1836 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1838 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1841 KMP_DEBUG_ASSERT((
int)nApics < __kmp_avail_proc);
1843 __kmp_affinity_dispatch->bind_thread(i);
1844 threadInfo[nApics].osId = i;
1847 __kmp_x86_cpuid(1, 0, &buf);
1848 if (((buf.edx >> 9) & 1) == 0) {
1849 __kmp_free(threadInfo);
1850 *msg_id = kmp_i18n_str_ApicNotPresent;
1853 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1854 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1855 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1856 threadInfo[nApics].maxThreadsPerPkg = 1;
1865 __kmp_x86_cpuid(0, 0, &buf);
1867 __kmp_x86_cpuid(4, 0, &buf);
1868 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1870 threadInfo[nApics].maxCoresPerPkg = 1;
1874 int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
1875 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1877 int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
1878 int widthT = widthCT - widthC;
1883 __kmp_free(threadInfo);
1884 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1888 int maskC = (1 << widthC) - 1;
1889 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
1891 int maskT = (1 << widthT) - 1;
1892 threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
1899 previous_affinity.restore();
1902 qsort(threadInfo, nApics,
sizeof(*threadInfo),
1903 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1920 __kmp_nThreadsPerCore = 1;
1921 unsigned nCores = 1;
1924 unsigned lastPkgId = threadInfo[0].pkgId;
1925 unsigned coreCt = 1;
1926 unsigned lastCoreId = threadInfo[0].coreId;
1927 unsigned threadCt = 1;
1928 unsigned lastThreadId = threadInfo[0].threadId;
1931 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1932 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1934 for (i = 1; i < nApics; i++) {
1935 if (threadInfo[i].pkgId != lastPkgId) {
1938 lastPkgId = threadInfo[i].pkgId;
1939 if ((
int)coreCt > nCoresPerPkg)
1940 nCoresPerPkg = coreCt;
1942 lastCoreId = threadInfo[i].coreId;
1943 if ((
int)threadCt > __kmp_nThreadsPerCore)
1944 __kmp_nThreadsPerCore = threadCt;
1946 lastThreadId = threadInfo[i].threadId;
1950 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1951 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1955 if (threadInfo[i].coreId != lastCoreId) {
1958 lastCoreId = threadInfo[i].coreId;
1959 if ((
int)threadCt > __kmp_nThreadsPerCore)
1960 __kmp_nThreadsPerCore = threadCt;
1962 lastThreadId = threadInfo[i].threadId;
1963 }
else if (threadInfo[i].threadId != lastThreadId) {
1965 lastThreadId = threadInfo[i].threadId;
1967 __kmp_free(threadInfo);
1968 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1974 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
1975 (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1976 __kmp_free(threadInfo);
1977 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1985 if ((
int)coreCt > nCoresPerPkg)
1986 nCoresPerPkg = coreCt;
1987 if ((
int)threadCt > __kmp_nThreadsPerCore)
1988 __kmp_nThreadsPerCore = threadCt;
1989 __kmp_ncores = nCores;
1990 KMP_DEBUG_ASSERT(nApics == (
unsigned)__kmp_avail_proc);
1998 int threadLevel = 2;
2000 int depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
2003 types[idx++] = KMP_HW_SOCKET;
2005 types[idx++] = KMP_HW_CORE;
2006 if (threadLevel >= 0)
2007 types[idx++] = KMP_HW_THREAD;
2009 KMP_ASSERT(depth > 0);
2010 __kmp_topology = kmp_topology_t::allocate(nApics, depth, types);
2012 for (i = 0; i < nApics; ++i) {
2014 unsigned os = threadInfo[i].osId;
2015 kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
2018 if (pkgLevel >= 0) {
2019 hw_thread.ids[idx++] = threadInfo[i].pkgId;
2021 if (coreLevel >= 0) {
2022 hw_thread.ids[idx++] = threadInfo[i].coreId;
2024 if (threadLevel >= 0) {
2025 hw_thread.ids[idx++] = threadInfo[i].threadId;
2027 hw_thread.os_id = os;
2030 __kmp_free(threadInfo);
2031 __kmp_topology->sort_ids();
2032 if (!__kmp_topology->check_ids()) {
2033 kmp_topology_t::deallocate(__kmp_topology);
2034 __kmp_topology =
nullptr;
2035 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
2043 static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
2044 unsigned *native_model_id) {
2046 __kmp_x86_cpuid(0x1a, 0, &buf);
2047 *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
2048 *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
2070 INTEL_LEVEL_TYPE_INVALID = 0,
2071 INTEL_LEVEL_TYPE_SMT = 1,
2072 INTEL_LEVEL_TYPE_CORE = 2,
2073 INTEL_LEVEL_TYPE_TILE = 3,
2074 INTEL_LEVEL_TYPE_MODULE = 4,
2075 INTEL_LEVEL_TYPE_DIE = 5,
2076 INTEL_LEVEL_TYPE_LAST = 6,
2079 struct cpuid_level_info_t {
2080 unsigned level_type, mask, mask_width, nitems, cache_mask;
2083 static kmp_hw_t __kmp_intel_type_2_topology_type(
int intel_type) {
2084 switch (intel_type) {
2085 case INTEL_LEVEL_TYPE_INVALID:
2086 return KMP_HW_SOCKET;
2087 case INTEL_LEVEL_TYPE_SMT:
2088 return KMP_HW_THREAD;
2089 case INTEL_LEVEL_TYPE_CORE:
2091 case INTEL_LEVEL_TYPE_TILE:
2093 case INTEL_LEVEL_TYPE_MODULE:
2094 return KMP_HW_MODULE;
2095 case INTEL_LEVEL_TYPE_DIE:
2098 return KMP_HW_UNKNOWN;
2105 __kmp_x2apicid_get_levels(
int leaf,
2106 cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],
2107 kmp_uint64 known_levels) {
2108 unsigned level, levels_index;
2109 unsigned level_type, mask_width, nitems;
2119 level = levels_index = 0;
2121 __kmp_x86_cpuid(leaf, level, &buf);
2122 level_type = __kmp_extract_bits<8, 15>(buf.ecx);
2123 mask_width = __kmp_extract_bits<0, 4>(buf.eax);
2124 nitems = __kmp_extract_bits<0, 15>(buf.ebx);
2125 if (level_type != INTEL_LEVEL_TYPE_INVALID && nitems == 0)
2128 if (known_levels & (1ull << level_type)) {
2130 KMP_ASSERT(levels_index < INTEL_LEVEL_TYPE_LAST);
2131 levels[levels_index].level_type = level_type;
2132 levels[levels_index].mask_width = mask_width;
2133 levels[levels_index].nitems = nitems;
2137 if (levels_index > 0) {
2138 levels[levels_index - 1].mask_width = mask_width;
2139 levels[levels_index - 1].nitems = nitems;
2143 }
while (level_type != INTEL_LEVEL_TYPE_INVALID);
2146 for (
unsigned i = 0; i < levels_index; ++i) {
2147 if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) {
2148 levels[i].mask = ~((-1) << levels[i].mask_width);
2149 levels[i].cache_mask = (-1) << levels[i].mask_width;
2150 for (
unsigned j = 0; j < i; ++j)
2151 levels[i].mask ^= levels[j].mask;
2153 KMP_DEBUG_ASSERT(levels_index > 0);
2154 levels[i].mask = (-1) << levels[i - 1].mask_width;
2155 levels[i].cache_mask = 0;
2158 return levels_index;
2161 static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *
const msg_id) {
2163 cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST];
2164 kmp_hw_t types[INTEL_LEVEL_TYPE_LAST];
2165 unsigned levels_index;
2167 kmp_uint64 known_levels;
2168 int topology_leaf, highest_leaf, apic_id;
2170 static int leaves[] = {0, 0};
2172 kmp_i18n_id_t leaf_message_id;
2174 KMP_BUILD_ASSERT(
sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST);
2176 *msg_id = kmp_i18n_null;
2177 if (__kmp_affinity_verbose) {
2178 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
2182 known_levels = 0ull;
2183 for (
int i = 0; i < INTEL_LEVEL_TYPE_LAST; ++i) {
2184 if (__kmp_intel_type_2_topology_type(i) != KMP_HW_UNKNOWN) {
2185 known_levels |= (1ull << i);
2190 __kmp_x86_cpuid(0, 0, &buf);
2191 highest_leaf = buf.eax;
2196 if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
2199 leaf_message_id = kmp_i18n_str_NoLeaf11Support;
2200 }
else if (__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
2203 leaf_message_id = kmp_i18n_str_NoLeaf31Support;
2208 leaf_message_id = kmp_i18n_str_NoLeaf11Support;
2212 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2214 for (
int i = 0; i < num_leaves; ++i) {
2215 int leaf = leaves[i];
2216 if (highest_leaf < leaf)
2218 __kmp_x86_cpuid(leaf, 0, &buf);
2221 topology_leaf = leaf;
2222 levels_index = __kmp_x2apicid_get_levels(leaf, levels, known_levels);
2223 if (levels_index == 0)
2227 if (topology_leaf == -1 || levels_index == 0) {
2228 *msg_id = leaf_message_id;
2231 KMP_ASSERT(levels_index <= INTEL_LEVEL_TYPE_LAST);
2238 if (!KMP_AFFINITY_CAPABLE()) {
2241 KMP_ASSERT(__kmp_affinity_type == affinity_none);
2242 for (
unsigned i = 0; i < levels_index; ++i) {
2243 if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
2244 __kmp_nThreadsPerCore = levels[i].nitems;
2245 }
else if (levels[i].level_type == INTEL_LEVEL_TYPE_CORE) {
2246 nCoresPerPkg = levels[i].nitems;
2249 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
2250 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
2255 int depth = levels_index;
2256 for (
int i = depth - 1, j = 0; i >= 0; --i, ++j)
2257 types[j] = __kmp_intel_type_2_topology_type(levels[i].level_type);
2259 kmp_topology_t::allocate(__kmp_avail_proc, levels_index, types);
2262 kmp_cache_info_t cache_info;
2263 for (
size_t i = 0; i < cache_info.get_depth(); ++i) {
2264 const kmp_cache_info_t::info_t &info = cache_info[i];
2265 unsigned cache_mask = info.mask;
2266 unsigned cache_level = info.level;
2267 for (
unsigned j = 0; j < levels_index; ++j) {
2268 unsigned hw_cache_mask = levels[j].cache_mask;
2269 kmp_hw_t cache_type = kmp_cache_info_t::get_topology_type(cache_level);
2270 if (hw_cache_mask == cache_mask && j < levels_index - 1) {
2272 __kmp_intel_type_2_topology_type(levels[j + 1].level_type);
2273 __kmp_topology->set_equivalent_type(cache_type, type);
2283 kmp_affinity_raii_t previous_affinity;
2288 int hw_thread_index = 0;
2289 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
2290 cpuid_level_info_t my_levels[INTEL_LEVEL_TYPE_LAST];
2291 unsigned my_levels_index;
2294 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
2297 KMP_DEBUG_ASSERT(hw_thread_index < __kmp_avail_proc);
2299 __kmp_affinity_dispatch->bind_thread(proc);
2302 __kmp_x86_cpuid(topology_leaf, 0, &buf);
2304 kmp_hw_thread_t &hw_thread = __kmp_topology->at(hw_thread_index);
2306 __kmp_x2apicid_get_levels(topology_leaf, my_levels, known_levels);
2307 if (my_levels_index == 0 || my_levels_index != levels_index) {
2308 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
2312 hw_thread.os_id = proc;
2314 for (
unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
2315 hw_thread.ids[idx] = apic_id & my_levels[j].mask;
2317 hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
2321 if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
2322 kmp_hw_core_type_t type;
2323 unsigned native_model_id;
2324 __kmp_get_hybrid_info(&type, &native_model_id);
2325 hw_thread.core_type = type;
2329 KMP_ASSERT(hw_thread_index > 0);
2330 __kmp_topology->sort_ids();
2331 if (!__kmp_topology->check_ids()) {
2332 kmp_topology_t::deallocate(__kmp_topology);
2333 __kmp_topology =
nullptr;
2334 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
2342 #define threadIdIndex 1
2343 #define coreIdIndex 2
2344 #define pkgIdIndex 3
2345 #define nodeIdIndex 4
2347 typedef unsigned *ProcCpuInfo;
2348 static unsigned maxIndex = pkgIdIndex;
2350 static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(
const void *a,
2353 const unsigned *aa = *(
unsigned *
const *)a;
2354 const unsigned *bb = *(
unsigned *
const *)b;
2355 for (i = maxIndex;; i--) {
2366 #if KMP_USE_HIER_SCHED
2368 static void __kmp_dispatch_set_hierarchy_values() {
2374 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
2375 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2376 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
2377 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
2379 if (__kmp_mic_type >= mic3)
2380 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
2383 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
2384 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
2385 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
2386 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
2389 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
2390 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
2391 __kmp_nThreadsPerCore;
2392 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
2394 if (__kmp_mic_type >= mic3)
2395 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2396 2 * __kmp_nThreadsPerCore;
2399 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2400 __kmp_nThreadsPerCore;
2401 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
2402 nCoresPerPkg * __kmp_nThreadsPerCore;
2403 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
2404 nCoresPerPkg * __kmp_nThreadsPerCore;
2405 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
2406 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2411 int __kmp_dispatch_get_index(
int tid, kmp_hier_layer_e type) {
2412 int index = type + 1;
2413 int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
2414 KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
2415 if (type == kmp_hier_layer_e::LAYER_THREAD)
2417 else if (type == kmp_hier_layer_e::LAYER_LOOP)
2419 KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
2420 if (tid >= num_hw_threads)
2421 tid = tid % num_hw_threads;
2422 return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
2426 int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
2429 KMP_DEBUG_ASSERT(i1 <= i2);
2430 KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
2431 KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
2432 KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
2434 return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
2438 static inline const char *__kmp_cpuinfo_get_filename() {
2439 const char *filename;
2440 if (__kmp_cpuinfo_file !=
nullptr)
2441 filename = __kmp_cpuinfo_file;
2443 filename =
"/proc/cpuinfo";
2447 static inline const char *__kmp_cpuinfo_get_envvar() {
2448 const char *envvar =
nullptr;
2449 if (__kmp_cpuinfo_file !=
nullptr)
2450 envvar =
"KMP_CPUINFO_FILE";
2456 static bool __kmp_affinity_create_cpuinfo_map(
int *line,
2457 kmp_i18n_id_t *
const msg_id) {
2458 const char *filename = __kmp_cpuinfo_get_filename();
2459 const char *envvar = __kmp_cpuinfo_get_envvar();
2460 *msg_id = kmp_i18n_null;
2462 if (__kmp_affinity_verbose) {
2463 KMP_INFORM(AffParseFilename,
"KMP_AFFINITY", filename);
2471 unsigned num_records = 0;
2473 buf[
sizeof(buf) - 1] = 1;
2474 if (!fgets(buf,
sizeof(buf), f)) {
2479 char s1[] =
"processor";
2480 if (strncmp(buf, s1,
sizeof(s1) - 1) == 0) {
2487 if (KMP_SSCANF(buf,
"node_%u id", &level) == 1) {
2489 if (level > (
unsigned)__kmp_xproc) {
2490 level = __kmp_xproc;
2492 if (nodeIdIndex + level >= maxIndex) {
2493 maxIndex = nodeIdIndex + level;
2501 if (num_records == 0) {
2502 *msg_id = kmp_i18n_str_NoProcRecords;
2505 if (num_records > (
unsigned)__kmp_xproc) {
2506 *msg_id = kmp_i18n_str_TooManyProcRecords;
2515 if (fseek(f, 0, SEEK_SET) != 0) {
2516 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
2522 unsigned **threadInfo =
2523 (
unsigned **)__kmp_allocate((num_records + 1) *
sizeof(
unsigned *));
2525 for (i = 0; i <= num_records; i++) {
2527 (
unsigned *)__kmp_allocate((maxIndex + 1) *
sizeof(unsigned));
2530 #define CLEANUP_THREAD_INFO \
2531 for (i = 0; i <= num_records; i++) { \
2532 __kmp_free(threadInfo[i]); \
2534 __kmp_free(threadInfo);
2539 #define INIT_PROC_INFO(p) \
2540 for (__index = 0; __index <= maxIndex; __index++) { \
2541 (p)[__index] = UINT_MAX; \
2544 for (i = 0; i <= num_records; i++) {
2545 INIT_PROC_INFO(threadInfo[i]);
  unsigned num_avail = 0;
  while (!feof(f)) {
    // An inner scope so that the goto targets at the end of the loop sit in
    // an outer scope relative to the locals declared here.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read error, presumably EOF. If threadInfo[num_avail] holds valid
        // data, fake a blank line so the last record still gets processed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and only report an
        // error if the line would otherwise have been used.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return false;                                                              \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
          // The old AArch64 /proc/cpuinfo layout lists every 'processor'
          // entry in a single section, so the duplicate-field check below
          // would always fire; count the entry instead.
          num_avail++;
#else
          goto dup_field;
#endif
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        // Validate the node level before using it as an index.
        if (level > (unsigned)__kmp_xproc) {
          level = __kmp_xproc;
        }
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }

      // Unrecognized leading token: if the line isn't empty, skip it.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line was longer than the buffer, consume the rest of it.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A blank line marks the end of one processor record.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return false;
      }
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return false;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return false;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // Successful parse of this proc's record; prepare for the next one.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return false;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return false;
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);

  // Sort the threadInfo table by physical id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;
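  // The records are now sorted by physical id, but the radix of each id field
  // is unknown and thread ids may be missing or duplicated. The pass below
  // counts entities per level; if duplicates appear without explicit thread
  // ids, it restarts and assigns thread ids itself.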
restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the remaining records, counting new entities at each level.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant field whose id differs from the last record.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // The thread id was specified for some records and not others;
        // continue numbering from the next higher thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Reset the counts of all less significant fields, updating the
        // totals and last ids at each of those levels.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {
#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine minus
          // one thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return false;
      }
      // Thread ids were not specified and duplicates were seen; restart the
      // loop and assign thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Even with affinity off this routine sets __kmp_ncores and friends;
  // return now if affinity is not enabled.
  __kmp_ncores = totals[coreIdIndex];
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    return true;
  }

#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  // A level is kept in the map if it has more entities than its parent level;
  // the package, core and thread levels are always kept.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;
  inMap[coreIdIndex] = true;
  inMap[threadIdIndex] = true;

  int depth = 0;
  int idx = 0;
  kmp_hw_t types[KMP_HW_LAST];
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  if (inMap[pkgIdIndex]) {
    pkgLevel = idx;
    types[idx++] = KMP_HW_SOCKET;
  }
  if (inMap[coreIdIndex]) {
    coreLevel = idx;
    types[idx++] = KMP_HW_CORE;
  }
  if (inMap[threadIdIndex]) {
    threadLevel = idx;
    types[idx++] = KMP_HW_THREAD;
  }
  KMP_ASSERT(depth > 0);

  // Construct the topology that is to be returned.
  __kmp_topology = kmp_topology_t::allocate(num_avail, depth, types);

  for (i = 0; i < num_avail; ++i) {
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
    hw_thread.clear();
    hw_thread.os_id = os;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      if (src_index == pkgIdIndex) {
        hw_thread.ids[pkgLevel] = threadInfo[i][src_index];
      } else if (src_index == coreIdIndex) {
        hw_thread.ids[coreLevel] = threadInfo[i][src_index];
      } else if (src_index == threadIdIndex) {
        hw_thread.ids[threadLevel] = threadInfo[i][src_index];
      }
    }
  }

  CLEANUP_THREAD_INFO;
  __kmp_topology->sort_ids();
  if (!__kmp_topology->check_ids()) {
    kmp_topology_t::deallocate(__kmp_topology);
    __kmp_topology = nullptr;
    *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
    return false;
  }
  return true;
}
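// Form a table of affinity masks indexed by OS proc id. Hardware threads that
// are closer together than __kmp_affinity_gran_levels share one mask; the
// first thread of each such group is marked as the leader.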
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique) {
  // First find the largest OS proc id referenced by the topology.
  int maxOsId;
  int i;
  int numAddrs = __kmp_topology->get_num_hw_threads();
  int depth = __kmp_topology->get_depth();
  KMP_ASSERT(numAddrs);
  KMP_ASSERT(depth);

  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    int osId = __kmp_topology->at(i).os_id;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core;
  // threads on the same core appear consecutively after sorting.
  int unique = 0;
  int j = 0; // index of first thread on core
  int leader = 0;
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(__kmp_topology->at(0).os_id, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is within the granularity setting of the group leader,
    // add its OS proc to the current group's mask and continue.
    if (__kmp_topology->is_close(leader, i, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table and mark the first thread as the leader.
    for (; j < i; j++) {
      int osId = __kmp_topology->at(j).os_id;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      __kmp_topology->at(j).leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
  }

  // Copy the mask for the last group.
  for (; j < i; j++) {
    int osId = __kmp_topology->at(j).os_id;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    __kmp_topology->at(j).leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
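// File-scope state and helper macros for the KMP_AFFINITY proclist and
// OMP_PLACES parsers below: newMasks is a growable array of masks, ADD_MASK
// appends a mask to it, and ADD_MASK_OSID appends the mask of a single OS
// proc after validating the id.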
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }
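// Re-parse the proclist of an explicit affinity setting (for example
// KMP_AFFINITY="explicit,proclist=[0,2-6:2,{8,9}]") and build the array of
// affinity masks, one mask per place.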
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
  int i;
  const char *scan = proclist;
  const char *next = proclist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;
  kmp_affin_mask_t *sumMask;
  KMP_CPU_ALLOC(sumMask);
  int setSize = 0;

  for (;;) {
    int start, end, stride;

    SKIP_WS(scan);
    next = scan;
    if (*next == '\0') {
      break;
    }

    if (*next == '{') {
      int num;
      setSize = 0;
      next++; // skip '{'
      SKIP_WS(next);
      scan = next;

      // Read the first integer in the set.
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
      SKIP_DIGITS(next);
      num = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(num >= 0, "bad explicit proc list");

      // Copy the mask for that osId to the sum (union) mask.
      if ((num > maxOsId) ||
          (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, num);
        }
        KMP_CPU_ZERO(sumMask);
      } else {
        KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
        setSize = 1;
      }

      for (;;) {
        // Check for end of set.
        SKIP_WS(next);
        if (*next == '}') {
          next++; // skip '}'
          break;
        }
        if (*next == ',') {
          next++;
        }
        SKIP_WS(next);

        // Read the next integer in the set.
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                    "bad explicit proc list");
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(num >= 0, "bad explicit proc list");

        // Add the mask for that osId to the sum mask.
        if ((num > maxOsId) ||
            (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, num);
          }
        } else {
          KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
          setSize++;
        }
      }
      if (setSize > 0) {
        ADD_MASK(sumMask);
      }
      SKIP_WS(next);
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // Read the first integer of a (possibly single-element) range.
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(start >= 0, "bad explicit proc list");
    SKIP_WS(next);

    // If this isn't a range, add a single mask and go on.
    if (*next != '-') {
      ADD_MASK_OSID(start, osId2Mask, maxOsId);
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // This is a range. Skip over the '-' and read the second integer.
    next++; // skip '-'
    SKIP_WS(next);
    scan = next;
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    end = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(end >= 0, "bad explicit proc list");

    // Check for an optional stride parameter.
    stride = 1;
    SKIP_WS(next);
    if (*next == ':') {
      int sign = +1;
      next++; // skip ':'
      SKIP_WS(next);
      scan = next;
      if (*next == '-') {
        sign = -1;
        next++;
        SKIP_WS(next);
        scan = next;
      }
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(stride >= 0, "bad explicit proc list");
      stride *= sign;
    }

    KMP_ASSERT2(stride != 0, "bad explicit proc list");
    if (stride > 0) {
      KMP_ASSERT2(start <= end, "bad explicit proc list");
    } else {
      KMP_ASSERT2(start >= end, "bad explicit proc list");
    }
    KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

    // Add the mask for each OS proc # in the range to the list.
    if (stride > 0) {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start <= end);
    } else {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start >= end);
    }

    SKIP_WS(next);
    if (*next == ',') {
      next++;
    }
    scan = next;
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_FREE(sumMask);
}
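// Parse one {place} subexpression of an OMP_PLACES-style list. Each element
// is either a single OS proc id or a <start>:<count>[:<stride>] triple; the
// selected procs are OR'ed into tempMask.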
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read count parameter.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'

    // Read stride parameter.
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // Valid follow sets are ',' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // Valid follow sets are '{' '!' and num.
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial place to form the
  // current place; previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;

  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter.
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter.
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }

    // Add places determined by initial_place : count : stride.
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place from it.
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // Valid follow sets are ',' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }

  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}

#undef ADD_MASK
#undef ADD_MASK_OSID
// Figure out the deepest level at which there is at least one cluster/core
// with more than one processing unit bound to it.
static int __kmp_affinity_find_core_level(int nprocs, int bottom_level) {
  int core_level = 0;

  for (int i = 0; i < nprocs; i++) {
    const kmp_hw_thread_t &hw_thread = __kmp_topology->at(i);
    for (int j = bottom_level; j > 0; j--) {
      if (hw_thread.ids[j] > 0) {
        if (core_level < (j - 1)) {
          core_level = j - 1;
        }
      }
    }
  }
  return core_level;
}

// Count the number of clusters/cores at the given level.
static int __kmp_affinity_compute_ncores(int nprocs, int bottom_level,
                                         int core_level) {
  return __kmp_topology->get_count(core_level);
}

// Find the cluster/core to which the given processing unit is bound.
static int __kmp_affinity_find_core(int proc, int bottom_level,
                                    int core_level) {
  int core = 0;
  KMP_DEBUG_ASSERT(proc >= 0 && proc < __kmp_topology->get_num_hw_threads());
  for (int i = 0; i <= proc; ++i) {
    if (i + 1 <= proc) {
      for (int j = 0; j <= core_level; ++j) {
        if (__kmp_topology->at(i + 1).sub_ids[j] !=
            __kmp_topology->at(i).sub_ids[j]) {
          core++;
          break;
        }
      }
    }
  }
  return core;
}

// Find the maximal number of processing units bound to one cluster/core at
// the given level.
static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
                                            int core_level) {
  if (core_level >= bottom_level)
    return 1;
  int thread_level = __kmp_topology->get_level(KMP_HW_THREAD);
  return __kmp_topology->calculate_ratio(thread_level, core_level);
}

static int *procarr = NULL;
static int __kmp_aff_depth = 0;
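// Create a one-element place list that only contains the initial process's
// full affinity mask; used whenever the affinity type resolves to "none".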
static void __kmp_create_affinity_none_places() {
  KMP_ASSERT(__kmp_affin_fullMask != NULL);
  KMP_ASSERT(__kmp_affinity_type == affinity_none);
  __kmp_affinity_num_masks = 1;
  KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
  KMP_CPU_COPY(dest, __kmp_affin_fullMask);
}
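// Main affinity initialization: build the full mask, discover the machine
// topology with the first method that succeeds (or the one forced by
// KMP_TOPOLOGY_METHOD), then create the place masks for the chosen affinity
// type.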
static void __kmp_aux_affinity_initialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_ASSERT(__kmp_affin_fullMask != NULL);
    return;
  }

  // Create the "full" mask - this defines all of the processors that we
  // consider to be in the machine model. If respect is set, it is the
  // initialization thread's affinity mask; otherwise it is all processors
  // known to be on the machine.
  if (__kmp_affin_fullMask == NULL) {
    KMP_CPU_ALLOC(__kmp_affin_fullMask);
  }
  if (KMP_AFFINITY_CAPABLE()) {
    __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
    if (__kmp_affinity_respect_mask) {
      // Count the number of available processors.
      unsigned i;
      __kmp_avail_proc = 0;
      KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
        if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
          continue;
        }
        __kmp_avail_proc++;
      }
      if (__kmp_avail_proc > __kmp_xproc) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
      }

      if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      }
    } else {
      if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      __kmp_avail_proc =
          __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
#if KMP_OS_WINDOWS
      // Threads' affinity masks must be a subset of the process affinity
      // mask on Windows, so set the process mask here.
      __kmp_affin_fullMask->set_process_affinity(true);
#endif
    }
  }
  kmp_i18n_id_t msg_id = kmp_i18n_null;

  // For backward compatibility, setting KMP_CPUINFO_FILE implies
  // KMP_TOPOLOGY_METHOD=cpuinfo.
  if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
    __kmp_affinity_top_method = affinity_top_method_cpuinfo;
  }

  bool success = false;
  if (__kmp_affinity_top_method == affinity_top_method_all) {
    // In the default code path, errors are not fatal - we just try the next
    // method.
#if KMP_USE_HWLOC
    if (!success &&
        __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
      if (!__kmp_hwloc_error) {
        success = __kmp_affinity_create_hwloc_map(&msg_id);
        if (!success && __kmp_affinity_verbose) {
          KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
        }
      } else if (__kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
      }
    }
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (!success) {
      success = __kmp_affinity_create_x2apicid_map(&msg_id);
      if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
      }
    }
    if (!success) {
      success = __kmp_affinity_create_apicid_map(&msg_id);
      if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
      }
    }
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

    if (!success) {
      int line = 0;
      success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
      if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
      }
    }

#if KMP_GROUP_AFFINITY
    if (!success && (__kmp_num_proc_groups > 1)) {
      success = __kmp_affinity_create_proc_group_map(&msg_id);
      if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
      }
    }
#endif // KMP_GROUP_AFFINITY

    if (!success) {
      success = __kmp_affinity_create_flat_map(&msg_id);
      if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
      }
      KMP_ASSERT(success);
    }
  }

  // If the user forced a particular topology discovery method, abort if that
  // method fails.
#if KMP_USE_HWLOC
  else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
    KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
    success = __kmp_affinity_create_hwloc_map(&msg_id);
    if (!success) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif // KMP_USE_HWLOC

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  else if (__kmp_affinity_top_method == affinity_top_method_x2apicid ||
           __kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
    success = __kmp_affinity_create_x2apicid_map(&msg_id);
    if (!success) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    success = __kmp_affinity_create_apicid_map(&msg_id);
    if (!success) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

  else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
    int line = 0;
    success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
    if (!success) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      const char *filename = __kmp_cpuinfo_get_filename();
      if (line > 0) {
        KMP_FATAL(FileLineMsgExiting, filename, line,
                  __kmp_i18n_catgets(msg_id));
      } else {
        KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
      }
    }
  }

#if KMP_GROUP_AFFINITY
  else if (__kmp_affinity_top_method == affinity_top_method_group) {
    success = __kmp_affinity_create_proc_group_map(&msg_id);
    KMP_ASSERT(success);
    if (!success) {
      KMP_ASSERT(msg_id != kmp_i18n_null);
      KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
    }
  }
#endif // KMP_GROUP_AFFINITY

  else if (__kmp_affinity_top_method == affinity_top_method_flat) {
    success = __kmp_affinity_create_flat_map(&msg_id);
    // Should not fail.
    KMP_ASSERT(success);
  }
  // Early exit if the topology could not be created.
  if (!__kmp_topology) {
    if (KMP_AFFINITY_CAPABLE() &&
        (__kmp_affinity_verbose ||
         (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
      KMP_WARNING(ErrorInitializeAffinity);
    }
    if (nPackages > 0 && nCoresPerPkg > 0 && __kmp_nThreadsPerCore > 0 &&
        __kmp_ncores > 0) {
      __kmp_topology = kmp_topology_t::allocate(0, 0, NULL);
      __kmp_topology->canonicalize(nPackages, nCoresPerPkg,
                                   __kmp_nThreadsPerCore, __kmp_ncores);
      if (__kmp_affinity_verbose) {
        __kmp_topology->print("KMP_AFFINITY");
      }
    }
    __kmp_affinity_type = affinity_none;
    __kmp_create_affinity_none_places();
#if KMP_USE_HIER_SCHED
    __kmp_dispatch_set_hierarchy_values();
#endif
    KMP_AFFINITY_DISABLE();
    return;
  }

  // Canonicalize the topology, print it if requested, and apply the
  // KMP_HW_SUBSET filter.
  __kmp_topology->canonicalize();
  if (__kmp_affinity_verbose)
    __kmp_topology->print("KMP_AFFINITY");
  bool filtered = __kmp_topology->filter_hw_subset();
  if (filtered && __kmp_affinity_verbose)
    __kmp_topology->print("KMP_HW_SUBSET");
  machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
  KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());

  // If affinity is none, only the single "none" place is needed.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_create_affinity_none_places();
#if KMP_USE_HIER_SCHED
    __kmp_dispatch_set_hierarchy_values();
#endif
    return;
  }
  int depth = __kmp_topology->get_depth();

  // Create the table of masks, indexed by OS thread id.
  unsigned maxIndex;
  unsigned numUnique;
  kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique);
  if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
  }
  switch (__kmp_affinity_type) {

  case affinity_explicit:
    KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
      __kmp_affinity_process_proclist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    } else {
      __kmp_affinity_process_placelist(
          &__kmp_affinity_masks, &__kmp_affinity_num_masks,
          __kmp_affinity_proclist, osId2Mask, maxIndex);
    }
    if (__kmp_affinity_num_masks == 0) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffNoValidProcID);
      }
      __kmp_affinity_type = affinity_none;
      __kmp_create_affinity_none_places();
      return;
    }
    break;

  // The other affinity types rely on sorting the hardware threads according
  // to some permutation of the machine topology tree. Set
  // __kmp_affinity_compact and __kmp_affinity_offset appropriately, then jump
  // to a common code fragment to do the sort and create the masks.
  case affinity_logical:
    __kmp_affinity_compact = 0;
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortTopology;

  case affinity_physical:
    if (__kmp_nThreadsPerCore > 1) {
      __kmp_affinity_compact = 1;
      if (__kmp_affinity_compact >= depth) {
        __kmp_affinity_compact = 0;
      }
    } else {
      __kmp_affinity_compact = 0;
    }
    if (__kmp_affinity_offset) {
      __kmp_affinity_offset =
          __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
    }
    goto sortTopology;

  case affinity_scatter:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = 0;
    } else {
      __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
    }
    goto sortTopology;

  case affinity_compact:
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }
    goto sortTopology;

  case affinity_balanced:
    if (depth <= 1) {
      if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
        KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
      }
      __kmp_affinity_type = affinity_none;
      __kmp_create_affinity_none_places();
      return;
    } else if (!__kmp_topology->is_uniform()) {
      // Save the depth for later use.
      __kmp_aff_depth = depth;

      int core_level =
          __kmp_affinity_find_core_level(__kmp_avail_proc, depth - 1);
      int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc, depth - 1,
                                                 core_level);
      int maxprocpercore = __kmp_affinity_max_proc_per_core(
          __kmp_avail_proc, depth - 1, core_level);

      int nproc = ncores * maxprocpercore;
      if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
        if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
        }
        __kmp_affinity_type = affinity_none;
        return;
      }

      procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        procarr[i] = -1;
      }

      int lastcore = -1;
      int inlastcore = 0;
      for (int i = 0; i < __kmp_avail_proc; i++) {
        int proc = __kmp_topology->at(i).os_id;
        int core = __kmp_affinity_find_core(i, depth - 1, core_level);

        if (core == lastcore) {
          inlastcore++;
        } else {
          inlastcore = 0;
        }
        lastcore = core;

        procarr[core * maxprocpercore + inlastcore] = proc;
      }
    }
    if (__kmp_affinity_compact >= depth) {
      __kmp_affinity_compact = depth - 1;
    }

  sortTopology:
    // Allocate the gtid->affinity mask table.
    if (__kmp_affinity_dups) {
      __kmp_affinity_num_masks = __kmp_avail_proc;
    } else {
      __kmp_affinity_num_masks = numUnique;
    }

    if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
        (__kmp_affinity_num_places > 0) &&
        ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
      __kmp_affinity_num_masks = __kmp_affinity_num_places;
    }

    KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

    // Sort the topology table according to the current setting of
    // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
    __kmp_topology->sort_compact();
    {
      int i;
      unsigned j;
      int num_hw_threads = __kmp_topology->get_num_hw_threads();
      for (i = 0, j = 0; i < num_hw_threads; i++) {
        if ((!__kmp_affinity_dups) && (!__kmp_topology->at(i).leader)) {
          continue;
        }
        int osId = __kmp_topology->at(i).os_id;

        kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
        KMP_ASSERT(KMP_CPU_ISSET(osId, src));
        KMP_CPU_COPY(dest, src);
        if (++j >= __kmp_affinity_num_masks) {
          break;
        }
      }
      KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    }
    // Sort the topology back using ids.
    __kmp_topology->sort_ids();
    break;

  default:
    KMP_ASSERT2(0, "Unexpected affinity setting");
  }

  KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
}
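// Public wrapper around __kmp_aux_affinity_initialize(). When affinity is
// disabled it temporarily runs the initialization as affinity_none so the
// topology globals still get set, then restores affinity_disabled.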
void __kmp_affinity_initialize(void) {
  int disabled = (__kmp_affinity_type == affinity_disabled);
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(disabled);
  }
  if (disabled) {
    __kmp_affinity_type = affinity_none;
  }
  __kmp_aux_affinity_initialize();
  if (disabled) {
    __kmp_affinity_type = affinity_disabled;
  }
}
void __kmp_affinity_uninitialize(void) {
  if (__kmp_affinity_masks != NULL) {
    KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
    __kmp_affinity_masks = NULL;
  }
  if (__kmp_affin_fullMask != NULL) {
    KMP_CPU_FREE(__kmp_affin_fullMask);
    __kmp_affin_fullMask = NULL;
  }
  __kmp_affinity_num_masks = 0;
  __kmp_affinity_type = affinity_default;
  __kmp_affinity_num_places = 0;
  if (__kmp_affinity_proclist != NULL) {
    __kmp_free(__kmp_affinity_proclist);
    __kmp_affinity_proclist = NULL;
  }
  if (procarr != NULL) {
    __kmp_free(procarr);
    procarr = NULL;
  }
#if KMP_USE_HWLOC
  if (__kmp_hwloc_topology != NULL) {
    hwloc_topology_destroy(__kmp_hwloc_topology);
    __kmp_hwloc_topology = NULL;
  }
#endif
  if (__kmp_hw_subset) {
    kmp_hw_subset_t::deallocate(__kmp_hw_subset);
    __kmp_hw_subset = nullptr;
  }
  if (__kmp_topology) {
    kmp_topology_t::deallocate(__kmp_topology);
    __kmp_topology = nullptr;
  }
  KMPAffinity::destroy_api();
}
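// Bind a newly created thread (or the root/hidden-helper thread) to its
// initial place: either the full mask, or the place selected by its adjusted
// gtid plus __kmp_affinity_offset.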
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  if (th->th.th_affin_mask == NULL) {
    KMP_CPU_ALLOC(th->th.th_affin_mask);
  } else {
    KMP_CPU_ZERO(th->th.th_affin_mask);
  }

  // Copy the appropriate mask into the kmp_info_t structure: either the full
  // mask, or the mask of the place assigned to this thread.
  kmp_affin_mask_t *mask;
  int i;

  if (KMP_AFFINITY_NON_PROC_BIND) {
    if ((__kmp_affinity_type == affinity_none) ||
        (__kmp_affinity_type == affinity_balanced) ||
        KMP_HIDDEN_HELPER_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  } else {
    if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#if KMP_GROUP_AFFINITY
      if (__kmp_num_proc_groups > 1) {
        return;
      }
#endif
      KMP_ASSERT(__kmp_affin_fullMask != NULL);
      i = KMP_PLACE_ALL;
      mask = __kmp_affin_fullMask;
    } else {
      int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
      KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
      i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
      mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
    }
  }

  th->th.th_current_place = i;
  if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
    th->th.th_new_place = i;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
    // For a non-OMP_PROC_BIND affinity method, set every thread's
    // place-partition-var to the entire place list.
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;
  }

  if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
                   gtid));
  } else {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
                   gtid, i));
  }

  KMP_CPU_COPY(th->th.th_affin_mask, mask);

  if (__kmp_affinity_verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
      // Avoid duplicate printing; it will be printed correctly at the barrier.
      && (__kmp_affinity_type == affinity_none ||
          (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }

  if (__kmp_affinity_verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY (hidden helper thread)",
               (kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
  }

#if KMP_OS_WINDOWS
  // On Windows, the process affinity mask might have changed; if the user
  // didn't request affinity and this call fails, continue silently.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
  } else
#endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
void __kmp_affinity_set_place(int gtid) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return;
  }

  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 "place = %d)\n",
                 gtid, th->th.th_new_place, th->th.th_current_place));

  // Check that the new place is within this thread's partition.
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
  if (th->th.th_first_place <= th->th.th_last_place) {
    KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
               (th->th.th_new_place <= th->th.th_last_place));
  } else {
    KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
               (th->th.th_new_place >= th->th.th_last_place));
  }

  // Copy the thread mask to the kmp_info_t structure and set this thread's
  // affinity.
  kmp_affin_mask_t *mask =
      KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
  KMP_CPU_COPY(th->th.th_affin_mask, mask);
  th->th.th_current_place = th->th.th_new_place;

  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
  }
  __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
int __kmp_aux_set_affinity(void **mask) {
  int gtid;
  kmp_info_t *th;
  int retval;

  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf(
            "kmp_set_affinity: setting affinity mask for thread %d = %s\n",
            gtid, buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
    } else {
      unsigned proc;
      int num_procs = 0;

      KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
        if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
          KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
          continue;
        }
        num_procs++;
      }
      if (num_procs == 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }

#if KMP_GROUP_AFFINITY
      if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
      }
#endif // KMP_GROUP_AFFINITY
    }
  }

  th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  if (retval == 0) {
    KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
  }

  th->th.th_current_place = KMP_PLACE_UNDEFINED;
  th->th.th_new_place = KMP_PLACE_UNDEFINED;
  th->th.th_first_place = 0;
  th->th.th_last_place = __kmp_affinity_num_masks - 1;

  // Turn off OpenMP 4.0 affinity for the current thread at this parallel
  // level.
  th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

  return retval;
}
int __kmp_aux_get_affinity(void **mask) {
  int gtid;
  int retval;
#if KMP_OS_WINDOWS || KMP_DEBUG
  kmp_info_t *th;
#endif
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  gtid = __kmp_entry_gtid();
#if KMP_OS_WINDOWS || KMP_DEBUG
  th = __kmp_threads[gtid];
#endif
  KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  th->th.th_affin_mask);
        __kmp_printf(
            "kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,
            buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
    }
  }

#if !KMP_OS_WINDOWS
  retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(
      1000, (""); {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_printf(
            "kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid,
            buf);
      });
  return retval;
#else
  (void)retval;
  KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
  return 0;
#endif // KMP_OS_WINDOWS
}
int __kmp_aux_get_affinity_max_proc() {
  if (!KMP_AFFINITY_CAPABLE()) {
    return 0;
  }
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
  }
#endif
  return __kmp_xproc;
}
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(
      1000, (""); {
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                           "affinity mask for thread %d = %s\n",
                           proc, gtid, buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(
      1000, (""); {
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                           "affinity mask for thread %d = %s\n",
                           proc, gtid, buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return -2;
  }

  KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
  return 0;
}
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  KA_TRACE(
      1000, (""); {
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                           "affinity mask for thread %d = %s\n",
                           proc, gtid, buf);
      });

  if (__kmp_env_consistency_check) {
    if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
    }
  }

  if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
    return -1;
  }
  if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
    return 0;
  }

  return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
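// KMP_AFFINITY=balanced: spread nthreads evenly over the cores of the
// machine, handling uniform and non-uniform topologies separately, then bind
// the calling thread to the computed mask.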
void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
  KMP_DEBUG_ASSERT(th);
  bool fine_gran = true;
  int tid = th->th.th_info.ds.ds_tid;

  // Do not perform balanced affinity for the hidden helper threads.
  if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th)))
    return;

  switch (__kmp_affinity_gran) {
  case KMP_HW_THREAD:
    break;
  case KMP_HW_CORE:
    if (__kmp_nThreadsPerCore > 1) {
      fine_gran = false;
    }
    break;
  case KMP_HW_SOCKET:
    if (nCoresPerPkg > 1) {
      fine_gran = false;
    }
    break;
  default:
    fine_gran = false;
  }

  if (__kmp_topology->is_uniform()) {
    int coreID;
    int threadID;
    // Number of hardware threads per core.
    int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
    int ncores = __kmp_ncores;
    if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
      __kmp_nth_per_core = __kmp_avail_proc / nPackages;
      ncores = nPackages;
    }
    // How many threads are bound to each core, and how many cores get an
    // extra thread ("big cores").
    int chunk = nthreads / ncores;
    int big_cores = nthreads % ncores;
    int big_nth = (chunk + 1) * big_cores;
    if (tid < big_nth) {
      coreID = tid / (chunk + 1);
      threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
    } else { // tid >= big_nth
      coreID = (tid - big_cores) / chunk;
      threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
    }
    KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                      "Illegal set affinity operation when not capable");

    kmp_affin_mask_t *mask = th->th.th_affin_mask;
    KMP_CPU_ZERO(mask);

    if (fine_gran) {
      int osID =
          __kmp_topology->at(coreID * __kmp_nth_per_core + threadID).os_id;
      KMP_CPU_SET(osID, mask);
    } else {
      for (int i = 0; i < __kmp_nth_per_core; i++) {
        int osID;
        osID = __kmp_topology->at(coreID * __kmp_nth_per_core + i).os_id;
        KMP_CPU_SET(osID, mask);
      }
    }
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
  } else { // Non-uniform topology
    kmp_affin_mask_t *mask = th->th.th_affin_mask;
    KMP_CPU_ZERO(mask);

    int core_level =
        __kmp_affinity_find_core_level(__kmp_avail_proc, __kmp_aff_depth - 1);
    int ncores = __kmp_affinity_compute_ncores(__kmp_avail_proc,
                                               __kmp_aff_depth - 1, core_level);
    int nth_per_core = __kmp_affinity_max_proc_per_core(
        __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

    // Special case: nthreads == __kmp_avail_proc.
    if (nthreads == __kmp_avail_proc) {
      if (fine_gran) {
        int osID = __kmp_topology->at(tid).os_id;
        KMP_CPU_SET(osID, mask);
      } else {
        int core =
            __kmp_affinity_find_core(tid, __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
          int osID = __kmp_topology->at(i).os_id;
          if (__kmp_affinity_find_core(i, __kmp_aff_depth - 1, core_level) ==
              core) {
            KMP_CPU_SET(osID, mask);
          }
        }
      }
    } else if (nthreads <= ncores) {
      int core = 0;
      for (int i = 0; i < ncores; i++) {
        // Check whether this core from procarr[] is in the mask.
        int in_mask = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            in_mask = 1;
            break;
          }
        }
        if (in_mask) {
          if (tid == core) {
            for (int j = 0; j < nth_per_core; j++) {
              int osID = procarr[i * nth_per_core + j];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
                // For fine granularity it is enough to set the first
                // available osID for this core.
                if (fine_gran) {
                  break;
                }
              }
            }
            break;
          } else {
            core++;
          }
        }
      }
    } else { // nthreads > ncores
      // Number of processors at each core.
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      // Number of cores with exactly "x" available processors.
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      // Number of cores with at least "x" available processors.
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));

      for (int i = 0; i <= nth_per_core; i++) {
        ncores_with_x_procs[i] = 0;
        ncores_with_x_to_max_procs[i] = 0;
      }

      for (int i = 0; i < ncores; i++) {
        int cnt = 0;
        for (int j = 0; j < nth_per_core; j++) {
          if (procarr[i * nth_per_core + j] != -1) {
            cnt++;
          }
        }
        nproc_at_core[i] = cnt;
        ncores_with_x_procs[cnt]++;
      }

      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
          ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
        }
      }

      // Max number of processors, and the number of threads per context.
      int nproc = nth_per_core * ncores;
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
        newarr[i] = 0;
      }

      int nth = nthreads;
      int flag = 0;
      while (nth > 0) {
        for (int j = 1; j <= nth_per_core; j++) {
          int cnt = ncores_with_x_to_max_procs[j];
          for (int i = 0; i < ncores; i++) {
            // Skip cores with no processors.
            if (nproc_at_core[i] == 0) {
              continue;
            }
            for (int k = 0; k < nth_per_core; k++) {
              if (procarr[i * nth_per_core + k] != -1) {
                if (newarr[i * nth_per_core + k] == 0) {
                  newarr[i * nth_per_core + k] = 1;
                  cnt--;
                  nth--;
                  break;
                } else {
                  if (flag != 0) {
                    newarr[i * nth_per_core + k]++;
                    cnt--;
                    nth--;
                    break;
                  }
                }
              }
            }
            if (cnt == 0 || nth == 0) {
              break;
            }
          }
          if (nth == 0) {
            break;
          }
        }
        flag = 1;
      }
      int sum = 0;
      for (int i = 0; i < nproc; i++) {
        sum += newarr[i];
        if (sum > tid) {
          if (fine_gran) {
            int osID = procarr[i];
            KMP_CPU_SET(osID, mask);
          } else {
            int coreID = i / nth_per_core;
            for (int ii = 0; ii < nth_per_core; ii++) {
              int osID = procarr[coreID * nth_per_core + ii];
              if (osID != -1) {
                KMP_CPU_SET(osID, mask);
              }
            }
          }
          break;
        }
      }
      __kmp_free(newarr);
    }

    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
    }
    __kmp_set_system_affinity(mask, TRUE);
  }
}
#if KMP_OS_LINUX || KMP_OS_FREEBSD
// Entry point used by an application to rebind the calling (initial) thread
// to the full initial affinity mask. Returns 0 on success, -1 if the thread
// cannot be bound, or a positive errno value if binding fails.
extern "C"
#if KMP_OS_LINUX
    __attribute__((weak))
#endif
    int
    kmp_set_thread_affinity_mask_initial() {
  int gtid = __kmp_get_gtid();
  if (gtid < 0) {
    // Do not touch non-OpenMP threads.
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
    return -1;
  }
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
    return -1;
  }
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
                gtid));
  KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
  return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif