#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_utils.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif
#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);

static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {
  kmp_nested_nthreads_t *new_nested_nth =
      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
          sizeof(kmp_nested_nthreads_t));
  int new_size = level + thr->th.th_set_nested_nth_sz;
  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
  for (int i = 0; i < level + 1; ++i)
    new_nested_nth->nth[i] = 0;
  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
  new_nested_nth->size = new_nested_nth->used = new_size;
  return new_nested_nth;
}
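
/* Return the global thread id (gtid) of the calling thread.  Depending on
   __kmp_gtid_mode this uses thread-local data (TDATA), keyed TLS, or an
   internal fallback that scans __kmp_threads for the entry whose recorded
   stack range contains the address of a local variable; the fallback also
   refines the stored stack base/size for root threads with growing stacks. */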
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
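
/* Like __kmp_get_global_thread_id(), but if no gtid has been assigned yet
   (a new root thread), take the initialization bootstrap lock, run serial
   initialization if needed, and register the thread as a new root. */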
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
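
/* Optionally print this thread's stack extent, and when extensive environment
   checks are enabled verify that its stack does not overlap the recorded stack
   of any other registered thread; abort with a StackOverlap error if it does. */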
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
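
/* Print a line of the form "OMP storage map: <p1> <p2> <size> <format>" to the
   error stream under the stdio bootstrap lock.  With KMP_PRINT_DATA_PLACEMENT
   it additionally reports the memory node of each page in [p1, p2] when the
   block is contiguous. */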
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0)
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
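
/* Abort the whole process: dump the debug buffer if enabled, record the
   abnormal termination in the global state, unregister the library, and
   terminate. */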
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}
void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
}
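
/* Print the storage map (address ranges) of a thread descriptor and its
   barrier structures, and, below, the equivalent map for a team: barriers,
   dispatch buffers and the thread pointer array. */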
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() {
  __kmp_fini_target_mem();
  __kmp_fini_memkind();
}

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif // ENABLE_LIBOMPTARGET
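
/* Entry point for the Windows dynamic library: tears the runtime down on
   process detach (only when lpReserved == NULL, i.e. FreeLibrary rather than
   process exit) and on thread detach. */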
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
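
/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   "ordered" construct: when BUILD_PARALLEL_ORDERED is defined, an entering
   thread waits until team->t.t_ordered.dt.t_value equals its tid, and on exit
   the value is advanced to (tid + 1) % nproc to release the next thread. */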
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
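
/* Decide which thread of the team executes a SINGLE construct: each thread
   advances its local construct counter, and the one that successfully
   compare-and-swaps team->t.t_construct wins.  Returns nonzero on the thread
   that should execute the single block; __kmp_exit_single ends it. */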
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
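
/* Decide how many threads the forking (primary) thread may actually reserve
   for the new team.  Starting from the requested set_nthreads, the count is
   trimmed by the dynamic-adjustment mode (load balance, thread limit, or
   random), by KMP_DEVICE_THREAD_LIMIT and OMP_THREAD_LIMIT, and by the
   capacity of the __kmp_threads array; returns the number of threads to use
   (1 means the region will be serialized). */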
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }

  if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  }
  return new_nthreads;
}
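
/* Allocate and bind the worker threads of a newly forked team: set up the
   primary thread's team pointers, reuse the hot team when possible, allocate
   workers otherwise, seed their barrier arrival counts from the team, apply
   affinity partitioning, and sync the primary thread's task state with the
   team. */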
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team = 0;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) {
    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level;
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1;
      } else {
        hot_teams[level].hot_team = team;
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }

  if (!use_hot_team) {
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      {
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (use_hot_team) {
      KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
      KA_TRACE(
          20,
          ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
           "%p, new task_team %p / team %p\n",
           __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
           team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
           team));

      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);

      if (team->t.t_nproc > 1) {
        KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
                         team->t.t_threads[1]->th.th_task_state == 1);
        KMP_CHECK_UPDATE(master_th->th.th_task_state,
                         team->t.t_threads[1]->th.th_task_state);
      } else {
        master_th->th.th_task_state = 0;
      }
    } else {
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      master_th->th.th_task_state = 0;
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
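
/* On x86 the primary thread's floating-point control state (x87 control word
   and MXCSR) can be propagated into the team so workers inherit it;
   updateHWFPControl restores the hardware registers from the team on the way
   back.  On other architectures both are no-ops. */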
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
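
/* Execute a parallel region serially on the calling thread: (re)use the
   thread's serial team (allocating a new one when the cached one is already
   in use), push a new nesting level with its own dispatch buffer and ICVs,
   and emit the OMPT parallel-begin / implicit-task-begin callbacks. */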
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  this_thr->th.th_set_proc_bind = proc_bind_default;

  if (this_thr->th.th_nt_strict && this_thr->th.th_set_nproc > 1)
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);

  this_thr->th.th_set_nproc = 0;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(
          this_thr->th.th_root, 1, 1, proc_bind,
          &this_thr->th.th_current_task->td_icvs, 0, NULL);
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    if (this_thr->th.th_team->t.t_nested_nth)
      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
    else
      serial_team->t.t_nested_nth = &__kmp_nested_nth;
    serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (this_thr->th.th_team->t.t_nested_nth)
      nested_nth = this_thr->th.th_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;
    this_thr->th.th_task_team = NULL;
    this_thr->th.th_task_state = 0;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  } else {
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (serial_team->t.t_nested_nth)
      nested_nth = serial_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    __kmp_push_task_team_node(this_thr, serial_team);
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
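
/* Helper predicates for __kmp_fork_call: __kmp_is_fork_in_teams is true when
   the fork creates the worker threads of an active teams construct, and
   __kmp_is_entering_teams is true when the fork creates the teams league
   itself. */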
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
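
/* Handle a fork issued from inside a teams construct: reuse the parent team,
   copy the outlined-function arguments, shrink or grow the team if the primary
   thread requested a different number of threads, then either run the
   microtask serially (serialized parent team) or perform the usual internal
   fork and invoke it. */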
static int __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                               kmp_int32 argc, kmp_info_t *master_th,
                               kmp_root_t *root,
                               enum fork_context_e call_context,
                               microtask_t microtask, launch_t invoker,
                               int master_set_numthreads, int level,
                               ompt_data_t ompt_parallel_data,
                               void *return_address, kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }

  if (parent_team == master_th->th.th_serial_team) {
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }

    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }

  if (master_set_numthreads) {
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      kmp_info_t **other_threads = parent_team->t.t_threads;
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    master_th->th.th_set_nproc = 0;
  }

  if (__kmp_debugging) {
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) {
      master_set_numthreads = nth;
    }
  }

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 &&
      master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
  return TRUE;
}
static int __kmp_serial_fork_call(ident_t *loc, int gtid,
                                  enum fork_context_e call_context,
                                  kmp_int32 argc, microtask_t microtask,
                                  launch_t invoker, kmp_info_t *master_th,
                                  kmp_team_t *parent_team,
                                  ompt_data_t *ompt_parallel_data,
                                  void **return_address,
                                  ompt_data_t **parent_task_data,
                                  kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  SimpleVLA<void *> args(argc);
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      master_th->th.th_serial_team->t.t_level--;

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    }
  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }

    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  task_thread_limit =
      master_th->th.th_current_task->td_icvs.task_thread_limit;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data = NULL;
  ompt_frame_t *ompt_frame = NULL;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;
  }

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  master_th->th.th_ident = loc;

  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
                               ompt_parallel_data, return_address, ap);
  }

  KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);

  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                   ? task_thread_limit
                   : nthreads;
    if (nthreads > 1) {
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data, ap);
  }
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  kmp_nested_nthreads_t *nested_nth = NULL;
  if (!master_th->th.th_set_nested_nth &&
      (level + 1 < parent_team->t.t_nested_nth->used) &&
      (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
    nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
  } else if (master_th->th.th_set_nested_nth) {
    nested_nth = __kmp_override_nested_nth(master_th, level);
    if ((level + 1 < nested_nth->used) &&
        (nested_nth->nth[level + 1] != nthreads_icv))
      nthreads_icv = nested_nth->nth[level + 1];
  }

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads, proc_bind, &new_icvs,
                               argc, master_th);
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(
        root, nthreads, nthreads, proc_bind,
        &master_th->th.th_current_task->td_icvs, argc, master_th);
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  if (team->t.t_nested_nth &&
      team->t.t_nested_nth != parent_team->t.t_nested_nth) {
    KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
    KMP_INTERNAL_FREE(team->t.t_nested_nth);
    team->t.t_nested_nth = NULL;
  }
  team->t.t_nested_nth = parent_team->t.t_nested_nth;
  if (master_th->th.th_set_nested_nth) {
    if (!nested_nth)
      nested_nth = __kmp_override_nested_nth(master_th, level);
    team->t.t_nested_nth = nested_nth;
    KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
    master_th->th.th_set_nested_nth = NULL;
    master_th->th.th_set_nested_nth_sz = 0;
    master_th->th.th_nt_strict = false;
  }
2263 propagateFPControl(team);
2265 if (ompd_state & OMPD_ENABLE_BP)
2266 ompd_bp_parallel_begin();
2271 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2272 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2274 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2275 (team->t.t_master_tid == 0 &&
2276 (team->t.t_parent == root->r.r_root_team ||
2277 team->t.t_parent->t.t_serialized)));
2281 argv = (
void **)team->t.t_argv;
2283 for (i = argc - 1; i >= 0; --i) {
2284 void *new_argv = va_arg(kmp_va_deref(ap),
void *);
2285 KMP_CHECK_UPDATE(*argv, new_argv);
2289 for (i = 0; i < argc; ++i) {
2291 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2296 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2297 if (!root->r.r_active)
2298 root->r.r_active = TRUE;
2300 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2301 __kmp_setup_icv_copy(team, nthreads,
2302 &master_th->th.th_current_task->td_icvs, loc);
2305 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2308 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2311 if (team->t.t_active_level == 1
2312 && !master_th->th.th_teams_microtask) {
2314 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2315 (__kmp_forkjoin_frames_mode == 3 ||
2316 __kmp_forkjoin_frames_mode == 1)) {
2317 kmp_uint64 tmp_time = 0;
2318 if (__itt_get_timestamp_ptr)
2319 tmp_time = __itt_get_timestamp();
2321 master_th->th.th_frame_time = tmp_time;
2322 if (__kmp_forkjoin_frames_mode == 3)
2323 team->t.t_region_time = tmp_time;
2327 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2328 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2330 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2336 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2339 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2340 root, team, master_th, gtid));
2343 if (__itt_stack_caller_create_ptr) {
2346 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2347 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2348 }
else if (parent_team->t.t_serialized) {
2353 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2354 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2362 __kmp_internal_fork(loc, gtid, team);
2363 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, "
2364 "master_th=%p, gtid=%d\n",
2365 root, team, master_th, gtid));
2368 if (call_context == fork_context_gnu) {
2369 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2374 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2375 team->t.t_id, team->t.t_pkfn));
2378 #if KMP_STATS_ENABLED
2382 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2386 if (!team->t.t_invoke(gtid)) {
2387 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
2390 #if KMP_STATS_ENABLED
2393 KMP_SET_THREAD_STATE(previous_state);
2397 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2398 team->t.t_id, team->t.t_pkfn));
2401 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2403 if (ompt_enabled.enabled) {
2404 master_th->th.ompt_thread_info.state = ompt_state_overhead;
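// OMPT helpers for the join path: restore the thread state after leaving a
// parallel region and fire the parallel_end callback.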
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // Restore state outside the region.
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
2434 void __kmp_join_call(
ident_t *loc,
int gtid
2437 enum fork_context_e fork_context
2441 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2443 kmp_team_t *parent_team;
2444 kmp_info_t *master_th;
2448 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2451 master_th = __kmp_threads[gtid];
2452 root = master_th->th.th_root;
2453 team = master_th->th.th_team;
2454 parent_team = team->t.t_parent;
2456 master_th->th.th_ident = loc;
2459 void *team_microtask = (
void *)team->t.t_pkfn;
2463 if (ompt_enabled.enabled &&
2464 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2465 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2470 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2471 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2472 "th_task_team = %p\n",
2473 __kmp_gtid_from_thread(master_th), team,
2474 team->t.t_task_team[master_th->th.th_task_state],
2475 master_th->th.th_task_team));
2476 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2480 if (team->t.t_serialized) {
2481 if (master_th->th.th_teams_microtask) {
2483 int level = team->t.t_level;
2484 int tlevel = master_th->th.th_teams_level;
2485 if (level == tlevel) {
2489 }
else if (level == tlevel + 1) {
2493 team->t.t_serialized++;
2499 if (ompt_enabled.enabled) {
2500 if (fork_context == fork_context_gnu) {
2501 __ompt_lw_taskteam_unlink(master_th);
2503 __kmp_join_restore_state(master_th, parent_team);
2510 master_active = team->t.t_master_active;
2515 __kmp_internal_join(loc, gtid, team);
2517 if (__itt_stack_caller_create_ptr) {
2518 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2520 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2521 team->t.t_stack_id = NULL;
2525 master_th->th.th_task_state =
2528 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2529 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2533 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2534 parent_team->t.t_stack_id = NULL;
2542 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2543 void *codeptr = team->t.ompt_team_info.master_return_address;
2548 if (team->t.t_active_level == 1 &&
2549 (!master_th->th.th_teams_microtask ||
2550 master_th->th.th_teams_size.nteams == 1)) {
2551 master_th->th.th_ident = loc;
2554 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2555 __kmp_forkjoin_frames_mode == 3)
2556 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2557 master_th->th.th_frame_time, 0, loc,
2558 master_th->th.th_team_nproc, 1);
2559 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2560 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2561 __kmp_itt_region_joined(gtid);
2565 #if KMP_AFFINITY_SUPPORTED
2568 master_th->th.th_first_place = team->t.t_first_place;
2569 master_th->th.th_last_place = team->t.t_last_place;
2573 if (master_th->th.th_teams_microtask && !exit_teams &&
2574 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2575 team->t.t_level == master_th->th.th_teams_level + 1) {
2580 ompt_data_t ompt_parallel_data = ompt_data_none;
2581 if (ompt_enabled.enabled) {
2582 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2583 if (ompt_enabled.ompt_callback_implicit_task) {
2584 int ompt_team_size = team->t.t_nproc;
2585 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2586 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2587 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2589 task_info->frame.exit_frame = ompt_data_none;
2590 task_info->task_data = ompt_data_none;
2591 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2592 __ompt_lw_taskteam_unlink(master_th);
2597 team->t.t_active_level--;
2598 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2604 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2605 int old_num = master_th->th.th_team_nproc;
2606 int new_num = master_th->th.th_teams_size.nth;
2607 kmp_info_t **other_threads = team->t.t_threads;
2608 team->t.t_nproc = new_num;
2609 for (
int i = 0; i < old_num; ++i) {
2610 other_threads[i]->th.th_team_nproc = new_num;
2613 for (
int i = old_num; i < new_num; ++i) {
2615 KMP_DEBUG_ASSERT(other_threads[i]);
2616 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2617 for (
int b = 0; b < bs_last_barrier; ++b) {
2618 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2619 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2621 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2624 if (__kmp_tasking_mode != tskm_immediate_exec) {
2626 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2632 if (ompt_enabled.enabled) {
2633 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2634 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2642 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2643 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2645 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2650 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2652 if (!master_th->th.th_teams_microtask ||
2653 team->t.t_level > master_th->th.th_teams_level) {
2655 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2657 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2660 if (ompt_enabled.enabled) {
2661 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2662 if (ompt_enabled.ompt_callback_implicit_task) {
2663 int flags = (team_microtask == (
void *)__kmp_teams_master)
2665 : ompt_task_implicit;
2666 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2667 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2668 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2669 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2671 task_info->frame.exit_frame = ompt_data_none;
2672 task_info->task_data = ompt_data_none;
2676 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2678 __kmp_pop_current_task_from_thread(master_th);
2680 master_th->th.th_def_allocator = team->t.t_def_allocator;
2683 if (ompd_state & OMPD_ENABLE_BP)
2684 ompd_bp_parallel_end();
2686 updateHWFPControl(team);
2688 if (root->r.r_active != master_active)
2689 root->r.r_active = master_active;
2691 __kmp_free_team(root, team, master_th);
2698 master_th->th.th_team = parent_team;
2699 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2700 master_th->th.th_team_master = parent_team->t.t_threads[0];
2701 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2704 if (parent_team->t.t_serialized &&
2705 parent_team != master_th->th.th_serial_team &&
2706 parent_team != root->r.r_root_team) {
2707 __kmp_free_team(root, master_th->th.th_serial_team, NULL);
2708 master_th->th.th_serial_team = parent_team;
2711 if (__kmp_tasking_mode != tskm_immediate_exec) {
2713 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2714 team->t.t_primary_task_state == 1);
2715 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2718 master_th->th.th_task_team =
2719 parent_team->t.t_task_team[master_th->th.th_task_state];
2721 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2722 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2729 master_th->th.th_current_task->td_flags.executing = 1;
2731 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2733 #if KMP_AFFINITY_SUPPORTED
2734 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2735 __kmp_reset_root_init_mask(gtid);
2740 OMPT_INVOKER(fork_context) |
2741 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2742 : ompt_parallel_team);
2743 if (ompt_enabled.enabled) {
2744 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2750 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
               thread->th.th_team->t.t_serialized) {
      push = 1;
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth) && __kmp_hot_teams_max_level &&
      !__kmp_hot_teams_mode) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() changed the hot team size.
    hot_team->t.t_size_changed = -1;
  }
}
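/* Change the max_active_levels ICV for the calling thread, validating the
   requested value against [0, KMP_MAX_ACTIVE_LEVELS_LIMIT] first. */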
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK, max_active_levels is within the valid range
    // [0, KMP_MAX_ACTIVE_LEVELS_LIMIT]; a zero value is allowed.
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
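/* Change the run-time schedule ICV (kind and chunk) for the calling thread;
   an out-of-range kind falls back to the default static schedule with a
   warning. */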
2935 void __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk) {
2937 kmp_sched_t orig_kind;
2940 KF_TRACE(10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2941 gtid, (
int)kind, chunk));
2942 KMP_DEBUG_ASSERT(__kmp_init_serial);
2949 kind = __kmp_sched_without_mods(kind);
2951 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2952 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2954 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2955 KMP_HNT(DefaultScheduleKindUsed,
"static, no chunk"),
2957 kind = kmp_sched_default;
2961 thread = __kmp_threads[gtid];
2963 __kmp_save_internal_controls(thread);
2965 if (kind < kmp_sched_upper_std) {
2966 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2969 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2971 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2972 __kmp_sch_map[kind - kmp_sched_lower - 1];
2977 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2978 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2979 kmp_sched_lower - 2];
2981 __kmp_sched_apply_mods_intkind(
2982 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2983 if (kind == kmp_sched_auto || chunk < 1) {
2985 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2987 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
3040 int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
3046 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3047 KMP_DEBUG_ASSERT(__kmp_init_serial);
3054 thr = __kmp_threads[gtid];
3055 team = thr->th.th_team;
3056 ii = team->t.t_level;
3060 if (thr->th.th_teams_microtask) {
3062 int tlevel = thr->th.th_teams_level;
3065 KMP_DEBUG_ASSERT(ii >= tlevel);
3077 return __kmp_tid_from_gtid(gtid);
3079 dd = team->t.t_serialized;
3081 while (ii > level) {
3082 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3084 if ((team->t.t_serialized) && (!dd)) {
3085 team = team->t.t_parent;
3089 team = team->t.t_parent;
3090 dd = team->t.t_serialized;
3095 return (dd > 1) ? (0) : (team->t.t_master_tid);
3098 int __kmp_get_team_size(
int gtid,
int level) {
3104 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
3105 KMP_DEBUG_ASSERT(__kmp_init_serial);
3112 thr = __kmp_threads[gtid];
3113 team = thr->th.th_team;
3114 ii = team->t.t_level;
3118 if (thr->th.th_teams_microtask) {
3120 int tlevel = thr->th.th_teams_level;
3123 KMP_DEBUG_ASSERT(ii >= tlevel);
3134 while (ii > level) {
3135 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3137 if (team->t.t_serialized && (!dd)) {
3138 team = team->t.t_parent;
3142 team = team->t.t_parent;
3147 return team->t.t_nproc;
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced or greedy)
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided
  } else { // (STATIC_CHUNKED), (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
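/* Allocate (or reallocate) the team's microtask argument array: small argument
   counts use the inline t_inline_argv buffer, larger ones a page allocation. */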
3186 static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3188 KMP_DEBUG_ASSERT(team);
3189 if (!realloc || argc > team->t.t_max_argc) {
3191 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3192 "current entries=%d\n",
3193 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3195 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3196 __kmp_free((
void *)team->t.t_argv);
3198 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3200 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3201 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3203 team->t.t_id, team->t.t_max_argc));
3204 team->t.t_argv = &team->t.t_inline_argv[0];
3205 if (__kmp_storage_map) {
3206 __kmp_print_storage_map_gtid(
3207 -1, &team->t.t_inline_argv[0],
3208 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3209 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
3214 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3215 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3217 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3219 team->t.t_id, team->t.t_max_argc));
3221 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3222 if (__kmp_storage_map) {
3223 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3224 &team->t.t_argv[team->t.t_max_argc],
3225 sizeof(
void *) * team->t.t_max_argc,
3226 "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* free the per-thread dispatch buffers */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
3288 static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3290 kmp_r_sched_t r_sched =
3291 __kmp_get_schedule_global();
3293 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3295 kmp_internal_control_t g_icvs = {
3297 (kmp_int8)__kmp_global.g.g_dynamic,
3299 (kmp_int8)__kmp_env_blocktime,
3301 __kmp_dflt_blocktime,
3306 __kmp_dflt_team_nth,
3312 __kmp_dflt_max_active_levels,
3316 __kmp_nested_proc_bind.bind_types[0],
3317 __kmp_default_device,
3324 static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3326 kmp_internal_control_t gx_icvs;
3327 gx_icvs.serial_nesting_level =
3329 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3330 gx_icvs.next = NULL;
3335 static void __kmp_initialize_root(kmp_root_t *root) {
3337 kmp_team_t *root_team;
3338 kmp_team_t *hot_team;
3339 int hot_team_max_nth;
3340 kmp_r_sched_t r_sched =
3341 __kmp_get_schedule_global();
3342 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3343 KMP_DEBUG_ASSERT(root);
3344 KMP_ASSERT(!root->r.r_begin);
3347 __kmp_init_lock(&root->r.r_begin_lock);
3348 root->r.r_begin = FALSE;
3349 root->r.r_active = FALSE;
3350 root->r.r_in_parallel = 0;
3351 root->r.r_blocktime = __kmp_dflt_blocktime;
3352 #if KMP_AFFINITY_SUPPORTED
3353 root->r.r_affinity_assigned = FALSE;
3358 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3360 root_team = __kmp_allocate_team(root,
3366 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3373 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3376 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3378 root->r.r_root_team = root_team;
3379 root_team->t.t_control_stack_top = NULL;
3382 root_team->t.t_threads[0] = NULL;
3383 root_team->t.t_nproc = 1;
3384 root_team->t.t_serialized = 1;
3386 root_team->t.t_sched.sched = r_sched.sched;
3387 root_team->t.t_nested_nth = &__kmp_nested_nth;
3390 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3391 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3395 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3397 hot_team = __kmp_allocate_team(root,
3399 __kmp_dflt_team_nth_ub * 2,
3403 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3407 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3409 root->r.r_hot_team = hot_team;
3410 root_team->t.t_control_stack_top = NULL;
3413 hot_team->t.t_parent = root_team;
3416 hot_team_max_nth = hot_team->t.t_max_nproc;
3417 for (f = 0; f < hot_team_max_nth; ++f) {
3418 hot_team->t.t_threads[f] = NULL;
3420 hot_team->t.t_nproc = 1;
3422 hot_team->t.t_sched.sched = r_sched.sched;
3423 hot_team->t.t_size_changed = 0;
3424 hot_team->t.t_nested_nth = &__kmp_nested_nth;
3429 typedef struct kmp_team_list_item {
3430 kmp_team_p
const *entry;
3431 struct kmp_team_list_item *next;
3432 } kmp_team_list_item_t;
3433 typedef kmp_team_list_item_t *kmp_team_list_t;
3435 static void __kmp_print_structure_team_accum(
3436 kmp_team_list_t list,
3437 kmp_team_p
const *team
3447 KMP_DEBUG_ASSERT(list != NULL);
3452 __kmp_print_structure_team_accum(list, team->t.t_parent);
3453 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3457 while (l->next != NULL && l->entry != team) {
3460 if (l->next != NULL) {
3466 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3472 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3473 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3501 void __kmp_print_structure(
void) {
3503 kmp_team_list_t list;
3507 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3511 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3512 "Table\n------------------------------\n");
3515 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3516 __kmp_printf(
"%2d", gtid);
3517 if (__kmp_threads != NULL) {
3518 __kmp_printf(
" %p", __kmp_threads[gtid]);
3520 if (__kmp_root != NULL) {
3521 __kmp_printf(
" %p", __kmp_root[gtid]);
3528 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3530 if (__kmp_threads != NULL) {
3532 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3533 kmp_info_t
const *thread = __kmp_threads[gtid];
3534 if (thread != NULL) {
3535 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3536 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3537 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3538 __kmp_print_structure_team(
" Serial Team: ",
3539 thread->th.th_serial_team);
3540 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3541 __kmp_print_structure_thread(
" Primary: ",
3542 thread->th.th_team_master);
3543 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3544 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3545 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3546 __kmp_print_structure_thread(
" Next in pool: ",
3547 thread->th.th_next_pool);
3549 __kmp_print_structure_team_accum(list, thread->th.th_team);
3550 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3554 __kmp_printf(
"Threads array is not allocated.\n");
3558 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3560 if (__kmp_root != NULL) {
3562 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3563 kmp_root_t
const *root = __kmp_root[gtid];
3565 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3566 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3567 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3568 __kmp_print_structure_thread(
" Uber Thread: ",
3569 root->r.r_uber_thread);
3570 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3571 __kmp_printf(
" In Parallel: %2d\n",
3572 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3574 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3575 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3579 __kmp_printf(
"Ubers array is not allocated.\n");
3582 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3584 while (list->next != NULL) {
3585 kmp_team_p
const *team = list->entry;
3587 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3588 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3589 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3590 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3591 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3592 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3593 for (i = 0; i < team->t.t_nproc; ++i) {
3594 __kmp_printf(
" Thread %2d: ", i);
3595 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3597 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3603 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3605 __kmp_print_structure_thread(
"Thread pool: ",
3606 CCAST(kmp_info_t *, __kmp_thread_pool));
3607 __kmp_print_structure_team(
"Team pool: ",
3608 CCAST(kmp_team_t *, __kmp_team_pool));
3612 while (list != NULL) {
3613 kmp_team_list_item_t *item = list;
3615 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
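/* __kmp_get_random: return a pseudo-random number from a per-thread linear
   congruential generator; th_a (a prime from __kmp_primes) is the multiplier
   and the upper 16 bits of the previous state are returned. */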
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only roots that died non-active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
3698 static int __kmp_expand_threads(
int nNeed) {
3700 int minimumRequiredCapacity;
3702 kmp_info_t **newThreads;
3703 kmp_root_t **newRoot;
3709 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3712 added = __kmp_reclaim_dead_roots();
3741 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3744 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3748 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3750 newCapacity = __kmp_threads_capacity;
3752 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3753 : __kmp_sys_max_nth;
3754 }
while (newCapacity < minimumRequiredCapacity);
3755 newThreads = (kmp_info_t **)__kmp_allocate(
3756 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3758 (kmp_root_t **)((
char *)newThreads +
sizeof(kmp_info_t *) * newCapacity);
3759 KMP_MEMCPY(newThreads, __kmp_threads,
3760 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3761 KMP_MEMCPY(newRoot, __kmp_root,
3762 __kmp_threads_capacity *
sizeof(kmp_root_t *));
3765 kmp_old_threads_list_t *node =
3766 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3767 node->threads = __kmp_threads;
3768 node->next = __kmp_old_threads_list;
3769 __kmp_old_threads_list = node;
3771 *(kmp_info_t * *
volatile *)&__kmp_threads = newThreads;
3772 *(kmp_root_t * *
volatile *)&__kmp_root = newRoot;
3773 added += newCapacity - __kmp_threads_capacity;
3774 *(
volatile int *)&__kmp_threads_capacity = newCapacity;
3776 if (newCapacity > __kmp_tp_capacity) {
3777 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3778 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3779 __kmp_threadprivate_resize_cache(newCapacity);
3781 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3783 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
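/* Register the current thread as a root (initial) thread: find a free slot in
   __kmp_threads, allocate/initialize the root and its serial team, and set up
   barriers, affinity defaults, and OMPT/OMPD notifications. */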
3792 int __kmp_register_root(
int initial_thread) {
3793 kmp_info_t *root_thread;
3797 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3798 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3815 capacity = __kmp_threads_capacity;
3816 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3823 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3824 capacity -= __kmp_hidden_helper_threads_num;
3828 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3829 if (__kmp_tp_cached) {
3830 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3831 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3832 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3834 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3844 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3847 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3848 gtid <= __kmp_hidden_helper_threads_num;
3851 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3852 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3853 "hidden helper thread: T#%d\n",
3859 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3862 for (gtid = __kmp_hidden_helper_threads_num + 1;
3863 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3867 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3868 KMP_ASSERT(gtid < __kmp_threads_capacity);
3873 TCW_4(__kmp_nth, __kmp_nth + 1);
3877 if (__kmp_adjust_gtid_mode) {
3878 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3879 if (TCR_4(__kmp_gtid_mode) != 2) {
3880 TCW_4(__kmp_gtid_mode, 2);
3883 if (TCR_4(__kmp_gtid_mode) != 1) {
3884 TCW_4(__kmp_gtid_mode, 1);
3889 #ifdef KMP_ADJUST_BLOCKTIME
3892 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3893 if (__kmp_nth > __kmp_avail_proc) {
3894 __kmp_zero_bt = TRUE;
3900 if (!(root = __kmp_root[gtid])) {
3901 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3902 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3905 #if KMP_STATS_ENABLED
3907 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3908 __kmp_stats_thread_ptr->startLife();
3909 KMP_SET_THREAD_STATE(SERIAL_REGION);
3912 __kmp_initialize_root(root);
3915 if (root->r.r_uber_thread) {
3916 root_thread = root->r.r_uber_thread;
3918 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3919 if (__kmp_storage_map) {
3920 __kmp_print_thread_storage_map(root_thread, gtid);
3922 root_thread->th.th_info.ds.ds_gtid = gtid;
3924 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3926 root_thread->th.th_root = root;
3927 if (__kmp_env_consistency_check) {
3928 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3931 __kmp_initialize_fast_memory(root_thread);
3935 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3936 __kmp_initialize_bget(root_thread);
3938 __kmp_init_random(root_thread);
3942 if (!root_thread->th.th_serial_team) {
3943 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3944 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3945 root_thread->th.th_serial_team =
3946 __kmp_allocate_team(root, 1, 1,
3950 proc_bind_default, &r_icvs, 0, NULL);
3952 KMP_ASSERT(root_thread->th.th_serial_team);
3953 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3954 root_thread->th.th_serial_team));
3957 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3959 root->r.r_root_team->t.t_threads[0] = root_thread;
3960 root->r.r_hot_team->t.t_threads[0] = root_thread;
3961 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3963 root_thread->th.th_serial_team->t.t_serialized = 0;
3964 root->r.r_uber_thread = root_thread;
3967 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3968 TCW_4(__kmp_init_gtid, TRUE);
3971 __kmp_gtid_set_specific(gtid);
3974 __kmp_itt_thread_name(gtid);
3977 #ifdef KMP_TDATA_GTID
3980 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3981 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3983 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3985 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3986 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3987 KMP_INIT_BARRIER_STATE));
3990 for (b = 0; b < bs_last_barrier; ++b) {
3991 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3993 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3997 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3998 KMP_INIT_BARRIER_STATE);
4000 #if KMP_AFFINITY_SUPPORTED
4001 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4002 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4003 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4004 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4006 root_thread->th.th_def_allocator = __kmp_def_allocator;
4007 root_thread->th.th_prev_level = 0;
4008 root_thread->th.th_prev_num_threads = 1;
4010 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
4011 tmp->cg_root = root_thread;
4012 tmp->cg_thread_limit = __kmp_cg_max_nth;
4013 tmp->cg_nthreads = 1;
4014 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
4015 " cg_nthreads init to 1\n",
4018 root_thread->th.th_cg_roots = tmp;
4020 __kmp_root_counter++;
4023 if (ompt_enabled.enabled) {
4025 kmp_info_t *root_thread = ompt_get_thread();
4027 ompt_set_thread_state(root_thread, ompt_state_overhead);
4029 if (ompt_enabled.ompt_callback_thread_begin) {
4030 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4031 ompt_thread_initial, __ompt_get_thread_data_internal());
4033 ompt_data_t *task_data;
4034 ompt_data_t *parallel_data;
4035 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4037 if (ompt_enabled.ompt_callback_implicit_task) {
4038 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4039 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4042 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4046 if (ompd_state & OMPD_ENABLE_BP)
4047 ompd_bp_thread_begin();
4051 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4056 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
4057 const int max_level) {
4059 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4060 if (!hot_teams || !hot_teams[level].hot_team) {
4063 KMP_DEBUG_ASSERT(level < max_level);
4064 kmp_team_t *team = hot_teams[level].hot_team;
4065 nth = hot_teams[level].hot_team_nth;
4067 if (level < max_level - 1) {
4068 for (i = 0; i < nth; ++i) {
4069 kmp_info_t *th = team->t.t_threads[i];
4070 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4071 if (i > 0 && th->th.th_hot_teams) {
4072 __kmp_free(th->th.th_hot_teams);
4073 th->th.th_hot_teams = NULL;
4077 __kmp_free_team(root, team, NULL);
4083 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4084 kmp_team_t *root_team = root->r.r_root_team;
4085 kmp_team_t *hot_team = root->r.r_hot_team;
4086 int n = hot_team->t.t_nproc;
4089 KMP_DEBUG_ASSERT(!root->r.r_active);
4091 root->r.r_root_team = NULL;
4092 root->r.r_hot_team = NULL;
4095 __kmp_free_team(root, root_team, NULL);
4096 if (__kmp_hot_teams_max_level >
4098 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4099 kmp_info_t *th = hot_team->t.t_threads[i];
4100 if (__kmp_hot_teams_max_level > 1) {
4101 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4103 if (th->th.th_hot_teams) {
4104 __kmp_free(th->th.th_hot_teams);
4105 th->th.th_hot_teams = NULL;
4109 __kmp_free_team(root, hot_team, NULL);
4114 if (__kmp_tasking_mode != tskm_immediate_exec) {
4115 __kmp_wait_to_unref_task_teams();
4121 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4123 (LPVOID) & (root->r.r_uber_thread->th),
4124 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4125 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4129 if (ompd_state & OMPD_ENABLE_BP)
4130 ompd_bp_thread_end();
4134 ompt_data_t *task_data;
4135 ompt_data_t *parallel_data;
4136 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4138 if (ompt_enabled.ompt_callback_implicit_task) {
4139 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4140 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4142 if (ompt_enabled.ompt_callback_thread_end) {
4143 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4144 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4150 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4151 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4153 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4154 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4157 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4158 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4159 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4160 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4161 root->r.r_uber_thread->th.th_cg_roots = NULL;
4163 __kmp_reap_thread(root->r.r_uber_thread, 1);
4167 root->r.r_uber_thread = NULL;
4169 root->r.r_begin = FALSE;
4174 void __kmp_unregister_root_current_thread(
int gtid) {
4175 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4179 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4180 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4181 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4184 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4187 kmp_root_t *root = __kmp_root[gtid];
4189 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4190 KMP_ASSERT(KMP_UBER_GTID(gtid));
4191 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4192 KMP_ASSERT(root->r.r_active == FALSE);
4196 kmp_info_t *thread = __kmp_threads[gtid];
4197 kmp_team_t *team = thread->th.th_team;
4198 kmp_task_team_t *task_team = thread->th.th_task_team;
4201 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4202 task_team->tt.tt_hidden_helper_task_encountered)) {
4205 thread->th.ompt_thread_info.state = ompt_state_undefined;
4207 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4210 __kmp_reset_root(gtid, root);
4214 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4216 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
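/* Initialize a thread's bookkeeping for executing as part of the given team:
   team pointers, implicit task, per-thread dispatch buffers, and contention
   group (cg_roots) accounting. */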
4260 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4261 int tid,
int gtid) {
4265 KMP_DEBUG_ASSERT(this_thr != NULL);
4266 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4267 KMP_DEBUG_ASSERT(team);
4268 KMP_DEBUG_ASSERT(team->t.t_threads);
4269 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4270 kmp_info_t *master = team->t.t_threads[0];
4271 KMP_DEBUG_ASSERT(master);
4272 KMP_DEBUG_ASSERT(master->th.th_root);
4276 TCW_SYNC_PTR(this_thr->th.th_team, team);
4278 this_thr->th.th_info.ds.ds_tid = tid;
4279 this_thr->th.th_set_nproc = 0;
4280 if (__kmp_tasking_mode != tskm_immediate_exec)
4283 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4285 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4286 this_thr->th.th_set_proc_bind = proc_bind_default;
4288 #if KMP_AFFINITY_SUPPORTED
4289 this_thr->th.th_new_place = this_thr->th.th_current_place;
4291 this_thr->th.th_root = master->th.th_root;
4294 this_thr->th.th_team_nproc = team->t.t_nproc;
4295 this_thr->th.th_team_master = master;
4296 this_thr->th.th_team_serialized = team->t.t_serialized;
4298 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4300 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4301 tid, gtid, this_thr, this_thr->th.th_current_task));
4303 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4306 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4307 tid, gtid, this_thr, this_thr->th.th_current_task));
4312 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4314 this_thr->th.th_local.this_construct = 0;
4316 if (!this_thr->th.th_pri_common) {
4317 this_thr->th.th_pri_common =
4318 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4319 if (__kmp_storage_map) {
4320 __kmp_print_storage_map_gtid(
4321 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4322 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4324 this_thr->th.th_pri_head = NULL;
4327 if (this_thr != master &&
4328 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4330 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4331 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4334 int i = tmp->cg_nthreads--;
4335 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4336 " on node %p of thread %p to %d\n",
4337 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4342 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4344 this_thr->th.th_cg_roots->cg_nthreads++;
4345 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4346 " node %p of thread %p to %d\n",
4347 this_thr, this_thr->th.th_cg_roots,
4348 this_thr->th.th_cg_roots->cg_root,
4349 this_thr->th.th_cg_roots->cg_nthreads));
4350 this_thr->th.th_current_task->td_icvs.thread_limit =
4351 this_thr->th.th_cg_roots->cg_thread_limit;
4356 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4359 sizeof(dispatch_private_info_t) *
4360 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4361 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4362 team->t.t_max_nproc));
4363 KMP_ASSERT(dispatch);
4364 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4365 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4367 dispatch->th_disp_index = 0;
4368 dispatch->th_doacross_buf_idx = 0;
4369 if (!dispatch->th_disp_buffer) {
4370 dispatch->th_disp_buffer =
4371 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4373 if (__kmp_storage_map) {
4374 __kmp_print_storage_map_gtid(
4375 gtid, &dispatch->th_disp_buffer[0],
4376 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4378 : __kmp_dispatch_num_buffers],
4380 "th_%d.th_dispatch.th_disp_buffer "
4381 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4382 gtid, team->t.t_id, gtid);
4385 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4388 dispatch->th_dispatch_pr_current = 0;
4389 dispatch->th_dispatch_sh_current = 0;
4391 dispatch->th_deo_fcn = 0;
4392 dispatch->th_dxo_fcn = 0;
4395 this_thr->th.th_next_pool = NULL;
4397 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4398 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
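/* Allocate a worker thread for the given team, reusing one from the thread
   pool when possible; otherwise create a fresh kmp_info_t, its serial team,
   barrier state, and the underlying OS worker thread. */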
4408 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4410 kmp_team_t *serial_team;
4411 kmp_info_t *new_thr;
4414 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4415 KMP_DEBUG_ASSERT(root && team);
4421 if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4422 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4423 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4424 if (new_thr == __kmp_thread_pool_insert_pt) {
4425 __kmp_thread_pool_insert_pt = NULL;
4427 TCW_4(new_thr->th.th_in_pool, FALSE);
4428 __kmp_suspend_initialize_thread(new_thr);
4429 __kmp_lock_suspend_mx(new_thr);
4430 if (new_thr->th.th_active_in_pool == TRUE) {
4431 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4432 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4433 new_thr->th.th_active_in_pool = FALSE;
4435 __kmp_unlock_suspend_mx(new_thr);
4437 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4438 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4439 KMP_ASSERT(!new_thr->th.th_team);
4440 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4443 __kmp_initialize_info(new_thr, team, new_tid,
4444 new_thr->th.th_info.ds.ds_gtid);
4445 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4447 TCW_4(__kmp_nth, __kmp_nth + 1);
4449 new_thr->th.th_task_state = 0;
4451 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4453 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4457 #ifdef KMP_ADJUST_BLOCKTIME
4460 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4461 if (__kmp_nth > __kmp_avail_proc) {
4462 __kmp_zero_bt = TRUE;
4471 kmp_balign_t *balign = new_thr->th.th_bar;
4472 for (b = 0; b < bs_last_barrier; ++b)
4473 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4476 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4477 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4484 KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
4485 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4490 if (!TCR_4(__kmp_init_monitor)) {
4491 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4492 if (!TCR_4(__kmp_init_monitor)) {
4493 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4494 TCW_4(__kmp_init_monitor, 1);
4495 __kmp_create_monitor(&__kmp_monitor);
4496 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4507 while (TCR_4(__kmp_init_monitor) < 2) {
4510 KF_TRACE(10, (
"after monitor thread has started\n"));
4513 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4520 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4522 : __kmp_hidden_helper_threads_num + 1;
4524 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4526 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4529 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4530 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4535 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4537 new_thr->th.th_nt_strict =
false;
4538 new_thr->th.th_nt_loc = NULL;
4539 new_thr->th.th_nt_sev = severity_fatal;
4540 new_thr->th.th_nt_msg = NULL;
4542 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4544 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4547 __itt_suppress_mark_range(
4548 __itt_suppress_range, __itt_suppress_threading_errors,
4549 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4550 __itt_suppress_mark_range(
4551 __itt_suppress_range, __itt_suppress_threading_errors,
4552 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4554 __itt_suppress_mark_range(
4555 __itt_suppress_range, __itt_suppress_threading_errors,
4556 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4558 __itt_suppress_mark_range(__itt_suppress_range,
4559 __itt_suppress_threading_errors,
4560 &new_thr->th.th_suspend_init_count,
4561 sizeof(new_thr->th.th_suspend_init_count));
4564 __itt_suppress_mark_range(__itt_suppress_range,
4565 __itt_suppress_threading_errors,
4566 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4567 sizeof(new_thr->th.th_bar[0].bb.b_go));
4568 __itt_suppress_mark_range(__itt_suppress_range,
4569 __itt_suppress_threading_errors,
4570 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4571 sizeof(new_thr->th.th_bar[1].bb.b_go));
4572 __itt_suppress_mark_range(__itt_suppress_range,
4573 __itt_suppress_threading_errors,
4574 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4575 sizeof(new_thr->th.th_bar[2].bb.b_go));
4577 if (__kmp_storage_map) {
4578 __kmp_print_thread_storage_map(new_thr, new_gtid);
4583 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4584 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4585 new_thr->th.th_serial_team = serial_team =
4586 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4590 proc_bind_default, &r_icvs, 0, NULL);
4592 KMP_ASSERT(serial_team);
4593 serial_team->t.t_serialized = 0;
4595 serial_team->t.t_threads[0] = new_thr;
4597 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4601 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4604 __kmp_initialize_fast_memory(new_thr);
4608 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4609 __kmp_initialize_bget(new_thr);
4612 __kmp_init_random(new_thr);
4616 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4617 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4620 kmp_balign_t *balign = new_thr->th.th_bar;
4621 for (b = 0; b < bs_last_barrier; ++b) {
4622 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4623 balign[b].bb.team = NULL;
4624 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4625 balign[b].bb.use_oncore_barrier = 0;
4628 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4629 new_thr->th.th_sleep_loc_type = flag_unset;
4631 new_thr->th.th_spin_here = FALSE;
4632 new_thr->th.th_next_waiting = 0;
4634 new_thr->th.th_blocking =
false;
4637 #if KMP_AFFINITY_SUPPORTED
4638 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4639 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4640 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4641 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4643 new_thr->th.th_def_allocator = __kmp_def_allocator;
4644 new_thr->th.th_prev_level = 0;
4645 new_thr->th.th_prev_num_threads = 1;
4647 TCW_4(new_thr->th.th_in_pool, FALSE);
4648 new_thr->th.th_active_in_pool = FALSE;
4649 TCW_4(new_thr->th.th_active, TRUE);
4651 new_thr->th.th_set_nested_nth = NULL;
4652 new_thr->th.th_set_nested_nth_sz = 0;
4660 if (__kmp_adjust_gtid_mode) {
4661 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4662 if (TCR_4(__kmp_gtid_mode) != 2) {
4663 TCW_4(__kmp_gtid_mode, 2);
4666 if (TCR_4(__kmp_gtid_mode) != 1) {
4667 TCW_4(__kmp_gtid_mode, 1);
4672 #ifdef KMP_ADJUST_BLOCKTIME
4675 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4676 if (__kmp_nth > __kmp_avail_proc) {
4677 __kmp_zero_bt = TRUE;
4682 #if KMP_AFFINITY_SUPPORTED
4684 __kmp_affinity_set_init_mask(new_gtid, FALSE);
4689 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4690 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4692 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4694 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
4726 static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4727 kmp_internal_control_t *new_icvs,
4729 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4732 KMP_DEBUG_ASSERT(team);
4733 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4734 KMP_DEBUG_ASSERT(team->t.t_threads);
4737 team->t.t_master_tid = 0;
4739 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4740 team->t.t_nproc = new_nproc;
4743 team->t.t_next_pool = NULL;
4747 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4748 team->t.t_invoke = NULL;
4751 team->t.t_sched.sched = new_icvs->sched.sched;
4753 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4754 team->t.t_fp_control_saved = FALSE;
4755 team->t.t_x87_fpu_control_word = 0;
  team->t.t_mxcsr = 0;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
4759 team->t.t_construct = 0;
4761 team->t.t_ordered.dt.t_value = 0;
4762 team->t.t_master_active = FALSE;
4765 team->t.t_copypriv_data = NULL;
4768 team->t.t_copyin_counter = 0;
4771 team->t.t_control_stack_top = NULL;
4773 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
4779 #if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
4782 th->th.th_first_place = first;
4783 th->th.th_last_place = last;
4784 th->th.th_new_place = newp;
4785 if (newp != th->th.th_current_place) {
4786 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4787 team->t.t_display_affinity = 1;
4789 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4790 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
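// __kmp_partition_places: assign a place partition to every thread in the
// team according to t_proc_bind. In outline (see the cases below):
//   - proc_bind_primary: every worker inherits the primary thread's place.
//   - proc_bind_close:   workers fill places adjacent to the primary; when
//                        n_th > n_places, S = n_th / n_places threads share
//                        each place and the remainder is spread every
//                        `gap`-th place.
//   - proc_bind_spread:  the partition is split into roughly equal,
//                        disjoint [first,last] sub-partitions, one per
//                        thread, so nested teams inherit separate place sets.
// update_master_only == 1 restricts the recomputation to the primary thread.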
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4800 if (KMP_HIDDEN_HELPER_TEAM(team))
4803 kmp_info_t *master_th = team->t.t_threads[0];
4804 KMP_DEBUG_ASSERT(master_th != NULL);
4805 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4806 int first_place = master_th->th.th_first_place;
4807 int last_place = master_th->th.th_last_place;
4808 int masters_place = master_th->th.th_current_place;
4809 int num_masks = __kmp_affinity.num_masks;
4810 team->t.t_first_place = first_place;
4811 team->t.t_last_place = last_place;
4813 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4814 "bound to place %d partition = [%d,%d]\n",
4815 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4816 team->t.t_id, masters_place, first_place, last_place));
4818 switch (proc_bind) {
4820 case proc_bind_default:
4823 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4826 case proc_bind_primary: {
4828 int n_th = team->t.t_nproc;
4829 for (f = 1; f < n_th; f++) {
4830 kmp_info_t *th = team->t.t_threads[f];
4831 KMP_DEBUG_ASSERT(th != NULL);
4832 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
4834 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4835 "partition = [%d,%d]\n",
4836 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4837 f, masters_place, first_place, last_place));
4841 case proc_bind_close: {
4843 int n_th = team->t.t_nproc;
4845 if (first_place <= last_place) {
4846 n_places = last_place - first_place + 1;
4848 n_places = num_masks - first_place + last_place + 1;
4850 if (n_th <= n_places) {
4851 int place = masters_place;
4852 for (f = 1; f < n_th; f++) {
4853 kmp_info_t *th = team->t.t_threads[f];
4854 KMP_DEBUG_ASSERT(th != NULL);
4856 if (place == last_place) {
4857 place = first_place;
4858 }
else if (place == (num_masks - 1)) {
4863 __kmp_set_thread_place(team, th, first_place, last_place, place);
4865 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4866 "partition = [%d,%d]\n",
4867 __kmp_gtid_from_thread(team->t.t_threads[f]),
4868 team->t.t_id, f, place, first_place, last_place));
4871 int S, rem, gap, s_count;
4872 S = n_th / n_places;
4874 rem = n_th - (S * n_places);
4875 gap = rem > 0 ? n_places / rem : n_places;
4876 int place = masters_place;
4878 for (f = 0; f < n_th; f++) {
4879 kmp_info_t *th = team->t.t_threads[f];
4880 KMP_DEBUG_ASSERT(th != NULL);
4882 __kmp_set_thread_place(team, th, first_place, last_place, place);
4885 if ((s_count == S) && rem && (gap_ct == gap)) {
4887 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4889 if (place == last_place) {
4890 place = first_place;
4891 }
else if (place == (num_masks - 1)) {
4899 }
else if (s_count == S) {
4900 if (place == last_place) {
4901 place = first_place;
4902 }
else if (place == (num_masks - 1)) {
4912 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4913 "partition = [%d,%d]\n",
4914 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4915 th->th.th_new_place, first_place, last_place));
4917 KMP_DEBUG_ASSERT(place == masters_place);
4921 case proc_bind_spread: {
4923 int n_th = team->t.t_nproc;
4926 if (first_place <= last_place) {
4927 n_places = last_place - first_place + 1;
4929 n_places = num_masks - first_place + last_place + 1;
4931 if (n_th <= n_places) {
4934 if (n_places != num_masks) {
4935 int S = n_places / n_th;
4936 int s_count, rem, gap, gap_ct;
4938 place = masters_place;
4939 rem = n_places - n_th * S;
4940 gap = rem ? n_th / rem : 1;
4943 if (update_master_only == 1)
4945 for (f = 0; f < thidx; f++) {
4946 kmp_info_t *th = team->t.t_threads[f];
4947 KMP_DEBUG_ASSERT(th != NULL);
4949 int fplace = place, nplace = place;
4951 while (s_count < S) {
4952 if (place == last_place) {
4953 place = first_place;
4954 }
else if (place == (num_masks - 1)) {
4961 if (rem && (gap_ct == gap)) {
4962 if (place == last_place) {
4963 place = first_place;
4964 }
else if (place == (num_masks - 1)) {
4972 __kmp_set_thread_place(team, th, fplace, place, nplace);
4975 if (place == last_place) {
4976 place = first_place;
4977 }
else if (place == (num_masks - 1)) {
4984 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4985 "partition = [%d,%d], num_masks: %u\n",
4986 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4987 f, th->th.th_new_place, th->th.th_first_place,
4988 th->th.th_last_place, num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
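        // Illustrative example of the spacing math above (not from the
        // source): with n_places = 10, n_th = 4 and masters_place = 0,
        // spacing = 11/4 = 2.75, so successive [first,last] sub-partitions
        // come out as [0,1], [2,4], [5,7], [8,10], the last of which is
        // clipped to [8,9] by the bounds check further down.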
5001 if (update_master_only == 1)
5003 for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
5006 KMP_DEBUG_ASSERT(last >= first);
5007 if (first >= n_places) {
5008 if (masters_place) {
5011 if (first == (masters_place + 1)) {
5012 KMP_DEBUG_ASSERT(f == n_th);
5015 if (last == masters_place) {
5016 KMP_DEBUG_ASSERT(f == (n_th - 1));
5020 KMP_DEBUG_ASSERT(f == n_th);
5025 if (last >= n_places) {
5026 last = (n_places - 1);
5031 KMP_DEBUG_ASSERT(0 <= first);
5032 KMP_DEBUG_ASSERT(n_places > first);
5033 KMP_DEBUG_ASSERT(0 <= last);
5034 KMP_DEBUG_ASSERT(n_places > last);
5035 KMP_DEBUG_ASSERT(last_place >= first_place);
5036 th = team->t.t_threads[f];
5037 KMP_DEBUG_ASSERT(th);
5038 __kmp_set_thread_place(team, th, first, last, place);
5040 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5041 "partition = [%d,%d], spacing = %.4f\n",
5042 __kmp_gtid_from_thread(team->t.t_threads[f]),
5043 team->t.t_id, f, th->th.th_new_place,
5044 th->th.th_first_place, th->th.th_last_place, spacing));
5048 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5050 int S, rem, gap, s_count;
5051 S = n_th / n_places;
5053 rem = n_th - (S * n_places);
5054 gap = rem > 0 ? n_places / rem : n_places;
5055 int place = masters_place;
5058 if (update_master_only == 1)
5060 for (f = 0; f < thidx; f++) {
5061 kmp_info_t *th = team->t.t_threads[f];
5062 KMP_DEBUG_ASSERT(th != NULL);
5064 __kmp_set_thread_place(team, th, place, place, place);
5067 if ((s_count == S) && rem && (gap_ct == gap)) {
5069 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5071 if (place == last_place) {
5072 place = first_place;
5073 }
else if (place == (num_masks - 1)) {
5081 }
else if (s_count == S) {
5082 if (place == last_place) {
5083 place = first_place;
5084 }
else if (place == (num_masks - 1)) {
5093 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5094 "partition = [%d,%d]\n",
5095 __kmp_gtid_from_thread(team->t.t_threads[f]),
5096 team->t.t_id, f, th->th.th_new_place,
5097 th->th.th_first_place, th->th.th_last_place));
5099 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5107 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                                ompt_data_t ompt_parallel_data,
                                kmp_proc_bind_t new_proc_bind,
                                kmp_internal_control_t *new_icvs, int argc,
                                kmp_info_t *master) {
5121 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5124 int use_hot_team = !root->r.r_active;
5126 int do_place_partition = 1;
5128 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
5129 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5130 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5133 kmp_hot_team_ptr_t *hot_teams;
5135 team = master->th.th_team;
5136 level = team->t.t_active_level;
5137 if (master->th.th_teams_microtask) {
5138 if (master->th.th_teams_size.nteams > 1 &&
5141 (microtask_t)__kmp_teams_master ||
5142 master->th.th_teams_level <
5149 if ((master->th.th_teams_size.nteams == 1 &&
5150 master->th.th_teams_level >= team->t.t_level) ||
5151 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5152 do_place_partition = 0;
5154 hot_teams = master->th.th_hot_teams;
5155 if (level < __kmp_hot_teams_max_level && hot_teams &&
5156 hot_teams[level].hot_team) {
5164 KMP_DEBUG_ASSERT(new_nproc == 1);
5167 if (use_hot_team && new_nproc > 1) {
5168 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5169 team = hot_teams[level].hot_team;
5171 if (__kmp_tasking_mode != tskm_immediate_exec) {
5172 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5173 "task_team[1] = %p before reinit\n",
5174 team->t.t_task_team[0], team->t.t_task_team[1]));
5178 if (team->t.t_nproc != new_nproc &&
5179 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5181 int old_nthr = team->t.t_nproc;
5182 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5187 if (do_place_partition == 0)
5188 team->t.t_proc_bind = proc_bind_default;
5192 if (team->t.t_nproc == new_nproc) {
5193 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5196 if (team->t.t_size_changed == -1) {
5197 team->t.t_size_changed = 1;
5199 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5203 kmp_r_sched_t new_sched = new_icvs->sched;
5205 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5207 __kmp_reinitialize_team(team, new_icvs,
5208 root->r.r_uber_thread->th.th_ident);
5210 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5211 team->t.t_threads[0], team));
5212 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5214 #if KMP_AFFINITY_SUPPORTED
5215 if ((team->t.t_size_changed == 0) &&
5216 (team->t.t_proc_bind == new_proc_bind)) {
5217 if (new_proc_bind == proc_bind_spread) {
5218 if (do_place_partition) {
5220 __kmp_partition_places(team, 1);
5223 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5224 "proc_bind = %d, partition = [%d,%d]\n",
5225 team->t.t_id, new_proc_bind, team->t.t_first_place,
5226 team->t.t_last_place));
5228 if (do_place_partition) {
5229 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5230 __kmp_partition_places(team);
5234 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5236 }
else if (team->t.t_nproc > new_nproc) {
5238 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5241 team->t.t_size_changed = 1;
5242 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5245 __kmp_add_threads_to_team(team, new_nproc);
5249 if (__kmp_tasking_mode != tskm_immediate_exec) {
5250 for (f = new_nproc; f < team->t.t_nproc; f++) {
5251 kmp_info_t *th = team->t.t_threads[f];
5252 KMP_DEBUG_ASSERT(th);
5253 th->th.th_task_team = NULL;
5256 if (__kmp_hot_teams_mode == 0) {
5259 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5260 hot_teams[level].hot_team_nth = new_nproc;
5262 for (f = new_nproc; f < team->t.t_nproc; f++) {
5263 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5264 __kmp_free_thread(team->t.t_threads[f]);
5265 team->t.t_threads[f] = NULL;
5271 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5272 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5273 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
5275 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5276 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5278 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5282 team->t.t_nproc = new_nproc;
5284 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5285 __kmp_reinitialize_team(team, new_icvs,
5286 root->r.r_uber_thread->th.th_ident);
5289 for (f = 0; f < new_nproc; ++f) {
5290 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5295 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5296 team->t.t_threads[0], team));
5298 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5301 for (f = 0; f < team->t.t_nproc; f++) {
5302 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5303 team->t.t_threads[f]->th.th_team_nproc ==
5308 if (do_place_partition) {
5309 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5310 #if KMP_AFFINITY_SUPPORTED
5311 __kmp_partition_places(team);
5317 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5319 int old_nproc = team->t.t_nproc;
5320 team->t.t_size_changed = 1;
5322 int avail_threads = hot_teams[level].hot_team_nth;
5323 if (new_nproc < avail_threads)
5324 avail_threads = new_nproc;
5325 kmp_info_t **other_threads = team->t.t_threads;
5326 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5330 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5331 for (b = 0; b < bs_last_barrier; ++b) {
5332 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5333 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5335 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5339 if (hot_teams[level].hot_team_nth >= new_nproc) {
5342 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5343 team->t.t_nproc = new_nproc;
5347 team->t.t_nproc = hot_teams[level].hot_team_nth;
5348 hot_teams[level].hot_team_nth = new_nproc;
5349 if (team->t.t_max_nproc < new_nproc) {
5351 __kmp_reallocate_team_arrays(team, new_nproc);
5352 __kmp_reinitialize_team(team, new_icvs, NULL);
5355 #if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5356 KMP_AFFINITY_SUPPORTED
5362 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5366 for (f = team->t.t_nproc; f < new_nproc; f++) {
5367 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5368 KMP_DEBUG_ASSERT(new_worker);
5369 team->t.t_threads[f] = new_worker;
5372 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5373 "join=%llu, plain=%llu\n",
5374 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5375 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5376 team->t.t_bar[bs_plain_barrier].b_arrived));
5380 kmp_balign_t *balign = new_worker->th.th_bar;
5381 for (b = 0; b < bs_last_barrier; ++b) {
5382 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5383 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5384 KMP_BARRIER_PARENT_FLAG);
5386 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5392 #if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5393 KMP_AFFINITY_SUPPORTED
5395 new_temp_affinity.restore();
5398 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5401 __kmp_add_threads_to_team(team, new_nproc);
5405 __kmp_initialize_team(team, new_nproc, new_icvs,
5406 root->r.r_uber_thread->th.th_ident);
5409 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5410 for (f = 0; f < team->t.t_nproc; ++f)
5411 __kmp_initialize_info(team->t.t_threads[f], team, f,
5412 __kmp_gtid_from_tid(f, team));
5415 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5416 for (f = old_nproc; f < team->t.t_nproc; ++f)
5417 team->t.t_threads[f]->th.th_task_state = old_state;
5420 for (f = 0; f < team->t.t_nproc; ++f) {
5421 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5422 team->t.t_threads[f]->th.th_team_nproc ==
5427 if (do_place_partition) {
5428 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5429 #if KMP_AFFINITY_SUPPORTED
5430 __kmp_partition_places(team);
5435 if (master->th.th_teams_microtask) {
5436 for (f = 1; f < new_nproc; ++f) {
5438 kmp_info_t *thr = team->t.t_threads[f];
5439 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5440 thr->th.th_teams_level = master->th.th_teams_level;
5441 thr->th.th_teams_size = master->th.th_teams_size;
5447 for (f = 1; f < new_nproc; ++f) {
5448 kmp_info_t *thr = team->t.t_threads[f];
5450 kmp_balign_t *balign = thr->th.th_bar;
5451 for (b = 0; b < bs_last_barrier; ++b) {
5452 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5453 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5455 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5462 __kmp_alloc_argv_entries(argc, team, TRUE);
5463 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5467 KF_TRACE(10, (
" hot_team = %p\n", team));
5470 if (__kmp_tasking_mode != tskm_immediate_exec) {
5471 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5472 "task_team[1] = %p after reinit\n",
5473 team->t.t_task_team[0], team->t.t_task_team[1]));
5478 __ompt_team_assign_id(team, ompt_parallel_data);
5488 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5491 if (team->t.t_max_nproc >= max_nproc) {
5493 __kmp_team_pool = team->t.t_next_pool;
5495 if (max_nproc > 1 &&
5496 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5498 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5503 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5505 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5506 "task_team[1] %p to NULL\n",
5507 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5508 team->t.t_task_team[0] = NULL;
5509 team->t.t_task_team[1] = NULL;
5512 __kmp_alloc_argv_entries(argc, team, TRUE);
5513 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5516 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5517 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5520 for (b = 0; b < bs_last_barrier; ++b) {
5521 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5523 team->t.t_bar[b].b_master_arrived = 0;
5524 team->t.t_bar[b].b_team_arrived = 0;
5529 team->t.t_proc_bind = new_proc_bind;
5531 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5535 __ompt_team_assign_id(team, ompt_parallel_data);
5538 team->t.t_nested_nth = NULL;
5549 team = __kmp_reap_team(team);
5550 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5558 team->t.t_max_nproc = max_nproc;
5559 if (max_nproc > 1 &&
5560 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5562 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5567 __kmp_allocate_team_arrays(team, max_nproc);
5569 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5570 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5572 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5574 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5575 team->t.t_task_team[0] = NULL;
5577 team->t.t_task_team[1] = NULL;
5580 if (__kmp_storage_map) {
5581 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5585 __kmp_alloc_argv_entries(argc, team, FALSE);
5586 team->t.t_argc = argc;
5589 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5590 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5593 for (b = 0; b < bs_last_barrier; ++b) {
5594 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5596 team->t.t_bar[b].b_master_arrived = 0;
5597 team->t.t_bar[b].b_team_arrived = 0;
5602 team->t.t_proc_bind = new_proc_bind;
5605 __ompt_team_assign_id(team, ompt_parallel_data);
5606 team->t.ompt_serialized_team_info = NULL;
5611 team->t.t_nested_nth = NULL;
5613 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
5624 void __kmp_free_team(kmp_root_t *root, kmp_team_t *team, kmp_info_t *master) {
5626 KA_TRACE(20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5630 KMP_DEBUG_ASSERT(root);
5631 KMP_DEBUG_ASSERT(team);
5632 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5633 KMP_DEBUG_ASSERT(team->t.t_threads);
5635 int use_hot_team = team == root->r.r_hot_team;
5638 level = team->t.t_active_level - 1;
5639 if (master->th.th_teams_microtask) {
5640 if (master->th.th_teams_size.nteams > 1) {
5644 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5645 master->th.th_teams_level == team->t.t_level) {
5651 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5653 if (level < __kmp_hot_teams_max_level) {
5654 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
  TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5663 team->t.t_copyin_counter = 0;
5668 if (!use_hot_team) {
5669 if (__kmp_tasking_mode != tskm_immediate_exec) {
5671 for (f = 1; f < team->t.t_nproc; ++f) {
5672 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5673 kmp_info_t *th = team->t.t_threads[f];
5674 volatile kmp_uint32 *state = &th->th.th_reap_state;
5675 while (*state != KMP_SAFE_TO_REAP) {
5679 if (!__kmp_is_thread_alive(th, &ecode)) {
5680 *state = KMP_SAFE_TO_REAP;
5685 if (th->th.th_sleep_loc)
5686 __kmp_null_resume_wrapper(th);
5693 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5694 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5695 if (task_team != NULL) {
5696 for (f = 0; f < team->t.t_nproc; ++f) {
5697 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5698 team->t.t_threads[f]->th.th_task_team = NULL;
5702 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5703 __kmp_get_gtid(), task_team, team->t.t_id));
5704 __kmp_free_task_team(master, task_team);
5705 team->t.t_task_team[tt_idx] = NULL;
5711 if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
5712 team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
5713 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
5714 KMP_INTERNAL_FREE(team->t.t_nested_nth);
5716 team->t.t_nested_nth = NULL;
5719 team->t.t_parent = NULL;
5720 team->t.t_level = 0;
5721 team->t.t_active_level = 0;
5724 for (f = 1; f < team->t.t_nproc; ++f) {
5725 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5726 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5727 (void)KMP_COMPARE_AND_STORE_ACQ32(
5728 &(team->t.t_threads[f]->th.th_used_in_team), 1, 2);
5730 __kmp_free_thread(team->t.t_threads[f]);
5733 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5736 team->t.b->go_release();
5737 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5738 for (f = 1; f < team->t.t_nproc; ++f) {
5739 if (team->t.b->sleep[f].sleep) {
5740 __kmp_atomic_resume_64(
5741 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5742 (kmp_atomic_flag_64<> *)NULL);
      for (int f = 1; f < team->t.t_nproc; ++f) {
5748 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5754 for (f = 1; f < team->t.t_nproc; ++f) {
5755 team->t.t_threads[f] = NULL;
5758 if (team->t.t_max_nproc > 1 &&
5759 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5760 distributedBarrier::deallocate(team->t.b);
5765 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
  __kmp_team_pool = (volatile kmp_team_t *)team;
5769 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5770 team->t.t_threads[1]->th.th_cg_roots);
5771 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5773 for (f = 1; f < team->t.t_nproc; ++f) {
5774 kmp_info_t *thr = team->t.t_threads[f];
5775 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5776 thr->th.th_cg_roots->cg_root == thr);
5778 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5779 thr->th.th_cg_roots = tmp->up;
5780 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5781 " up to node %p. cg_nthreads was %d\n",
5782 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5783 int i = tmp->cg_nthreads--;
5788 if (thr->th.th_cg_roots)
5789 thr->th.th_current_task->td_icvs.thread_limit =
5790 thr->th.th_cg_roots->cg_thread_limit;
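// __kmp_reap_team: final destruction of a pooled team -- free its dispatch
// buffers, thread and argv arrays, and the team structure itself, returning
// the next team in the pool so callers can keep walking the list.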
5799 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5800 kmp_team_t *next_pool = team->t.t_next_pool;
5802 KMP_DEBUG_ASSERT(team);
5803 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5804 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5805 KMP_DEBUG_ASSERT(team->t.t_threads);
5806 KMP_DEBUG_ASSERT(team->t.t_argv);
5811 __kmp_free_team_arrays(team);
5812 if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
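// __kmp_free_thread: detach a worker from its team and insert it into the
// global thread pool. The pool is a singly linked list kept sorted by gtid
// (e.g. T#2 -> T#5 -> T#7), and __kmp_thread_pool_insert_pt caches the last
// insertion point so repeated frees do not rescan the list from the head.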
5845 void __kmp_free_thread(kmp_info_t *this_th) {
5849 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5850 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5852 KMP_DEBUG_ASSERT(this_th);
5857 kmp_balign_t *balign = this_th->th.th_bar;
5858 for (b = 0; b < bs_last_barrier; ++b) {
5859 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5860 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5861 balign[b].bb.team = NULL;
5862 balign[b].bb.leaf_kids = 0;
5864 this_th->th.th_task_state = 0;
5865 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5868 TCW_PTR(this_th->th.th_team, NULL);
5869 TCW_PTR(this_th->th.th_root, NULL);
5870 TCW_PTR(this_th->th.th_dispatch, NULL);
5872 while (this_th->th.th_cg_roots) {
5873 this_th->th.th_cg_roots->cg_nthreads--;
5874 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5875 " %p of thread %p to %d\n",
5876 this_th, this_th->th.th_cg_roots,
5877 this_th->th.th_cg_roots->cg_root,
5878 this_th->th.th_cg_roots->cg_nthreads));
5879 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5880 if (tmp->cg_root == this_th) {
5881 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5883 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5884 this_th->th.th_cg_roots = tmp->up;
5887 if (tmp->cg_nthreads == 0) {
5890 this_th->th.th_cg_roots = NULL;
5900 __kmp_free_implicit_task(this_th);
5901 this_th->th.th_current_task = NULL;
5905 gtid = this_th->th.th_info.ds.ds_gtid;
5906 if (__kmp_thread_pool_insert_pt != NULL) {
5907 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5908 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5909 __kmp_thread_pool_insert_pt = NULL;
5918 if (__kmp_thread_pool_insert_pt != NULL) {
5919 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5921 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5923 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5924 scan = &((*scan)->th.th_next_pool))
5929 TCW_PTR(this_th->th.th_next_pool, *scan);
5930 __kmp_thread_pool_insert_pt = *scan = this_th;
5931 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5932 (this_th->th.th_info.ds.ds_gtid <
5933 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5934 TCW_4(this_th->th.th_in_pool, TRUE);
5935 __kmp_suspend_initialize_thread(this_th);
5936 __kmp_lock_suspend_mx(this_th);
5937 if (this_th->th.th_active == TRUE) {
5938 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5939 this_th->th.th_active_in_pool = TRUE;
5943 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5946 __kmp_unlock_suspend_mx(this_th);
5948 TCW_4(__kmp_nth, __kmp_nth - 1);
5950 #ifdef KMP_ADJUST_BLOCKTIME
5953 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5954 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5955 if (__kmp_nth <= __kmp_avail_proc) {
5956 __kmp_zero_bt = FALSE;
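// __kmp_launch_thread: main loop of a worker thread. Roughly: wait on the
// fork barrier for work, invoke the team's microtask through t_invoke, then
// wait on the join barrier -- repeated until the library signals shutdown
// through __kmp_global.g.g_done.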
5966 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5967 #if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
5974 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;
5979 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5981 if (__kmp_env_consistency_check) {
5982 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5986 if (ompd_state & OMPD_ENABLE_BP)
5987 ompd_bp_thread_begin();
  ompt_data_t *thread_data = nullptr;
5992 if (ompt_enabled.enabled) {
5993 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5994 *thread_data = ompt_data_none;
5996 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5997 this_thr->th.ompt_thread_info.wait_id = 0;
5998 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5999 this_thr->th.ompt_thread_info.parallel_flags = 0;
6000 if (ompt_enabled.ompt_callback_thread_begin) {
6001 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6002 ompt_thread_worker, thread_data);
6004 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6009 while (!TCR_4(__kmp_global.g.g_done)) {
6010 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6014 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6017 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6020 if (ompt_enabled.enabled) {
6021 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6025 pteam = &this_thr->th.th_team;
6028 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6030 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6033 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6034 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6035 (*pteam)->t.t_pkfn));
6037 updateHWFPControl(*pteam);
6040 if (ompt_enabled.enabled) {
6041 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6045 rc = (*pteam)->t.t_invoke(gtid);
6049 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6050 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6051 (*pteam)->t.t_pkfn));
6054 if (ompt_enabled.enabled) {
6056 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6058 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6062 __kmp_join_barrier(gtid);
6067 if (ompd_state & OMPD_ENABLE_BP)
6068 ompd_bp_thread_end();
6072 if (ompt_enabled.ompt_callback_thread_end) {
6073 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6077 this_thr->th.th_task_team = NULL;
6079 __kmp_common_destroy_gtid(gtid);
6081 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6084 #if OMP_PROFILING_SUPPORT
6085 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  // The TLS value stores gtid + 1; convert it back to a 0-based gtid.
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6097 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6101 __kmp_internal_end_thread(gtid);
6104 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6107 __kmp_internal_end_atexit();
void __kmp_internal_end_atexit(void) {
6115 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
6139 __kmp_internal_end_library(-1);
6141 __kmp_close_console();
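// __kmp_reap_thread: tear down a single worker -- release it from the fork
// barrier so it can exit, join the underlying OS thread, then free its
// per-thread resources (implicit task, fast memory, bget data, affinity
// mask, hierarchical barrier data, serial team).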
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6150 KMP_DEBUG_ASSERT(thread != NULL);
6152 gtid = thread->th.th_info.ds.ds_gtid;
6155 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6158 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6160 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6162 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6164 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6168 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6170 __kmp_release_64(&flag);
6175 __kmp_reap_worker(thread);
6187 if (thread->th.th_active_in_pool) {
6188 thread->th.th_active_in_pool = FALSE;
6189 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6190 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6194 __kmp_free_implicit_task(thread);
6198 __kmp_free_fast_memory(thread);
6201 __kmp_suspend_uninitialize_thread(thread);
6203 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6204 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6209 #ifdef KMP_ADJUST_BLOCKTIME
6212 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6213 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6214 if (__kmp_nth <= __kmp_avail_proc) {
6215 __kmp_zero_bt = FALSE;
6221 if (__kmp_env_consistency_check) {
6222 if (thread->th.th_cons) {
6223 __kmp_free_cons_stack(thread->th.th_cons);
6224 thread->th.th_cons = NULL;
6228 if (thread->th.th_pri_common != NULL) {
6229 __kmp_free(thread->th.th_pri_common);
6230 thread->th.th_pri_common = NULL;
6234 if (thread->th.th_local.bget_data != NULL) {
6235 __kmp_finalize_bget(thread);
6239 #if KMP_AFFINITY_SUPPORTED
6240 if (thread->th.th_affin_mask != NULL) {
6241 KMP_CPU_FREE(thread->th.th_affin_mask);
6242 thread->th.th_affin_mask = NULL;
6246 #if KMP_USE_HIER_SCHED
6247 if (thread->th.th_hier_bar_data != NULL) {
6248 __kmp_free(thread->th.th_hier_bar_data);
6249 thread->th.th_hier_bar_data = NULL;
6253 __kmp_reap_team(thread->th.th_serial_team);
6254 thread->th.th_serial_team = NULL;
6261 static void __kmp_itthash_clean(kmp_info_t *th) {
6263 if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6265 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6267 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6268 __kmp_thread_free(th, bucket);
6273 if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6275 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6277 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6278 __kmp_thread_free(th, bucket);
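// __kmp_internal_end: common shutdown path. After unregistering the library
// it reclaims dead roots, reaps the monitor thread (if any), drains and
// reaps the thread and team pools and the task teams, and finally clears the
// initialization flags.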
static void __kmp_internal_end(void) {
6290 __kmp_unregister_library();
6297 __kmp_reclaim_dead_roots();
6301 for (i = 0; i < __kmp_threads_capacity; i++)
6303 if (__kmp_root[i]->r.r_active)
6306 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6308 if (i < __kmp_threads_capacity) {
6320 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6321 if (TCR_4(__kmp_init_monitor)) {
6322 __kmp_reap_monitor(&__kmp_monitor);
6323 TCW_4(__kmp_init_monitor, 0);
6325 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6326 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6332 for (i = 0; i < __kmp_threads_capacity; i++) {
6333 if (__kmp_root[i]) {
6336 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6345 while (__kmp_thread_pool != NULL) {
6347 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6348 __kmp_thread_pool = thread->th.th_next_pool;
6350 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6351 thread->th.th_next_pool = NULL;
6352 thread->th.th_in_pool = FALSE;
6353 __kmp_reap_thread(thread, 0);
6355 __kmp_thread_pool_insert_pt = NULL;
6358 while (__kmp_team_pool != NULL) {
6360 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6361 __kmp_team_pool = team->t.t_next_pool;
6363 team->t.t_next_pool = NULL;
6364 __kmp_reap_team(team);
6367 __kmp_reap_task_teams();
6374 for (i = 0; i < __kmp_threads_capacity; i++) {
6375 kmp_info_t *thr = __kmp_threads[i];
6376 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6381 for (i = 0; i < __kmp_threads_capacity; ++i) {
6388 TCW_SYNC_4(__kmp_init_common, FALSE);
6390 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6398 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6399 if (TCR_4(__kmp_init_monitor)) {
6400 __kmp_reap_monitor(&__kmp_monitor);
6401 TCW_4(__kmp_init_monitor, 0);
6403 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6404 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6407 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6423 if (__kmp_global.g.g_abort) {
6424 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6428 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6429 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6434 if (TCR_4(__kmp_init_hidden_helper) &&
6435 !TCR_4(__kmp_hidden_helper_team_done)) {
6436 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6438 __kmp_hidden_helper_main_thread_release();
6440 __kmp_hidden_helper_threads_deinitz_wait();
6446 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6448 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6449 if (gtid == KMP_GTID_SHUTDOWN) {
6450 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6451 "already shutdown\n"));
6453 }
else if (gtid == KMP_GTID_MONITOR) {
6454 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6455 "registered, or system shutdown\n"));
6457 }
else if (gtid == KMP_GTID_DNE) {
6458 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6461 }
else if (KMP_UBER_GTID(gtid)) {
6463 if (__kmp_root[gtid]->r.r_active) {
6464 __kmp_global.g.g_abort = -1;
6465 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6466 __kmp_unregister_library();
6468 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6472 __kmp_itthash_clean(__kmp_threads[gtid]);
6475 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6476 __kmp_unregister_root_current_thread(gtid);
6483 #ifdef DUMP_DEBUG_ON_EXIT
6484 if (__kmp_debug_buf)
6485 __kmp_dump_debug_buffer();
6490 __kmp_unregister_library();
6495 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6498 if (__kmp_global.g.g_abort) {
6499 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6501 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6504 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6505 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6514 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6517 __kmp_internal_end();
6519 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6520 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6522 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6524 #ifdef DUMP_DEBUG_ON_EXIT
6525 if (__kmp_debug_buf)
6526 __kmp_dump_debug_buffer();
6530 __kmp_close_console();
6533 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6546 if (__kmp_global.g.g_abort) {
6547 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6551 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6552 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6557 if (TCR_4(__kmp_init_hidden_helper) &&
6558 !TCR_4(__kmp_hidden_helper_team_done)) {
6559 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6561 __kmp_hidden_helper_main_thread_release();
6563 __kmp_hidden_helper_threads_deinitz_wait();
6570 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6572 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6573 if (gtid == KMP_GTID_SHUTDOWN) {
6574 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6575 "already shutdown\n"));
6577 }
else if (gtid == KMP_GTID_MONITOR) {
6578 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6579 "registered, or system shutdown\n"));
6581 }
else if (gtid == KMP_GTID_DNE) {
6582 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6586 }
else if (KMP_UBER_GTID(gtid)) {
6588 if (__kmp_root[gtid]->r.r_active) {
6589 __kmp_global.g.g_abort = -1;
6590 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6592 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6596 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6598 __kmp_unregister_root_current_thread(gtid);
6602 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6605 __kmp_threads[gtid]->th.th_task_team = NULL;
6609 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6615 if (__kmp_pause_status != kmp_hard_paused)
6619 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6624 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6627 if (__kmp_global.g.g_abort) {
6628 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6630 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6633 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6634 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6645 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6647 for (i = 0; i < __kmp_threads_capacity; ++i) {
6648 if (KMP_UBER_GTID(i)) {
6651 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6652 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6653 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6660 __kmp_internal_end();
6662 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6663 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6665 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6667 #ifdef DUMP_DEBUG_ON_EXIT
6668 if (__kmp_debug_buf)
6669 __kmp_dump_debug_buffer();
6676 static long __kmp_registration_flag = 0;
6678 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
6695 #if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
char *temp_reg_status_file_name = nullptr;
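// __kmp_register_library_startup: detect whether another copy of the runtime
// is already live in this process. The registration value has the form
// "%p-%lx-%s" (flag address, flag value, library file), e.g. something like
// "0x7f..1c8-cafe1234-libomp.so" (illustrative only), and is published, in
// order of preference, via POSIX shared memory, a file under /tmp, or an
// environment variable. If a value from another live instance is found and
// KMP_DUPLICATE_LIB_OK is not set, the runtime aborts with DuplicateLibrary.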
void __kmp_register_library_startup(void) {
6704 char *name = __kmp_reg_status_name();
6710 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6711 __kmp_initialize_system_tick();
6713 __kmp_read_system_time(&time.dtime);
6714 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);
6719 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6720 __kmp_registration_str));
6726 #if defined(KMP_USE_SHM)
6727 char *shm_name =
nullptr;
6728 char *data1 =
nullptr;
6729 __kmp_shm_available = __kmp_detect_shm();
6730 if (__kmp_shm_available) {
6732 shm_name = __kmp_str_format(
"/%s", name);
6733 int shm_preexist = 0;
6734 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6735 if ((fd1 == -1) && (errno == EEXIST)) {
6738 fd1 = shm_open(shm_name, O_RDWR, 0600);
6740 KMP_WARNING(FunctionError,
"Can't open SHM");
6741 __kmp_shm_available =
false;
6746 if (__kmp_shm_available && shm_preexist == 0) {
6747 if (ftruncate(fd1, SHM_SIZE) == -1) {
6748 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6749 __kmp_shm_available =
false;
6752 if (__kmp_shm_available) {
      data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                           fd1, 0);
      if (data1 == MAP_FAILED) {
6756 KMP_WARNING(FunctionError,
"Can't map SHM");
6757 __kmp_shm_available =
false;
6760 if (__kmp_shm_available) {
6761 if (shm_preexist == 0) {
6762 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6765 value = __kmp_str_format(
"%s", data1);
6766 munmap(data1, SHM_SIZE);
6771 if (!__kmp_shm_available)
6772 __kmp_tmp_available = __kmp_detect_tmp();
6773 if (!__kmp_shm_available && __kmp_tmp_available) {
6780 temp_reg_status_file_name = __kmp_str_format(
"/tmp/%s", name);
6781 int tmp_preexist = 0;
6782 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6783 if ((fd1 == -1) && (errno == EEXIST)) {
6786 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6788 KMP_WARNING(FunctionError,
"Can't open TEMP");
6789 __kmp_tmp_available =
false;
6794 if (__kmp_tmp_available && tmp_preexist == 0) {
6796 if (ftruncate(fd1, SHM_SIZE) == -1) {
6797 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6798 __kmp_tmp_available =
false;
6801 if (__kmp_tmp_available) {
      data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                           fd1, 0);
      if (data1 == MAP_FAILED) {
6805 KMP_WARNING(FunctionError,
"Can't map /tmp");
6806 __kmp_tmp_available =
false;
6809 if (__kmp_tmp_available) {
6810 if (tmp_preexist == 0) {
6811 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6814 value = __kmp_str_format(
"%s", data1);
6815 munmap(data1, SHM_SIZE);
6820 if (!__kmp_shm_available && !__kmp_tmp_available) {
6823 __kmp_env_set(name, __kmp_registration_str, 0);
6825 value = __kmp_env_get(name);
6829 __kmp_env_set(name, __kmp_registration_str, 0);
6831 value = __kmp_env_get(name);
6834 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6841 char *flag_addr_str = NULL;
6842 char *flag_val_str = NULL;
6843 char const *file_name = NULL;
    __kmp_str_split(tail, '-', &flag_addr_str, &tail);
    __kmp_str_split(tail, '-', &flag_val_str, &tail);
6848 unsigned long *flag_addr = 0;
6849 unsigned long flag_val = 0;
    KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
    KMP_SSCANF(flag_val_str, "%lx", &flag_val);
    if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6856 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6870 file_name =
"unknown library";
6875 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6876 if (!__kmp_str_match_true(duplicate_ok)) {
6878 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6879 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6881 KMP_INTERNAL_FREE(duplicate_ok);
6882 __kmp_duplicate_library_ok = 1;
6887 #if defined(KMP_USE_SHM)
6888 if (__kmp_shm_available) {
6889 shm_unlink(shm_name);
6890 }
else if (__kmp_tmp_available) {
6891 unlink(temp_reg_status_file_name);
6894 __kmp_env_unset(name);
6898 __kmp_env_unset(name);
6902 KMP_DEBUG_ASSERT(0);
6906 KMP_INTERNAL_FREE((
void *)value);
6907 #if defined(KMP_USE_SHM)
6909 KMP_INTERNAL_FREE((
void *)shm_name);
6912 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
6918 char *name = __kmp_reg_status_name();
6921 #if defined(KMP_USE_SHM)
6922 char *shm_name =
nullptr;
6924 if (__kmp_shm_available) {
6925 shm_name = __kmp_str_format(
"/%s", name);
6926 fd1 = shm_open(shm_name, O_RDONLY, 0600);
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6929 if (data1 != MAP_FAILED) {
6930 value = __kmp_str_format(
"%s", data1);
6931 munmap(data1, SHM_SIZE);
6935 }
else if (__kmp_tmp_available) {
6936 fd1 = open(temp_reg_status_file_name, O_RDONLY);
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6939 if (data1 != MAP_FAILED) {
6940 value = __kmp_str_format(
"%s", data1);
6941 munmap(data1, SHM_SIZE);
6946 value = __kmp_env_get(name);
6949 value = __kmp_env_get(name);
6952 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6953 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6954 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6956 #if defined(KMP_USE_SHM)
6957 if (__kmp_shm_available) {
6958 shm_unlink(shm_name);
6959 }
else if (__kmp_tmp_available) {
6960 unlink(temp_reg_status_file_name);
6962 __kmp_env_unset(name);
6965 __kmp_env_unset(name);
6969 #if defined(KMP_USE_SHM)
6971 KMP_INTERNAL_FREE(shm_name);
6972 if (temp_reg_status_file_name)
6973 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6976 KMP_INTERNAL_FREE(__kmp_registration_str);
6977 KMP_INTERNAL_FREE(value);
6978 KMP_INTERNAL_FREE(name);
6980 __kmp_registration_flag = 0;
6981 __kmp_registration_str = NULL;
6988 #if KMP_MIC_SUPPORTED
6990 static void __kmp_check_mic_type() {
6991 kmp_cpuid_t cpuid_state = {0};
6992 kmp_cpuid_t *cs_p = &cpuid_state;
6993 __kmp_x86_cpuid(1, 0, cs_p);
6995 if ((cs_p->eax & 0xff0) == 0xB10) {
6996 __kmp_mic_type = mic2;
6997 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6998 __kmp_mic_type = mic3;
7000 __kmp_mic_type = non_mic;
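// User-level mwait support: on recent x86 this queries CPUID leaf 7
// (ECX bit 5, WAITPKG) to see whether umwait/tpause are usable; the
// KMP_HAVE_MWAIT build further below instead probes getauxval() for the
// MIC (__kmp_mic_type == mic3) user-level mwait capability.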
7007 static void __kmp_user_level_mwait_init() {
7008 struct kmp_cpuid buf;
7009 __kmp_x86_cpuid(7, 0, &buf);
7010 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
7011 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
7012 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
7013 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
7014 __kmp_umwait_enabled));
7016 #elif KMP_HAVE_MWAIT
7017 #ifndef AT_INTELPHIUSERMWAIT
7020 #define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
7028 static void __kmp_user_level_mwait_init() {
7033 if (__kmp_mic_type == mic3) {
7034 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7035 if ((res & 0x1) || __kmp_user_level_mwait) {
7036 __kmp_mwait_enabled = TRUE;
7037 if (__kmp_user_level_mwait) {
7038 KMP_INFORM(EnvMwaitWarn);
7041 __kmp_mwait_enabled = FALSE;
7044 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7045 "__kmp_mwait_enabled = %d\n",
7046 __kmp_mic_type, __kmp_mwait_enabled));
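// __kmp_do_serial_initialize: one-time process-wide setup, performed under
// __kmp_initz_lock. In outline: sanity-check type sizes, register the
// library, initialize the global/atomic/bootstrap locks, pick default team
// sizes and barrier patterns, read the environment (KMP_*/OMP_* settings),
// allocate the __kmp_threads/__kmp_root arrays, and register the initial
// root thread.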
static void __kmp_do_serial_initialize(void) {
7054 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7070 __kmp_validate_locks();
7072 #if ENABLE_LIBOMPTARGET
7074 __kmp_init_omptarget();
7078 __kmp_init_allocator();
7084 if (__kmp_need_register_serial)
7085 __kmp_register_library_startup();
7088 if (TCR_4(__kmp_global.g.g_done)) {
7089 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7092 __kmp_global.g.g_abort = 0;
7093 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7096 #if KMP_USE_ADAPTIVE_LOCKS
7097 #if KMP_DEBUG_ADAPTIVE_LOCKS
7098 __kmp_init_speculative_stats();
7101 #if KMP_STATS_ENABLED
7104 __kmp_init_lock(&__kmp_global_lock);
7105 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7106 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7107 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7108 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7109 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7110 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7111 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7112 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7113 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7114 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7115 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7116 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7117 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7118 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7119 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7121 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7123 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7127 __kmp_runtime_initialize();
7129 #if KMP_MIC_SUPPORTED
7130 __kmp_check_mic_type();
7137 __kmp_abort_delay = 0;
7141 __kmp_dflt_team_nth_ub = __kmp_xproc;
7142 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7143 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7145 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7146 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7148 __kmp_max_nth = __kmp_sys_max_nth;
7149 __kmp_cg_max_nth = __kmp_sys_max_nth;
7150 __kmp_teams_max_nth = __kmp_xproc;
7151 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7152 __kmp_teams_max_nth = __kmp_sys_max_nth;
7157 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7159 __kmp_monitor_wakeups =
7160 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7161 __kmp_bt_intervals =
7162 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7165 __kmp_library = library_throughput;
7167 __kmp_static = kmp_sch_static_balanced;
7174 #if KMP_FAST_REDUCTION_BARRIER
7175 #define kmp_reduction_barrier_gather_bb ((int)1)
7176 #define kmp_reduction_barrier_release_bb ((int)1)
7177 #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7178 #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7180 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7181 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7182 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7183 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7184 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7185 #if KMP_FAST_REDUCTION_BARRIER
7186 if (i == bs_reduction_barrier) {
7188 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7189 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7190 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7191 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7195 #if KMP_FAST_REDUCTION_BARRIER
7196 #undef kmp_reduction_barrier_release_pat
7197 #undef kmp_reduction_barrier_gather_pat
7198 #undef kmp_reduction_barrier_release_bb
7199 #undef kmp_reduction_barrier_gather_bb
7201 #if KMP_MIC_SUPPORTED
7202 if (__kmp_mic_type == mic2) {
7204 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7205 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7207 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7208 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7210 #if KMP_FAST_REDUCTION_BARRIER
7211 if (__kmp_mic_type == mic2) {
7212 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7213 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7220 __kmp_env_checks = TRUE;
7222 __kmp_env_checks = FALSE;
7226 __kmp_foreign_tp = TRUE;
7228 __kmp_global.g.g_dynamic = FALSE;
7229 __kmp_global.g.g_dynamic_mode = dynamic_default;
7231 __kmp_init_nesting_mode();
7233 __kmp_env_initialize(NULL);
7235 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7236 __kmp_user_level_mwait_init();
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7241 if (__kmp_str_match_true(val)) {
7242 kmp_str_buf_t buffer;
7243 __kmp_str_buf_init(&buffer);
7244 __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
7246 __kmp_str_buf_free(&buffer);
7248 __kmp_env_free(&val);
7251 __kmp_threads_capacity =
7252 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7254 __kmp_tp_capacity = __kmp_default_tp_capacity(
7255 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7260 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7261 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7262 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7263 __kmp_thread_pool = NULL;
7264 __kmp_thread_pool_insert_pt = NULL;
7265 __kmp_team_pool = NULL;
  size = (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
         CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7279 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7281 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7286 gtid = __kmp_register_root(TRUE);
7287 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7288 KMP_ASSERT(KMP_UBER_GTID(gtid));
7289 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7293 __kmp_common_initialize();
7297 __kmp_register_atfork();
#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
  }
#endif
7314 #if KMP_HANDLE_SIGNALS
7320 __kmp_install_signals(FALSE);
7323 __kmp_install_signals(TRUE);
7328 __kmp_init_counter++;
7330 __kmp_init_serial = TRUE;
7332 if (__kmp_version) {
7333 __kmp_print_version_1();
7336 if (__kmp_settings) {
7340 if (__kmp_display_env || __kmp_display_env_verbose) {
7341 __kmp_env_print_2();
7350 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7354 if (__kmp_init_serial) {
7357 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7358 if (__kmp_init_serial) {
7359 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7362 __kmp_do_serial_initialize();
7363 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
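// __kmp_do_middle_initialize: second initialization phase. It initializes
// affinity (when supported) and derives __kmp_dflt_team_nth from
// KMP_DFLT_NTH_CORES or the number of available processors, then updates the
// default team size for threads that were already registered.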
static void __kmp_do_middle_initialize(void) {
7368 int prev_dflt_team_nth;
7370 if (!__kmp_init_serial) {
7371 __kmp_do_serial_initialize();
7374 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7376 if (UNLIKELY(!__kmp_need_register_serial)) {
7379 __kmp_register_library_startup();
7384 prev_dflt_team_nth = __kmp_dflt_team_nth;
7386 #if KMP_AFFINITY_SUPPORTED
7389 __kmp_affinity_initialize(__kmp_affinity);
7393 KMP_ASSERT(__kmp_xproc > 0);
7394 if (__kmp_avail_proc == 0) {
7395 __kmp_avail_proc = __kmp_xproc;
7401 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7402 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7407 if (__kmp_dflt_team_nth == 0) {
7408 #ifdef KMP_DFLT_NTH_CORES
7410 __kmp_dflt_team_nth = __kmp_ncores;
7411 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7412 "__kmp_ncores (%d)\n",
7413 __kmp_dflt_team_nth));
7416 __kmp_dflt_team_nth = __kmp_avail_proc;
7417 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7418 "__kmp_avail_proc(%d)\n",
7419 __kmp_dflt_team_nth));
7423 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7424 __kmp_dflt_team_nth = KMP_MIN_NTH;
7426 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7427 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7430 if (__kmp_nesting_mode > 0)
7431 __kmp_set_nesting_mode_threads();
7435 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7437 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7442 for (i = 0; i < __kmp_threads_capacity; i++) {
7443 kmp_info_t *thread = __kmp_threads[i];
7446 if (thread->th.th_current_task->td_icvs.nproc != 0)
7449 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7454 KA_TRACE(20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7455 __kmp_dflt_team_nth));
7457 #ifdef KMP_ADJUST_BLOCKTIME
7459 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7460 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7461 if (__kmp_nth > __kmp_avail_proc) {
7462 __kmp_zero_bt = TRUE;
7468 TCW_SYNC_4(__kmp_init_middle, TRUE);
7470 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7473 void __kmp_middle_initialize(void) {
7474 if (__kmp_init_middle) {
7477 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7478 if (__kmp_init_middle) {
7479 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7482 __kmp_do_middle_initialize();
7483 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7486 void __kmp_parallel_initialize(void) {
7487 int gtid = __kmp_entry_gtid();
7490 if (TCR_4(__kmp_init_parallel))
7492 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7493 if (TCR_4(__kmp_init_parallel)) {
7494 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7499 if (TCR_4(__kmp_global.g.g_done)) {
7502 KA_TRACE(10, ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7503 __kmp_infinite_loop();
7509 if (!__kmp_init_middle) {
7510 __kmp_do_middle_initialize();
7512 __kmp_assign_root_init_mask();
7513 __kmp_resume_if_hard_paused();
7516 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7517 KMP_ASSERT(KMP_UBER_GTID(gtid));
7519 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7522 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7523 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7524 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7528 #if KMP_HANDLE_SIGNALS
7530 __kmp_install_signals(TRUE);
7534 __kmp_suspend_initialize();
7536 #if defined(USE_LOAD_BALANCE)
7537 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7538 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7541 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7542 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7546 if (__kmp_version) {
7547 __kmp_print_version_2();
7551 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7554 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7556 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7559 void __kmp_hidden_helper_initialize() {
7560 if (TCR_4(__kmp_init_hidden_helper))
7564 if (!TCR_4(__kmp_init_parallel))
7565 __kmp_parallel_initialize();
7569 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7570 if (TCR_4(__kmp_init_hidden_helper)) {
7571 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7575 #if KMP_AFFINITY_SUPPORTED
7579 if (!__kmp_hh_affinity.flags.initialized)
7580 __kmp_affinity_initialize(__kmp_hh_affinity);
7584 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7588 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7591 __kmp_do_initialize_hidden_helper_threads();
7594 __kmp_hidden_helper_threads_initz_wait();
7597 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7599 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7604 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7606 kmp_disp_t *dispatch;
7611 this_thr->th.th_local.this_construct = 0;
7612 #if KMP_CACHE_MANAGE
7613 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7615 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7616 KMP_DEBUG_ASSERT(dispatch);
7617 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7621 dispatch->th_disp_index = 0;
7622 dispatch->th_doacross_buf_idx = 0;
7623 if (__kmp_env_consistency_check)
7624 __kmp_push_parallel(gtid, team->t.t_ident);
7629 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7631 if (__kmp_env_consistency_check)
7632 __kmp_pop_parallel(gtid, team->t.t_ident);
7634 __kmp_finish_implicit_task(this_thr);
7637 int __kmp_invoke_task_func(int gtid) {
7639 int tid = __kmp_tid_from_gtid(gtid);
7640 kmp_info_t *this_thr = __kmp_threads[gtid];
7641 kmp_team_t *team = this_thr->th.th_team;
7643 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7645 if (__itt_stack_caller_create_ptr) {
7647 if (team->t.t_stack_id != NULL) {
7648 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7650 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7651 __kmp_itt_stack_callee_enter(
7652 (__itt_caller)team->t.t_parent->t.t_stack_id);
7656 #if INCLUDE_SSC_MARKS
7657 SSC_MARK_INVOKING();
7662 void **exit_frame_p;
7663 ompt_data_t *my_task_data;
7664 ompt_data_t *my_parallel_data;
7667 if (ompt_enabled.enabled) {
7668 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7669 .ompt_task_info.frame.exit_frame.ptr);
7671 exit_frame_p = &dummy;
7675 my_task_data = &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7676 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7677 if (ompt_enabled.ompt_callback_implicit_task) {
7678 ompt_team_size = team->t.t_nproc;
7679 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7680 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7681 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7682 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7686 #if KMP_STATS_ENABLED
7688 if (previous_state == stats_state_e::TEAMS_REGION) {
7689 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7691 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7693 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7696 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7697 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7704 *exit_frame_p = NULL;
7705 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
7708 #if KMP_STATS_ENABLED
7709 if (previous_state == stats_state_e::TEAMS_REGION) {
7710 KMP_SET_THREAD_STATE(previous_state);
7712 KMP_POP_PARTITIONED_TIMER();
7716 if (__itt_stack_caller_create_ptr) {
7718 if (team->t.t_stack_id != NULL) {
7719 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7721 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7722 __kmp_itt_stack_callee_leave(
7723 (__itt_caller)team->t.t_parent->t.t_stack_id);
7727 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7732 void __kmp_teams_master(int gtid) {
7734 kmp_info_t *thr = __kmp_threads[gtid];
7735 kmp_team_t *team = thr->th.th_team;
7736 ident_t *loc = team->t.t_ident;
7737 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7738 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7739 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7740 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7741 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7744 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7747 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7748 tmp->cg_nthreads = 1;
7749 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7750 " cg_nthreads to 1\n", thr, tmp));
7752 tmp->up = thr->th.th_cg_roots;
7753 thr->th.th_cg_roots = tmp;
7757 #if INCLUDE_SSC_MARKS
7760 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7761 (microtask_t)thr->th.th_teams_microtask,
7762 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7763 #if INCLUDE_SSC_MARKS
7767 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7768 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7771 __kmp_join_call(loc, gtid
7780 int __kmp_invoke_teams_master(int gtid) {
7781 kmp_info_t *this_thr = __kmp_threads[gtid];
7782 kmp_team_t *team = this_thr->th.th_team;
7784 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7785 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7786 (void *)__kmp_teams_master);
7788 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7790 int tid = __kmp_tid_from_gtid(gtid);
7791 ompt_data_t *task_data =
7792 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7793 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7794 if (ompt_enabled.ompt_callback_implicit_task) {
7795 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7796 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7798 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7801 __kmp_teams_master(gtid);
7803 this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
7805 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7813 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7814 kmp_info_t *thr = __kmp_threads[gtid];
7816 if (num_threads > 0)
7817 thr->th.th_set_nproc = num_threads;
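// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): a num_threads clause is typically lowered
// to a __kmpc_push_num_threads call, which forwards to __kmp_push_num_threads
// above before the parallel region is forked.
#if 0
#include <omp.h>
#include <stdio.h>

static void example_num_threads(void) {
  // Requests a team of (at most) 4 threads for this one region only.
  #pragma omp parallel num_threads(4)
  { printf("hello from thread %d\n", omp_get_thread_num()); }
}
#endif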
7820 void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
7821 int *num_threads_list) {
7822 kmp_info_t *thr = __kmp_threads[gtid];
7824 KMP_DEBUG_ASSERT(list_length > 1);
7826 if (num_threads_list[0] > 0)
7827 thr->th.th_set_nproc = num_threads_list[0];
7828 thr->th.th_set_nested_nth =
7829 (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
7830 for (kmp_uint32 i = 0; i < list_length; ++i)
7831 thr->th.th_set_nested_nth[i] = num_threads_list[i];
7832 thr->th.th_set_nested_nth_sz = list_length;
7835 void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev, const char *msg) {
7837 kmp_info_t *thr = __kmp_threads[gtid];
7838 thr->th.th_nt_strict = true;
7839 thr->th.th_nt_loc = loc;
7841 if (sev == severity_warning)
7842 thr->th.th_nt_sev = sev;
7844 thr->th.th_nt_sev = severity_fatal;
7847 thr->th.th_nt_msg = msg;
7849 thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
7850 "strict num_threads clause.";
7853 static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads) {
7855 KMP_DEBUG_ASSERT(thr);
7857 if (!TCR_4(__kmp_init_middle))
7858 __kmp_middle_initialize();
7859 __kmp_assign_root_init_mask();
7860 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7861 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7863 if (num_threads == 0) {
7864 if (__kmp_teams_thread_limit > 0) {
7865 num_threads = __kmp_teams_thread_limit;
7867 num_threads = __kmp_avail_proc / num_teams;
7872 if (num_threads > __kmp_dflt_team_nth) {
7873 num_threads = __kmp_dflt_team_nth;
7875 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7876 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7878 if (num_teams * num_threads > __kmp_teams_max_nth) {
7879 num_threads = __kmp_teams_max_nth / num_teams;
7881 if (num_threads == 0) {
7885 if (num_threads < 0) {
7886 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7892 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7894 if (num_threads > __kmp_dflt_team_nth) {
7895 num_threads = __kmp_dflt_team_nth;
7897 if (num_teams * num_threads > __kmp_teams_max_nth) {
7898 int new_threads = __kmp_teams_max_nth / num_teams;
7899 if (new_threads == 0) {
7902 if (new_threads != num_threads) {
7903 if (!__kmp_reserve_warn) {
7904 __kmp_reserve_warn = 1;
7905 __kmp_msg(kmp_ms_warning,
7906 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7907 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7910 num_threads = new_threads;
7913 thr->th.th_teams_size.nth = num_threads;
7918 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads) {
7920 kmp_info_t *thr = __kmp_threads[gtid];
7921 if (num_teams < 0) {
7924 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7928 if (num_teams == 0) {
7929 if (__kmp_nteams > 0) {
7930 num_teams = __kmp_nteams;
7935 if (num_teams > __kmp_teams_max_nth) {
7936 if (!__kmp_reserve_warn) {
7937 __kmp_reserve_warn = 1;
7938 __kmp_msg(kmp_ms_warning,
7939 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7940 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7942 num_teams = __kmp_teams_max_nth;
7946 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7948 __kmp_push_thread_limit(thr, num_teams, num_threads);
7953 void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7954 int num_teams_ub, int num_threads) {
7955 kmp_info_t *thr = __kmp_threads[gtid];
7956 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7957 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7958 KMP_DEBUG_ASSERT(num_threads >= 0);
7960 if (num_teams_lb > num_teams_ub) {
7961 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7962 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7967 if (num_teams_lb == 0 && num_teams_ub > 0)
7968 num_teams_lb = num_teams_ub;
7970 if (num_teams_lb == 0 && num_teams_ub == 0) {
7971 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7972 if (num_teams > __kmp_teams_max_nth) {
7973 if (!__kmp_reserve_warn) {
7974 __kmp_reserve_warn = 1;
7975 __kmp_msg(kmp_ms_warning,
7976 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7977 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7979 num_teams = __kmp_teams_max_nth;
7981 } else if (num_teams_lb == num_teams_ub) {
7982 num_teams = num_teams_ub;
7984 if (num_threads <= 0) {
7985 if (num_teams_ub > __kmp_teams_max_nth) {
7986 num_teams = num_teams_lb;
7988 num_teams = num_teams_ub;
7991 num_teams = (num_threads > __kmp_teams_max_nth)
7993 : __kmp_teams_max_nth / num_threads;
7994 if (num_teams < num_teams_lb) {
7995 num_teams = num_teams_lb;
7996 } else if (num_teams > num_teams_ub) {
7997 num_teams = num_teams_ub;
8003 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8005 __kmp_push_thread_limit(thr, num_teams, num_threads);
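// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): the num_teams/thread_limit handling above
// backs the teams construct; when a lower:upper bound pair is given, the
// OpenMP 5.1 entry point that reaches __kmp_push_num_teams_51 is used.
#if 0
#include <omp.h>
#include <stdio.h>

static void example_teams(void) {
  // Requests a league of 2 teams, each limited to 8 threads.
  #pragma omp teams num_teams(2) thread_limit(8)
  { printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams()); }
}
#endif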
8009 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
8010 kmp_info_t *thr = __kmp_threads[gtid];
8011 thr->th.th_set_proc_bind = proc_bind;
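// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): a proc_bind clause supplies the binding
// policy that __kmp_push_proc_bind records in th_set_proc_bind for the
// upcoming parallel region.
#if 0
#include <omp.h>
#include <stdio.h>

static void example_proc_bind(void) {
  // Spread the team's threads across the available places.
  #pragma omp parallel proc_bind(spread) num_threads(4)
  { printf("bound thread %d\n", omp_get_thread_num()); }
}
#endif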
8016 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
8017 kmp_info_t *this_thr = __kmp_threads[gtid];
8023 KMP_DEBUG_ASSERT(team);
8024 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8025 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8028 team->t.t_construct = 0;
8029 team->t.t_ordered.dt.t_value = 0;
8033 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8034 if (team->t.t_max_nproc > 1) {
8036 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
8037 team->t.t_disp_buffer[i].buffer_index = i;
8038 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8041 team->t.t_disp_buffer[0].buffer_index = 0;
8042 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8046 KMP_ASSERT(this_thr->th.th_team == team);
8049 for (f = 0; f < team->t.t_nproc; f++) {
8050 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8051 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8056 __kmp_fork_barrier(gtid, 0);
8059 void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8060 kmp_info_t *this_thr = __kmp_threads[gtid];
8062 KMP_DEBUG_ASSERT(team);
8063 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8064 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8070 if (__kmp_threads[gtid] &&
8071 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8072 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8073 __kmp_threads[gtid]);
8074 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8075 "team->t.t_nproc=%d\n",
8076 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8078 __kmp_print_structure();
8080 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8081 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8084 __kmp_join_barrier(gtid);
8086 ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
8087 if (ompt_enabled.enabled &&
8088 (ompt_state == ompt_state_wait_barrier_teams ||
8089 ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
8090 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8091 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8092 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8094 void *codeptr = NULL;
8095 if (KMP_MASTER_TID(ds_tid) &&
8096 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8097 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8098 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8100 ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
8101 if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
8102 sync_kind = ompt_sync_region_barrier_teams;
8103 if (ompt_enabled.ompt_callback_sync_region_wait) {
8104 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8105 sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8107 if (ompt_enabled.ompt_callback_sync_region) {
8108 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8109 sync_kind, ompt_scope_end, NULL, task_data, codeptr);
8112 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8113 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8114 ompt_scope_end, NULL, task_data, 0, ds_tid,
8115 ompt_task_implicit);
8121 KMP_ASSERT(this_thr->th.th_team == team);
8126 #ifdef USE_LOAD_BALANCE
8130 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8133 kmp_team_t *hot_team;
8135 if (root->r.r_active) {
8138 hot_team = root->r.r_hot_team;
8139 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8140 return hot_team->t.t_nproc - 1;
8145 for (i = 1; i < hot_team->t.t_nproc; i++) {
8146 if (hot_team->t.t_threads[i]->th.th_active) {
8155 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8158 int hot_team_active;
8159 int team_curr_active;
8162 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8164 KMP_DEBUG_ASSERT(root);
8165 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8166 ->th.th_current_task->td_icvs.dynamic == TRUE);
8167 KMP_DEBUG_ASSERT(set_nproc > 1);
8169 if (set_nproc == 1) {
8170 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8179 pool_active = __kmp_thread_pool_active_nth;
8180 hot_team_active = __kmp_active_hot_team_nproc(root);
8181 team_curr_active = pool_active + hot_team_active + 1;
8184 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8185 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8186 "hot team active = %d\n",
8187 system_active, pool_active, hot_team_active));
8189 if (system_active < 0) {
8193 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8194 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8197 retval = __kmp_avail_proc - __kmp_nth +
8198 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8199 if (retval > set_nproc) {
8202 if (retval < KMP_MIN_NTH) {
8203 retval = KMP_MIN_NTH;
8206 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8214 if (system_active < team_curr_active) {
8215 system_active = team_curr_active;
8217 retval = __kmp_avail_proc - system_active + team_curr_active;
8218 if (retval > set_nproc) {
8221 if (retval < KMP_MIN_NTH) {
8222 retval = KMP_MIN_NTH;
8225 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8234 void __kmp_cleanup(void) {
8237 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8239 if (TCR_4(__kmp_init_parallel)) {
8240 #if KMP_HANDLE_SIGNALS
8241 __kmp_remove_signals();
8243 TCW_4(__kmp_init_parallel, FALSE);
8246 if (TCR_4(__kmp_init_middle)) {
8247 #if KMP_AFFINITY_SUPPORTED
8248 __kmp_affinity_uninitialize();
8250 __kmp_cleanup_hierarchy();
8251 TCW_4(__kmp_init_middle, FALSE);
8254 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8256 if (__kmp_init_serial) {
8257 __kmp_runtime_destroy();
8258 __kmp_init_serial = FALSE;
8261 __kmp_cleanup_threadprivate_caches();
8263 for (f = 0; f < __kmp_threads_capacity; f++) {
8264 if (__kmp_root[f] != NULL) {
8265 __kmp_free(__kmp_root[f]);
8266 __kmp_root[f] = NULL;
8269 __kmp_free(__kmp_threads);
8272 __kmp_threads = NULL;
8274 __kmp_threads_capacity = 0;
8277 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8279 kmp_old_threads_list_t *next = ptr->next;
8280 __kmp_free(ptr->threads);
8284 __kmp_old_threads_list = NULL;
8286 #if KMP_USE_DYNAMIC_LOCK
8287 __kmp_cleanup_indirect_user_locks();
8289 __kmp_cleanup_user_locks();
8292 if (ompd_env_block) {
8293 __kmp_free(ompd_env_block);
8294 ompd_env_block = NULL;
8295 ompd_env_block_size = 0;
8299 #if KMP_AFFINITY_SUPPORTED
8300 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8301 __kmp_cpuinfo_file = NULL;
8304 #if KMP_USE_ADAPTIVE_LOCKS
8305 #if KMP_DEBUG_ADAPTIVE_LOCKS
8306 __kmp_print_speculative_stats();
8309 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8310 __kmp_nested_nth.nth = NULL;
8311 __kmp_nested_nth.size = 0;
8312 __kmp_nested_nth.used = 0;
8314 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8315 __kmp_nested_proc_bind.bind_types = NULL;
8316 __kmp_nested_proc_bind.size = 0;
8317 __kmp_nested_proc_bind.used = 0;
8318 __kmp_dflt_team_nth = 0;
8319 __kmp_dflt_team_nth_ub = 0;
8320 if (__kmp_affinity_format) {
8321 KMP_INTERNAL_FREE(__kmp_affinity_format);
8322 __kmp_affinity_format = NULL;
8325 __kmp_i18n_catclose();
8327 if (__kmp_nesting_nth_level)
8328 KMP_INTERNAL_FREE(__kmp_nesting_nth_level);
8330 #if KMP_USE_HIER_SCHED
8331 __kmp_hier_scheds.deallocate();
8334 #if KMP_STATS_ENABLED
8338 __kmpc_destroy_allocator(KMP_GTID_SHUTDOWN, __kmp_def_allocator);
8339 __kmp_def_allocator = omp_default_mem_alloc;
8341 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8346 int __kmp_ignore_mppbeg(void) {
8349 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8350 if (__kmp_str_match_false(env))
8357 int __kmp_ignore_mppend(void) {
8360 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8361 if (__kmp_str_match_false(env))
8368 void __kmp_internal_begin(void) {
8374 gtid = __kmp_entry_gtid();
8375 root = __kmp_threads[gtid]->th.th_root;
8376 KMP_ASSERT(KMP_UBER_GTID(gtid));
8378 if (root->r.r_begin)
8380 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8381 if (root->r.r_begin) {
8382 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8386 root->r.r_begin = TRUE;
8388 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8393 void __kmp_user_set_library(enum library_type arg) {
8400 gtid = __kmp_entry_gtid();
8401 thread = __kmp_threads[gtid];
8403 root = thread->th.th_root;
8405 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8407 if (root->r.r_in_parallel) {
8409 KMP_WARNING(SetLibraryIncorrectCall);
8414 case library_serial:
8415 thread->th.th_set_nproc = 0;
8416 set__nproc(thread, 1);
8418 case library_turnaround:
8419 thread->th.th_set_nproc = 0;
8420 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8421 : __kmp_dflt_team_nth_ub);
8423 case library_throughput:
8424 thread->th.th_set_nproc = 0;
8425 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8426 : __kmp_dflt_team_nth_ub);
8429 KMP_FATAL(UnknownLibraryType, arg);
8432 __kmp_aux_set_library(arg);
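// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): the library "kind" handled above can be
// selected at runtime through the kmp_set_library* extensions (assumed here to
// be declared by this runtime's omp.h) or via the KMP_LIBRARY environment
// variable.
#if 0
#include <omp.h>

static void example_library_mode(void) {
  // Roughly equivalent to KMP_LIBRARY=throughput; the call is forwarded to
  // __kmp_user_set_library(library_throughput).
  kmp_set_library_throughput();
  #pragma omp parallel
  { /* idle workers now follow the throughput (blocking) policy */ }
}
#endif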
8435 void __kmp_aux_set_stacksize(size_t arg) {
8436 if (!__kmp_init_serial)
8437 __kmp_serial_initialize();
8440 if (arg & (0x1000 - 1)) {
8441 arg &= ~(0x1000 - 1);
8446 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8449 if (!TCR_4(__kmp_init_parallel)) {
8452 if (value < __kmp_sys_min_stksize)
8453 value = __kmp_sys_min_stksize;
8454 else if (value > KMP_MAX_STKSIZE)
8455 value = KMP_MAX_STKSIZE;
8457 __kmp_stksize = value;
8459 __kmp_env_stksize = TRUE;
8462 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8467 void __kmp_aux_set_library(enum library_type arg) {
8468 __kmp_library = arg;
8470 switch (__kmp_library) {
8471 case library_serial: {
8472 KMP_INFORM(LibraryIsSerial);
8474 case library_turnaround:
8475 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8476 __kmp_use_yield = 2;
8478 case library_throughput:
8479 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8480 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8483 KMP_FATAL(UnknownLibraryType, arg);
8489 static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8490 kmp_info_t *thr = __kmp_entry_thread();
8491 teams_serialized = 0;
8492 if (thr->th.th_teams_microtask) {
8493 kmp_team_t *team = thr->th.th_team;
8494 int tlevel = thr->th.th_teams_level;
8495 int ii = team->t.t_level;
8496 teams_serialized = team->t.t_serialized;
8497 int level = tlevel + 1;
8498 KMP_DEBUG_ASSERT(ii >= tlevel);
8499 while (ii > level) {
8500 for (teams_serialized = team->t.t_serialized;
8501 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8503 if (team->t.t_serialized && (!teams_serialized)) {
8504 team = team->t.t_parent;
8508 team = team->t.t_parent;
8517 int __kmp_aux_get_team_num() {
8519 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8521 if (serialized > 1) {
8524 return team->t.t_master_tid;
8530 int __kmp_aux_get_num_teams() {
8532 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8534 if (serialized > 1) {
8537 return team->t.t_parent->t.t_nproc;
8576 typedef struct kmp_affinity_format_field_t {
char short_name; // single-character field specifier, e.g. 'n'
8578 const char *long_name; // long field name, e.g. "thread_num"
char field_format; // printf-style conversion used for the field ('d' or 's')
8581 } kmp_affinity_format_field_t;
8583 static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8584 #if KMP_AFFINITY_SUPPORTED
8585 {'A', "thread_affinity", 's'},
#endif
8587 {'t', "team_num", 'd'},
8588 {'T', "num_teams", 'd'},
8589 {'L', "nesting_level", 'd'},
8590 {'n', "thread_num", 'd'},
8591 {'N', "num_threads", 'd'},
8592 {'a', "ancestor_tnum", 'd'},
{'H', "host", 's'},
8594 {'P', "process_id", 'd'},
8595 {'i', "native_thread_id", 'd'}};
8598 static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
const char **ptr,
8600 kmp_str_buf_t *field_buffer) {
8601 int rc, format_index, field_value;
8602 const char *width_left, *width_right;
8603 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8604 static const int FORMAT_SIZE = 20;
8605 char format[FORMAT_SIZE] = {0};
8606 char absolute_short_name = 0;
8608 KMP_DEBUG_ASSERT(gtid >= 0);
8609 KMP_DEBUG_ASSERT(th);
8610 KMP_DEBUG_ASSERT(**ptr == '%');
8611 KMP_DEBUG_ASSERT(field_buffer);
8613 __kmp_str_buf_clear(field_buffer);
8620 __kmp_str_buf_cat(field_buffer, "%", 1);
8631 right_justify = false;
8633 right_justify = true;
8637 width_left = width_right = NULL;
8638 if (**ptr >= '0' && **ptr <= '9') {
8646 format[format_index++] = '%';
8648 format[format_index++] = '-';
8650 format[format_index++] = '0';
8651 if (width_left && width_right) {
8655 while (i < 8 && width_left < width_right) {
8656 format[format_index++] = *width_left;
8664 found_valid_name = false;
8665 parse_long_name = (**ptr == '{');
8666 if (parse_long_name)
8668 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8669 sizeof(__kmp_affinity_format_table[0]);
8671 char short_name = __kmp_affinity_format_table[i].short_name;
8672 const char *long_name = __kmp_affinity_format_table[i].long_name;
8673 char field_format = __kmp_affinity_format_table[i].field_format;
8674 if (parse_long_name) {
8675 size_t length = KMP_STRLEN(long_name);
8676 if (strncmp(*ptr, long_name, length) == 0) {
8677 found_valid_name = true;
8680 } else if (**ptr == short_name) {
8681 found_valid_name = true;
8684 if (found_valid_name) {
8685 format[format_index++] = field_format;
8686 format[format_index++] = '\0';
8687 absolute_short_name = short_name;
8691 if (parse_long_name) {
8693 absolute_short_name = 0;
8701 switch (absolute_short_name) {
8703 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8706 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8709 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8712 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8715 static const int BUFFER_SIZE = 256;
8716 char buf[BUFFER_SIZE];
8717 __kmp_expand_host_name(buf, BUFFER_SIZE);
8718 rc = __kmp_str_buf_print(field_buffer, format, buf);
8721 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8724 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8727 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8731 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8732 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8734 #if KMP_AFFINITY_SUPPORTED
8736 if (th->th.th_affin_mask) {
8738 __kmp_str_buf_init(&buf);
8739 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8740 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8741 __kmp_str_buf_free(&buf);
8743 rc = __kmp_str_buf_print(field_buffer, "%s", "disabled");
8750 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8752 if (parse_long_name) {
8761 KMP_ASSERT(format_index <= FORMAT_SIZE);
8771 size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8772 kmp_str_buf_t *buffer) {
8773 const char *parse_ptr;
8775 const kmp_info_t *th;
8776 kmp_str_buf_t field;
8778 KMP_DEBUG_ASSERT(buffer);
8779 KMP_DEBUG_ASSERT(gtid >= 0);
8781 __kmp_str_buf_init(&field);
8782 __kmp_str_buf_clear(buffer);
8784 th = __kmp_threads[gtid];
8790 if (parse_ptr == NULL || *parse_ptr == '\0') {
8791 parse_ptr = __kmp_affinity_format;
8793 KMP_DEBUG_ASSERT(parse_ptr);
8795 while (*parse_ptr != '\0') {
8797 if (*parse_ptr == '%') {
8799 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8800 __kmp_str_buf_catbuf(buffer, &field);
8804 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8809 __kmp_str_buf_free(&field);
8814 void __kmp_aux_display_affinity(int gtid, const char *format) {
8816 __kmp_str_buf_init(&buf);
8817 __kmp_aux_capture_affinity(gtid, format, &buf);
8818 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8819 __kmp_str_buf_free(&buf);
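// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): __kmp_aux_capture_affinity and
// __kmp_aux_display_affinity back the OpenMP 5.0 affinity-format display,
// reachable from user code via omp_display_affinity()/omp_capture_affinity()
// or the OMP_DISPLAY_AFFINITY/OMP_AFFINITY_FORMAT environment variables.
#if 0
#include <omp.h>
#include <stdio.h>

static void example_affinity_format(void) {
  #pragma omp parallel num_threads(2)
  {
    // A NULL format falls back to the current affinity-format ICV; the field
    // specifiers (%n, %N, %A, ...) come from __kmp_affinity_format_table.
    omp_display_affinity(NULL);

    char buf[256];
    omp_capture_affinity(buf, sizeof(buf), "tid=%n of %N, affinity=%A");
    printf("%s\n", buf);
  }
}
#endif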
8823 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8824 int blocktime = arg;
8830 __kmp_save_internal_controls(thread);
8833 if (blocktime < KMP_MIN_BLOCKTIME)
8834 blocktime = KMP_MIN_BLOCKTIME;
8835 else if (blocktime > KMP_MAX_BLOCKTIME)
8836 blocktime = KMP_MAX_BLOCKTIME;
8838 set__blocktime_team(thread->th.th_team, tid, blocktime);
8839 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8843 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8845 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8846 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8852 set__bt_set_team(thread->th.th_team, tid, bt_set);
8853 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8855 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8856 "bt_intervals=%d, monitor_updates=%d\n",
8857 __kmp_gtid_from_tid(tid, thread->th.th_team),
8858 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8859 __kmp_monitor_wakeups));
8861 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8862 __kmp_gtid_from_tid(tid, thread->th.th_team),
8863 thread->th.th_team->t.t_id, tid, blocktime));
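// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): __kmp_aux_set_blocktime backs both the
// KMP_BLOCKTIME environment variable and the kmp_set_blocktime() extension.
#if 0
#include <omp.h>

static void example_blocktime(void) {
  // Let idle workers spin for roughly 200 ms before going to sleep
  // (0 means sleep immediately); forwarded to __kmp_aux_set_blocktime().
  kmp_set_blocktime(200);
  #pragma omp parallel
  { /* ... */ }
}
#endif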
8867 void __kmp_aux_set_defaults(char const *str, size_t len) {
8868 if (!__kmp_init_serial) {
8869 __kmp_serial_initialize();
8871 __kmp_env_initialize(str);
8873 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8881 PACKED_REDUCTION_METHOD_T
8882 __kmp_determine_reduction_method(
8883 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8884 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8885 kmp_critical_name *lck) {
8896 PACKED_REDUCTION_METHOD_T retval;
8900 KMP_DEBUG_ASSERT(lck);
8902 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
(loc && \
8904 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8905 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8907 retval = critical_reduce_block;
8910 team_size = __kmp_get_team_num_threads(global_tid);
8911 if (team_size == 1) {
8913 retval = empty_reduce_block;
8917 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8919 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8920 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8921 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8923 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8924 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU || \
8925 KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8927 int teamsize_cutoff = 4;
8929 #if KMP_MIC_SUPPORTED
8930 if (__kmp_mic_type != non_mic) {
8931 teamsize_cutoff = 8;
8934 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8935 if (tree_available) {
8936 if (team_size <= teamsize_cutoff) {
8937 if (atomic_available) {
8938 retval = atomic_reduce_block;
8941 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8943 } else if (atomic_available) {
8944 retval = atomic_reduce_block;
8947 #error "Unknown or unsupported OS"
8952 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8953 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC
8955 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8956 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HAIKU || KMP_OS_HURD || \
8957 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8961 if (atomic_available) {
8962 if (num_vars <= 2) {
8963 retval = atomic_reduce_block;
8969 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8970 if (atomic_available && (num_vars <= 3)) {
8971 retval = atomic_reduce_block;
8972 } else if (tree_available) {
8973 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8974 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8975 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8980 #error "Unknown or unsupported OS"
8984 #error "Unknown or unsupported architecture"
8992 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8995 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8997 int atomic_available, tree_available;
8999 switch ((forced_retval = __kmp_force_reduction_method)) {
9000 case critical_reduce_block:
9004 case atomic_reduce_block:
9005 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
9006 if (!atomic_available) {
9007 KMP_WARNING(RedMethodNotSupported, "atomic");
9008 forced_retval = critical_reduce_block;
9012 case tree_reduce_block:
9013 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
9014 if (!tree_available) {
9015 KMP_WARNING(RedMethodNotSupported, "tree");
9016 forced_retval = critical_reduce_block;
9018 #if KMP_FAST_REDUCTION_BARRIER
9019 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9028 retval = forced_retval;
9031 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
9033 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
9034 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
9039 kmp_int32 __kmp_get_reduce_method(void) {
9040 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
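// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): __kmp_determine_reduction_method above
// picks the critical/atomic/tree strategy for a reduction clause like this
// one, based on team size, the number of reduction variables, and their size.
#if 0
#include <stdio.h>

static double example_reduction(const double *v, int n) {
  double sum = 0.0;
  // The reduction clause is lowered to __kmpc_reduce/__kmpc_reduce_nowait,
  // which consult the method chosen by __kmp_determine_reduction_method().
  #pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += v[i];
  return sum;
}
#endif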
9045 void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
9049 void __kmp_hard_pause() {
9050 __kmp_pause_status = kmp_hard_paused;
9051 __kmp_internal_end_thread(-1);
9055 void __kmp_resume_if_soft_paused() {
9056 if (__kmp_pause_status == kmp_soft_paused) {
9057 __kmp_pause_status = kmp_not_paused;
9059 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
9060 kmp_info_t *thread = __kmp_threads[gtid];
9062 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9064 if (fl.is_sleeping())
9066 else if (__kmp_try_suspend_mx(thread)) {
9067 __kmp_unlock_suspend_mx(thread);
9070 if (fl.is_sleeping()) {
9073 } else if (__kmp_try_suspend_mx(thread)) {
9074 __kmp_unlock_suspend_mx(thread);
9086 int __kmp_pause_resource(kmp_pause_status_t level) {
9087 if (level == kmp_not_paused) {
9088 if (__kmp_pause_status == kmp_not_paused) {
9092 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9093 __kmp_pause_status == kmp_hard_paused);
9094 __kmp_pause_status = kmp_not_paused;
9097 } else if (level == kmp_soft_paused) {
9098 if (__kmp_pause_status != kmp_not_paused) {
9105 } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) {
9107 if (__kmp_pause_status != kmp_not_paused) {
9120 void __kmp_omp_display_env(int verbose) {
9121 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9122 if (__kmp_init_serial == 0)
9123 __kmp_do_serial_initialize();
9124 __kmp_display_env_impl(!verbose, verbose);
9125 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
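// Usage sketch (illustrative user-side code, not part of this runtime source;
// kept out of the build with #if 0): __kmp_omp_display_env is the internal
// counterpart of OMP_DISPLAY_ENV; OpenMP 5.1 also exposes the same output
// programmatically.
#if 0
#include <omp.h>

static void example_display_env(void) {
  // Prints the runtime banner and ICV settings, like OMP_DISPLAY_ENV=true;
  // pass a nonzero argument for the VERBOSE form.
  omp_display_env(0);
}
#endif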
9129 void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads) {
9131 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9133 kmp_info_t **other_threads = team->t.t_threads;
9137 for (int f = 1; f < old_nthreads; ++f) {
9138 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9140 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9146 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9147 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9151 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9153 team->t.t_threads[f]->th.th_used_in_team.store(2);
9154 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9157 team->t.b->go_release();
9163 int count = old_nthreads - 1;
9165 count = old_nthreads - 1;
9166 for (int f = 1; f < old_nthreads; ++f) {
9167 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9168 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9169 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9170 void *, other_threads[f]->th.th_sleep_loc);
9171 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9174 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9180 team->t.b->update_num_threads(new_nthreads);
9181 team->t.b->go_reset();
9184 void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9186 KMP_DEBUG_ASSERT(team);
9192 for (int f = 1; f < new_nthreads; ++f) {
9193 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9194 (void)KMP_COMPARE_AND_STORE_ACQ32(
9195 &(team->t.t_threads[f]->th.th_used_in_team), 0, 3);
9196 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9197 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9198 (kmp_flag_32<false, false> *)NULL);
9204 int count = new_nthreads - 1;
9206 count = new_nthreads - 1;
9207 for (int f = 1; f < new_nthreads; ++f) {
9208 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9216 kmp_info_t **__kmp_hidden_helper_threads;
9217 kmp_info_t *__kmp_hidden_helper_main_thread;
9218 std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9220 kmp_int32 __kmp_hidden_helper_threads_num = 8;
9221 kmp_int32 __kmp_enable_hidden_helper = TRUE;
9223 kmp_int32 __kmp_hidden_helper_threads_num = 0;
9224 kmp_int32 __kmp_enable_hidden_helper = FALSE;
9228 std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9230 void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9235 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9236 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9237 __kmp_hidden_helper_threads_num)
9243 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9244 __kmp_hidden_helper_initz_release();
9245 __kmp_hidden_helper_main_thread_wait();
9247 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9248 __kmp_hidden_helper_worker_thread_signal();
9254 void __kmp_hidden_helper_threads_initz_routine() {
9256 const int gtid = __kmp_register_root(TRUE);
9257 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9258 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9259 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9260 __kmp_hidden_helper_threads_num;
9262 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9267 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9269 __kmp_hidden_helper_threads_deinitz_release();
9289 void __kmp_init_nesting_mode() {
9290 int levels = KMP_HW_LAST;
9291 __kmp_nesting_mode_nlevels = levels;
9292 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9293 for (int i = 0; i < levels; ++i)
9294 __kmp_nesting_nth_level[i] = 0;
9295 if (__kmp_nested_nth.size < levels) {
9296 __kmp_nested_nth.nth =
9297 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9298 __kmp_nested_nth.size = levels;
9303 void __kmp_set_nesting_mode_threads() {
9304 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9306 if (__kmp_nesting_mode == 1)
9307 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9308 else if (__kmp_nesting_mode > 1)
9309 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9311 if (__kmp_topology) {
9313 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9314 loc < __kmp_nesting_mode_nlevels;
9315 loc++, hw_level++) {
9316 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9317 if (__kmp_nesting_nth_level[loc] == 1)
9321 if (__kmp_nesting_mode > 1 && loc > 1) {
9322 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9323 int num_cores = __kmp_topology->get_count(core_level);
9324 int upper_levels = 1;
9325 for (int level = 0; level < loc - 1; ++level)
9326 upper_levels *= __kmp_nesting_nth_level[level];
9327 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9328 __kmp_nesting_nth_level[loc - 1] =
9329 num_cores / __kmp_nesting_nth_level[loc - 2];
9331 __kmp_nesting_mode_nlevels = loc;
9332 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9334 if (__kmp_avail_proc >= 4) {
9335 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9336 __kmp_nesting_nth_level[1] = 2;
9337 __kmp_nesting_mode_nlevels = 2;
9339 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9340 __kmp_nesting_mode_nlevels = 1;
9342 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9344 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9345 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9347 set__nproc(thread, __kmp_nesting_nth_level[0]);
9348 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9349 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9350 if (get__max_active_levels(thread) > 1) {
9352 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9354 if (__kmp_nesting_mode == 1)
9355 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9360 #if !KMP_STATS_ENABLED
9361 void __kmp_reset_stats() {}
9364 int __kmp_omp_debug_struct_info = FALSE;
9365 int __kmp_debugging = FALSE;
9367 #if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9368 void __kmp_itt_fini_ittlib() {}
9369 void __kmp_itt_init_ittlib() {}