#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_utils.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);

static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {
  kmp_nested_nthreads_t *new_nested_nth =
      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
          sizeof(kmp_nested_nthreads_t));
  int new_size = level + thr->th.th_set_nested_nth_sz;
  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
  for (int i = 0; i < level + 1; ++i)
    new_nested_nth->nth[i] = 0;
  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
  new_nested_nth->size = new_nested_nth->used = new_size;
  return new_nested_nth;
}
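
/* Find the global thread id (gtid) of the calling thread.  The fast paths
   (thread-local data, then keyed TLS) are preferred; otherwise the current
   stack address is located within one of the registered threads' stacks. */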
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than another thread's stack start is
           to be in the same stack. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* The calling thread's stack was not found in the registered bounds, so
     either report a stack overflow or grow the recorded stack. */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint storage map for the refined stack bounds. */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
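
/* Like __kmp_get_global_thread_id(), but if the calling thread does not yet
   have a gtid (a new root), perform serial initialization if needed and
   register it as a root thread. */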
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
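
/* Sanity check that the calling thread's stack does not overlap the stack of
   any other registered thread; report a fatal error if an overlap is found. */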
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
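
/* Print one line of the storage map to the runtime's error stream (kmp_err);
   when data placement printing is enabled, also annotate the address range
   with the memory nodes that back it. */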
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = (char *)p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p2 > p1)
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  // Let other threads know of abnormal termination and prevent deadlock
  // if abort happened during library initialization or shutdown.
  __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();
  abort();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
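
/* Dump the storage map entries for the pieces of a kmp_info_t thread
   descriptor that are allocated together (info, local data, barriers). */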
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() {
  __kmp_fini_target_mem();
  __kmp_fini_memkind();
}

/* ------------------------------------------------------------------------ */

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
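
/* DllMain: on Windows the runtime is notified of process and thread
   attach/detach events and shuts the library (or the thread) down
   accordingly. */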
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // lpReserved == NULL when the library is unloaded by FreeLibrary;
    // only shut down cleanly in that case.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
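
/* Claim the single block: the first thread to advance t_construct wins and
   returns status 1; everyone else observes status 0 and skips the block. */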
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
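
/* Decide how many threads a new team may actually get, honoring the dynamic
   adjustment mode, KMP_DEVICE_THREAD_LIMIT, the contention-group thread limit
   (OMP_THREAD_LIMIT) and the capacity of the __kmp_threads array. */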
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (thread-limit-var of the contention group).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }

  if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  }
  return new_nthreads;
}
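
/* Populate an allocated team: install the primary thread, fork or reuse the
   worker threads, align their barrier state with the team, and set up the
   primary thread's task state. */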
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if max level is 0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level;
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  // Take care of primary thread's task state
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (use_hot_team) {
      KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
      KA_TRACE(
          20,
          ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
           "%p, new task_team %p / team %p\n",
           __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
           team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
           team));

      // Store primary thread's current task state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);

      // Restore primary thread's task state to hot team's state
      // by using thread 1's task state
      if (team->t.t_nproc > 1) {
        KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
                         team->t.t_threads[1]->th.th_task_state == 1);
        KMP_CHECK_UPDATE(master_th->th.th_task_state,
                         team->t.t_threads[1]->th.th_task_state);
      } else {
        master_th->th.th_task_state = 0;
      }
    } else {
      // Store primary thread's current task_state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      // Are not using hot team, so set task state to 0.
      master_th->th.th_task_state = 0;
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
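
/* Helpers to propagate the primary thread's x87/MXCSR floating point control
   settings into the team, and to restore them in the hardware on exit. */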
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only write the team fields if the values differ, to avoid putting the
    // cache line into a written state unnecessarily.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime wants to know whether it should restore them.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Don't write to the team structure unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: set the hardware registers to the updated values from the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
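
/* Execute a parallel region serialized on the calling thread: reuse (or
   allocate) the thread's serial team and push one nesting level onto it. */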
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    if (this_thr->th.th_team->t.t_nested_nth)
      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
    else
      serial_team->t.t_nested_nth = &__kmp_nested_nth;
    // Save previous team's task state on serial team structure
    serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (this_thr->th.th_team->t.t_nested_nth)
      nested_nth = this_thr->th.th_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;
    this_thr->th.th_task_team = NULL;
    this_thr->th.th_task_state = 0;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used,
       that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (serial_team->t.t_nested_nth)
      nested_nth = serial_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    /* allocate/push task team stack */
    __kmp_push_task_team_node(this_thr, serial_team);

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions.
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking. Content was swapped.

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
// Test if this fork is for a team closely nested in a teams construct.
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

// Test if this fork is for the teams construct, i.e. to form the outer league
// of teams.
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
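
/* Handle a parallel region nested directly inside a teams construct: the
   existing teams team is reused (possibly shrunk to the requested
   num_threads) instead of allocating a new team. */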
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // AC: we are in serialized parallel
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly; will be restored at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

#if OMPT_SUPPORT
    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // Don't use lw_taskteam after linking. Content was swapped.

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }
#endif

    // AC: need to decrement t_serialized for enquiry functions to work
    // correctly; will be restored at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // If the threads allocated to the team are less than the thread limit,
  // update the thread limit here so that subsequent uses see the right value.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }
#endif

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // AC: only can reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change number of threads
      master_set_numthreads = nth;
    }
  }

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on the parallel
       clause; this overrides proc-bind-var for this region only. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to correct value for each implicit task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
/* Serial path of __kmp_fork_call: the region runs on the calling thread,
   either through the Intel entry point, the teams master, or the GNU entry
   point. */
static int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  SimpleVLA<void *> args(argc);
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. Content was swapped.
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      // team->t.t_pkfn = microtask;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      // AC: call special invoker for outer "parallel" of teams construct
      invoker(gtid);
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking. Content was swapped.
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    }
  } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
// don't use lw_taskteam after linking. Content was swapped.
#endif

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
/* Most of the work for a fork: decide how many threads to use, allocate and
   populate the team, then fork the workers and run the microtask. */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  { // KMP_TIME_BLOCK
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with */
    /* some gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  // AC: potentially unsafe, not in sync with library shutdown,
  // __kmp_threads can be freed
  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  task_thread_limit =
      master_th->th.th_current_task->td_icvs.task_thread_limit;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data = NULL;
  ompt_frame_t *ompt_frame = NULL;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Assign affinity to root thread if it hasn't happened yet
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  master_th->th.th_ident = loc;

  // Parallel closely nested in teams construct:
  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
#if OMPT_SUPPORT
                               ompt_parallel_data, return_address,
#endif
                               ap);
  } // End parallel closely nested in teams construct

  KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);

  /* determine how many new threads we can use */
  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   // TODO: get nproc directly from current task
                   : get__nproc_2(parent_team, master_tid);
    // Use the thread_limit set for the current target task if exists, else go
    // with the deduced nthreads
    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                   ? task_thread_limit
                   : nthreads;
    // Check if we need to take forkjoin lock? (no need for serialized
    // parallel out of teams construct).
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      /* AC: If we execute teams from parallel region (on host), then teams
         should be created but each can only have 1 thread if nesting is
         disabled. If teams called from serial region, then teams and their
         threads should be created regardless of the nesting setting. */
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later after team of threads created
        // and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
#if OMPT_SUPPORT
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data,
#endif
                                  ap);
  } // if (nthreads == 1)

  // GEH: only modify the executing flag in the case when not serialized
  //      serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  kmp_nested_nthreads_t *nested_nth = NULL;
  if (!master_th->th.th_set_nested_nth &&
      (level + 1 < parent_team->t.t_nested_nth->used) &&
      (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
    nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
  } else if (master_th->th.th_set_nested_nth) {
    nested_nth = __kmp_override_nested_nth(master_th, level);
    if ((level + 1 < nested_nth->used) &&
        (nested_nth->nth[level + 1] != nthreads_icv))
      nthreads_icv = nested_nth->nth[level + 1];
    else
      nthreads_icv = 0; // don't update
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind_policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var for this
    // parallel region
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on parallel clause.
       This overrides proc-bind-var for this parallel region, but does not
       change proc-bind-var. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc bind icv for the two teams construct forks
      // They just let the proc bind icv pass through
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
  // TODO: parent_team->t.t_level == INT_MAX ???
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set primary thread's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Check if hot team has potentially outdated list, and if so, free it
  if (team->t.t_nested_nth &&
      team->t.t_nested_nth != parent_team->t.t_nested_nth) {
    KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
    KMP_INTERNAL_FREE(team->t.t_nested_nth);
    team->t.t_nested_nth = NULL;
  }
  team->t.t_nested_nth = parent_team->t.t_nested_nth;
  if (master_th->th.th_set_nested_nth) {
    if (!nested_nth)
      nested_nth = __kmp_override_nested_nth(master_th, level);
    team->t.t_nested_nth = nested_nth;
    KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
    master_th->th.th_set_nested_nth = NULL;
    master_th->th.th_set_nested_nth_sz = 0;
    master_th->th.th_nt_strict = false;
  }

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();
#endif

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else {
// only one notification scheme (either "submit" or "forking/joined", not both)
      if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
        // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
        __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
      }
    }
#endif /* USE_ITT_NOTIFY */
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // create new stack stitching id before entering fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep stack stitching id in the serialized parent_team;
      // current team will be used for parallel inside the teams;
      // that is why stack stitching id will be set to the parent_team id
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }
#endif /* USE_ITT_BUILD */

  // AC: skip __kmp_internal_fork at teams construct, let only primary
  // threads execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  } // END of timer KMP_fork_call block

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If was beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif

void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
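  // Join sequence, as laid out below: gather the team at the join barrier,
  // emit any ITT/OMPT join events, pop the primary thread back to the parent
  // team, restore its task team and ICVs, and finally release the team
  // structure (the hot team is kept for reuse).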
2446 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2448 kmp_team_t *parent_team;
2449 kmp_info_t *master_th;
2453 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2456 master_th = __kmp_threads[gtid];
2457 root = master_th->th.th_root;
2458 team = master_th->th.th_team;
2459 parent_team = team->t.t_parent;
2461 master_th->th.th_ident = loc;
2464 void *team_microtask = (
void *)team->t.t_pkfn;
2468 if (ompt_enabled.enabled &&
2469 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2470 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2475 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2476 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2477 "th_task_team = %p\n",
2478 __kmp_gtid_from_thread(master_th), team,
2479 team->t.t_task_team[master_th->th.th_task_state],
2480 master_th->th.th_task_team));
2481 KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);
2485 if (team->t.t_serialized) {
2486 if (master_th->th.th_teams_microtask) {
2488 int level = team->t.t_level;
2489 int tlevel = master_th->th.th_teams_level;
2490 if (level == tlevel) {
2494 }
else if (level == tlevel + 1) {
2498 team->t.t_serialized++;
2504 if (ompt_enabled.enabled) {
2505 if (fork_context == fork_context_gnu) {
2506 __ompt_lw_taskteam_unlink(master_th);
2508 __kmp_join_restore_state(master_th, parent_team);
2515 master_active = team->t.t_master_active;
2520 __kmp_internal_join(loc, gtid, team);
2522 if (__itt_stack_caller_create_ptr) {
2523 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2525 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2526 team->t.t_stack_id = NULL;
2530 master_th->th.th_task_state =
2533 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2534 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2538 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2539 parent_team->t.t_stack_id = NULL;
2547 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2548 void *codeptr = team->t.ompt_team_info.master_return_address;
2553 if (team->t.t_active_level == 1 &&
2554 (!master_th->th.th_teams_microtask ||
2555 master_th->th.th_teams_size.nteams == 1)) {
2556 master_th->th.th_ident = loc;
2559 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2560 __kmp_forkjoin_frames_mode == 3)
2561 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2562 master_th->th.th_frame_time, 0, loc,
2563 master_th->th.th_team_nproc, 1);
2564 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2565 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2566 __kmp_itt_region_joined(gtid);
2570 #if KMP_AFFINITY_SUPPORTED
2573 master_th->th.th_first_place = team->t.t_first_place;
2574 master_th->th.th_last_place = team->t.t_last_place;
2578 if (master_th->th.th_teams_microtask && !exit_teams &&
2579 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2580 team->t.t_level == master_th->th.th_teams_level + 1) {
2585 ompt_data_t ompt_parallel_data = ompt_data_none;
2586 if (ompt_enabled.enabled) {
2587 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2588 if (ompt_enabled.ompt_callback_implicit_task) {
2589 int ompt_team_size = team->t.t_nproc;
2590 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2591 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2592 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2594 task_info->frame.exit_frame = ompt_data_none;
2595 task_info->task_data = ompt_data_none;
2596 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2597 __ompt_lw_taskteam_unlink(master_th);
2602 team->t.t_active_level--;
2603 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2609 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2610 int old_num = master_th->th.th_team_nproc;
2611 int new_num = master_th->th.th_teams_size.nth;
2612 kmp_info_t **other_threads = team->t.t_threads;
2613 team->t.t_nproc = new_num;
2614 for (
int i = 0; i < old_num; ++i) {
2615 other_threads[i]->th.th_team_nproc = new_num;
2618 for (
int i = old_num; i < new_num; ++i) {
2620 KMP_DEBUG_ASSERT(other_threads[i]);
2621 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2622 for (
int b = 0; b < bs_last_barrier; ++b) {
2623 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2624 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2626 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2629 if (__kmp_tasking_mode != tskm_immediate_exec) {
2631 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2637 if (ompt_enabled.enabled) {
2638 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2639 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2647 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2648 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2650 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2655 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2657 if (!master_th->th.th_teams_microtask ||
2658 team->t.t_level > master_th->th.th_teams_level) {
2660 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2662 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2665 if (ompt_enabled.enabled) {
2666 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2667 if (ompt_enabled.ompt_callback_implicit_task) {
2668 int flags = (team_microtask == (
void *)__kmp_teams_master)
2670 : ompt_task_implicit;
2671 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2672 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2673 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2674 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2676 task_info->frame.exit_frame = ompt_data_none;
2677 task_info->task_data = ompt_data_none;
2681 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2683 __kmp_pop_current_task_from_thread(master_th);
2685 master_th->th.th_def_allocator = team->t.t_def_allocator;
2688 if (ompd_state & OMPD_ENABLE_BP)
2689 ompd_bp_parallel_end();
2691 updateHWFPControl(team);
2693 if (root->r.r_active != master_active)
2694 root->r.r_active = master_active;
2696 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2704 master_th->th.th_team = parent_team;
2705 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2706 master_th->th.th_team_master = parent_team->t.t_threads[0];
2707 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2710 if (parent_team->t.t_serialized &&
2711 parent_team != master_th->th.th_serial_team &&
2712 parent_team != root->r.r_root_team) {
2713 __kmp_free_team(root,
2714 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2715 master_th->th.th_serial_team = parent_team;
2718 if (__kmp_tasking_mode != tskm_immediate_exec) {
2720 KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||
2721 team->t.t_primary_task_state == 1);
2722 master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;
2725 master_th->th.th_task_team =
2726 parent_team->t.t_task_team[master_th->th.th_task_state];
2728 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2729 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2736 master_th->th.th_current_task->td_flags.executing = 1;
2738 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2740 #if KMP_AFFINITY_SUPPORTED
2741 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2742 __kmp_reset_root_init_mask(gtid);
2747 OMPT_INVOKER(fork_context) |
2748 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2749 : ompt_parallel_team);
2750 if (ompt_enabled.enabled) {
2751 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2757 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
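
/* Entry point behind omp_set_num_threads(): updates the calling thread's
   nproc ICV and, if the root is idle and the hot team is larger than the new
   value, trims the hot team immediately instead of waiting for the next
   parallel region. */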
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // release the extra threads we don't need any more
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // the value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT];
    // zero is allowed
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    // if the input exceeds the upper limit, clamp it to the limit
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
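
/* omp_set_schedule() support: the user-visible kmp_sched_t kind is validated,
   stripped of its monotonic/nonmonotonic modifiers, mapped through
   __kmp_sch_map onto the internal enum sched_type, and stored together with
   the chunk size in the current task's sched ICV. */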
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; correct it if needed.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    // extended schedule kinds
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
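
/* omp_get_ancestor_thread_num()/omp_get_team_size() helpers: starting from
   the current team, walk t_parent links while accounting for serialized
   nesting levels (t_serialized) until the requested level is reached. */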
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // we are in a teams region where multiple nested teams have the same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // to pass by the teams league we need to artificially increase ii
      if (ii == tlevel)
        ii += 2; // three teams have same level
      else
        ii++; // two teams have same level
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // we are in a teams region where multiple nested teams have the same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // to pass by the teams league we need to artificially increase ii
      if (ii == tlevel)
        ii += 2; // three teams have same level
      else
        ii++; // two teams have same level
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided; __kmp_sched keeps its original value.
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with the more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with the more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
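
/* Argument-list management for fork: small argument counts reuse the
   cache-line-resident t_inline_argv array, larger ones get a page-allocated
   heap block sized to at least KMP_MIN_MALLOC_ARGV_ENTRIES entries. */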
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    // if heap space was previously allocated for the args, free it
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      // use unused space in the cache line for arguments
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      // allocate heap memory for the arguments
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
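
/* The next two routines build kmp_internal_control_t snapshots: one from the
   global defaults (used when setting up root threads) and one copied from an
   existing team's primary-thread ICVs. */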
3300 static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3302 kmp_r_sched_t r_sched =
3303 __kmp_get_schedule_global();
3305 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3307 kmp_internal_control_t g_icvs = {
3309 (kmp_int8)__kmp_global.g.g_dynamic,
3311 (kmp_int8)__kmp_env_blocktime,
3313 __kmp_dflt_blocktime,
3318 __kmp_dflt_team_nth,
3324 __kmp_dflt_max_active_levels,
3328 __kmp_nested_proc_bind.bind_types[0],
3329 __kmp_default_device,
3336 static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3338 kmp_internal_control_t gx_icvs;
3339 gx_icvs.serial_nesting_level =
3341 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3342 gx_icvs.next = NULL;
3347 static void __kmp_initialize_root(kmp_root_t *root) {
3349 kmp_team_t *root_team;
3350 kmp_team_t *hot_team;
3351 int hot_team_max_nth;
3352 kmp_r_sched_t r_sched =
3353 __kmp_get_schedule_global();
3354 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3355 KMP_DEBUG_ASSERT(root);
3356 KMP_ASSERT(!root->r.r_begin);
3359 __kmp_init_lock(&root->r.r_begin_lock);
3360 root->r.r_begin = FALSE;
3361 root->r.r_active = FALSE;
3362 root->r.r_in_parallel = 0;
3363 root->r.r_blocktime = __kmp_dflt_blocktime;
3364 #if KMP_AFFINITY_SUPPORTED
3365 root->r.r_affinity_assigned = FALSE;
3370 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3373 __kmp_allocate_team(root,
3379 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3381 USE_NESTED_HOT_ARG(NULL)
3386 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3389 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3391 root->r.r_root_team = root_team;
3392 root_team->t.t_control_stack_top = NULL;
3395 root_team->t.t_threads[0] = NULL;
3396 root_team->t.t_nproc = 1;
3397 root_team->t.t_serialized = 1;
3399 root_team->t.t_sched.sched = r_sched.sched;
3400 root_team->t.t_nested_nth = &__kmp_nested_nth;
3403 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3404 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3408 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3411 __kmp_allocate_team(root,
3413 __kmp_dflt_team_nth_ub * 2,
3417 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3419 USE_NESTED_HOT_ARG(NULL)
3421 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3423 root->r.r_hot_team = hot_team;
3424 root_team->t.t_control_stack_top = NULL;
3427 hot_team->t.t_parent = root_team;
3430 hot_team_max_nth = hot_team->t.t_max_nproc;
3431 for (f = 0; f < hot_team_max_nth; ++f) {
3432 hot_team->t.t_threads[f] = NULL;
3434 hot_team->t.t_nproc = 1;
3436 hot_team->t.t_sched.sched = r_sched.sched;
3437 hot_team->t.t_size_changed = 0;
3438 hot_team->t.t_nested_nth = &__kmp_nested_nth;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.
  // Team id is *not* a key.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}

static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3515 void __kmp_print_structure(
void) {
3517 kmp_team_list_t list;
3521 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3525 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3526 "Table\n------------------------------\n");
3529 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3530 __kmp_printf(
"%2d", gtid);
3531 if (__kmp_threads != NULL) {
3532 __kmp_printf(
" %p", __kmp_threads[gtid]);
3534 if (__kmp_root != NULL) {
3535 __kmp_printf(
" %p", __kmp_root[gtid]);
3542 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3544 if (__kmp_threads != NULL) {
3546 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3547 kmp_info_t
const *thread = __kmp_threads[gtid];
3548 if (thread != NULL) {
3549 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3550 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3551 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3552 __kmp_print_structure_team(
" Serial Team: ",
3553 thread->th.th_serial_team);
3554 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3555 __kmp_print_structure_thread(
" Primary: ",
3556 thread->th.th_team_master);
3557 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3558 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3559 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3560 __kmp_print_structure_thread(
" Next in pool: ",
3561 thread->th.th_next_pool);
3563 __kmp_print_structure_team_accum(list, thread->th.th_team);
3564 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3568 __kmp_printf(
"Threads array is not allocated.\n");
3572 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3574 if (__kmp_root != NULL) {
3576 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3577 kmp_root_t
const *root = __kmp_root[gtid];
3579 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3580 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3581 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3582 __kmp_print_structure_thread(
" Uber Thread: ",
3583 root->r.r_uber_thread);
3584 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3585 __kmp_printf(
" In Parallel: %2d\n",
3586 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3588 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3589 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3593 __kmp_printf(
"Ubers array is not allocated.\n");
3596 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3598 while (list->next != NULL) {
3599 kmp_team_p
const *team = list->entry;
3601 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3602 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3603 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3604 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3605 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3606 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3607 for (i = 0; i < team->t.t_nproc; ++i) {
3608 __kmp_printf(
" Thread %2d: ", i);
3609 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3611 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3617 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3619 __kmp_print_structure_thread(
"Thread pool: ",
3620 CCAST(kmp_info_t *, __kmp_thread_pool));
3621 __kmp_print_structure_team(
"Team pool: ",
3622 CCAST(kmp_team_t *, __kmp_team_pool));
3626 while (list != NULL) {
3627 kmp_team_list_item_t *item = list;
3629 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};

/* __kmp_get_random: get a random number using a linear congruential method */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* __kmp_init_random: initialize a random number generator */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries of root threads that are already dead; returns the
   number of entries reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died inactive
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
/* Expand the __kmp_threads and __kmp_root arrays so that they can hold at
   least nNeed additional entries; returns the number of entries added.
   All calls should be made while holding __kmp_forkjoin_lock. */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim array entries for root threads
     that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put the old __kmp_threads array on a list; ongoing references to the old
  // array stay valid and the list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to the new value
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
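
/* __kmp_register_root (below) claims a gtid slot for a new root thread: slot
   0 is reserved for the initial thread, slots 1..__kmp_hidden_helper_threads_num
   are reserved for hidden helper threads, and other roots search upward from
   there; the arrays are expanded first if no free slot remains. */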
3806 int __kmp_register_root(
int initial_thread) {
3807 kmp_info_t *root_thread;
3811 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3812 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3829 capacity = __kmp_threads_capacity;
3830 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3837 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3838 capacity -= __kmp_hidden_helper_threads_num;
3842 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3843 if (__kmp_tp_cached) {
3844 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3845 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3846 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3848 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3858 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3861 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3862 gtid <= __kmp_hidden_helper_threads_num;
3865 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3866 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3867 "hidden helper thread: T#%d\n",
3873 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3876 for (gtid = __kmp_hidden_helper_threads_num + 1;
3877 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3881 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3882 KMP_ASSERT(gtid < __kmp_threads_capacity);
3887 TCW_4(__kmp_nth, __kmp_nth + 1);
3891 if (__kmp_adjust_gtid_mode) {
3892 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3893 if (TCR_4(__kmp_gtid_mode) != 2) {
3894 TCW_4(__kmp_gtid_mode, 2);
3897 if (TCR_4(__kmp_gtid_mode) != 1) {
3898 TCW_4(__kmp_gtid_mode, 1);
3903 #ifdef KMP_ADJUST_BLOCKTIME
3906 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3907 if (__kmp_nth > __kmp_avail_proc) {
3908 __kmp_zero_bt = TRUE;
3914 if (!(root = __kmp_root[gtid])) {
3915 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3916 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3919 #if KMP_STATS_ENABLED
3921 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3922 __kmp_stats_thread_ptr->startLife();
3923 KMP_SET_THREAD_STATE(SERIAL_REGION);
3926 __kmp_initialize_root(root);
3929 if (root->r.r_uber_thread) {
3930 root_thread = root->r.r_uber_thread;
3932 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3933 if (__kmp_storage_map) {
3934 __kmp_print_thread_storage_map(root_thread, gtid);
3936 root_thread->th.th_info.ds.ds_gtid = gtid;
3938 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3940 root_thread->th.th_root = root;
3941 if (__kmp_env_consistency_check) {
3942 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3945 __kmp_initialize_fast_memory(root_thread);
3949 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3950 __kmp_initialize_bget(root_thread);
3952 __kmp_init_random(root_thread);
3956 if (!root_thread->th.th_serial_team) {
3957 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3958 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3959 root_thread->th.th_serial_team = __kmp_allocate_team(
3964 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3966 KMP_ASSERT(root_thread->th.th_serial_team);
3967 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3968 root_thread->th.th_serial_team));
3971 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3973 root->r.r_root_team->t.t_threads[0] = root_thread;
3974 root->r.r_hot_team->t.t_threads[0] = root_thread;
3975 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3977 root_thread->th.th_serial_team->t.t_serialized = 0;
3978 root->r.r_uber_thread = root_thread;
3981 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3982 TCW_4(__kmp_init_gtid, TRUE);
3985 __kmp_gtid_set_specific(gtid);
3988 __kmp_itt_thread_name(gtid);
3991 #ifdef KMP_TDATA_GTID
3994 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3995 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3997 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3999 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
4000 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
4001 KMP_INIT_BARRIER_STATE));
4004 for (b = 0; b < bs_last_barrier; ++b) {
4005 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4007 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4011 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4012 KMP_INIT_BARRIER_STATE);
4014 #if KMP_AFFINITY_SUPPORTED
4015 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4016 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4017 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4018 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4020 root_thread->th.th_def_allocator = __kmp_def_allocator;
4021 root_thread->th.th_prev_level = 0;
4022 root_thread->th.th_prev_num_threads = 1;
4024 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
4025 tmp->cg_root = root_thread;
4026 tmp->cg_thread_limit = __kmp_cg_max_nth;
4027 tmp->cg_nthreads = 1;
4028 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
4029 " cg_nthreads init to 1\n",
4032 root_thread->th.th_cg_roots = tmp;
4034 __kmp_root_counter++;
4037 if (ompt_enabled.enabled) {
4039 kmp_info_t *root_thread = ompt_get_thread();
4041 ompt_set_thread_state(root_thread, ompt_state_overhead);
4043 if (ompt_enabled.ompt_callback_thread_begin) {
4044 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4045 ompt_thread_initial, __ompt_get_thread_data_internal());
4047 ompt_data_t *task_data;
4048 ompt_data_t *parallel_data;
4049 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4051 if (ompt_enabled.ompt_callback_implicit_task) {
4052 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4053 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4056 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4060 if (ompd_state & OMPD_ENABLE_BP)
4061 ompd_bp_thread_begin();
4065 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_NESTED_HOT_TEAMS
// Free the hot teams nested at the given level, recursively, plus the threads
// they own; returns the number of threads released.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // primary thread is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
// Resets a root: frees the root and hot teams, reaps the uber thread, and
// returns the number of threads that belonged to the hot team(s).
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before calling __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* close the handle of the root duplicated in __kmp_create_worker */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free the contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put the root thread on __kmp_thread_pool, so we have to reap it.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy and hidden helper tasks before finishing
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
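
/* Variant used by __kmp_reclaim_dead_roots: unregisters a root on behalf of a
   thread that has already exited, with __kmp_forkjoin_lock already held. */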
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}

#if KMP_DEBUG
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
#endif // KMP_DEBUG
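
/* __kmp_initialize_info (below) binds a kmp_info_t to a (team, tid) slot: it
   wires up the team/root pointers, the implicit task and its ICVs, the
   per-thread dispatch buffer, and the contention-group root used for thread
   limits. */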
4278 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4279 int tid,
int gtid) {
4283 KMP_DEBUG_ASSERT(this_thr != NULL);
4284 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4285 KMP_DEBUG_ASSERT(team);
4286 KMP_DEBUG_ASSERT(team->t.t_threads);
4287 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4288 kmp_info_t *master = team->t.t_threads[0];
4289 KMP_DEBUG_ASSERT(master);
4290 KMP_DEBUG_ASSERT(master->th.th_root);
4294 TCW_SYNC_PTR(this_thr->th.th_team, team);
4296 this_thr->th.th_info.ds.ds_tid = tid;
4297 this_thr->th.th_set_nproc = 0;
4298 if (__kmp_tasking_mode != tskm_immediate_exec)
4301 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4303 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4304 this_thr->th.th_set_proc_bind = proc_bind_default;
4306 #if KMP_AFFINITY_SUPPORTED
4307 this_thr->th.th_new_place = this_thr->th.th_current_place;
4309 this_thr->th.th_root = master->th.th_root;
4312 this_thr->th.th_team_nproc = team->t.t_nproc;
4313 this_thr->th.th_team_master = master;
4314 this_thr->th.th_team_serialized = team->t.t_serialized;
4316 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4318 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4319 tid, gtid, this_thr, this_thr->th.th_current_task));
4321 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4324 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4325 tid, gtid, this_thr, this_thr->th.th_current_task));
4330 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4332 this_thr->th.th_local.this_construct = 0;
4334 if (!this_thr->th.th_pri_common) {
4335 this_thr->th.th_pri_common =
4336 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4337 if (__kmp_storage_map) {
4338 __kmp_print_storage_map_gtid(
4339 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4340 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4342 this_thr->th.th_pri_head = NULL;
4345 if (this_thr != master &&
4346 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4348 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4349 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4352 int i = tmp->cg_nthreads--;
4353 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4354 " on node %p of thread %p to %d\n",
4355 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4360 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4362 this_thr->th.th_cg_roots->cg_nthreads++;
4363 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4364 " node %p of thread %p to %d\n",
4365 this_thr, this_thr->th.th_cg_roots,
4366 this_thr->th.th_cg_roots->cg_root,
4367 this_thr->th.th_cg_roots->cg_nthreads));
4368 this_thr->th.th_current_task->td_icvs.thread_limit =
4369 this_thr->th.th_cg_roots->cg_thread_limit;
4374 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4377 sizeof(dispatch_private_info_t) *
4378 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4379 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4380 team->t.t_max_nproc));
4381 KMP_ASSERT(dispatch);
4382 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4383 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4385 dispatch->th_disp_index = 0;
4386 dispatch->th_doacross_buf_idx = 0;
4387 if (!dispatch->th_disp_buffer) {
4388 dispatch->th_disp_buffer =
4389 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4391 if (__kmp_storage_map) {
4392 __kmp_print_storage_map_gtid(
4393 gtid, &dispatch->th_disp_buffer[0],
4394 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4396 : __kmp_dispatch_num_buffers],
4398 "th_%d.th_dispatch.th_disp_buffer "
4399 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4400 gtid, team->t.t_id, gtid);
4403 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4406 dispatch->th_dispatch_pr_current = 0;
4407 dispatch->th_dispatch_sh_current = 0;
4409 dispatch->th_deo_fcn = 0;
4410 dispatch->th_dxo_fcn = 0;
4413 this_thr->th.th_next_pool = NULL;
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
}
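
/* __kmp_allocate_thread either recycles a kmp_info_t from __kmp_thread_pool
   (re-initializing it for the new team) or allocates a fresh one: it picks a
   free gtid, builds the serial team, seeds the RNG and barrier state, and
   finally spawns the OS worker via __kmp_create_worker. */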
4426 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4428 kmp_team_t *serial_team;
4429 kmp_info_t *new_thr;
4432 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4433 KMP_DEBUG_ASSERT(root && team);
4434 #if !KMP_NESTED_HOT_TEAMS
4435 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4442 if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
4443 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4444 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4445 if (new_thr == __kmp_thread_pool_insert_pt) {
4446 __kmp_thread_pool_insert_pt = NULL;
4448 TCW_4(new_thr->th.th_in_pool, FALSE);
4449 __kmp_suspend_initialize_thread(new_thr);
4450 __kmp_lock_suspend_mx(new_thr);
4451 if (new_thr->th.th_active_in_pool == TRUE) {
4452 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4453 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4454 new_thr->th.th_active_in_pool = FALSE;
4456 __kmp_unlock_suspend_mx(new_thr);
4458 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4459 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4460 KMP_ASSERT(!new_thr->th.th_team);
4461 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4464 __kmp_initialize_info(new_thr, team, new_tid,
4465 new_thr->th.th_info.ds.ds_gtid);
4466 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4468 TCW_4(__kmp_nth, __kmp_nth + 1);
4470 new_thr->th.th_task_state = 0;
4472 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4474 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4478 #ifdef KMP_ADJUST_BLOCKTIME
4481 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4482 if (__kmp_nth > __kmp_avail_proc) {
4483 __kmp_zero_bt = TRUE;
4492 kmp_balign_t *balign = new_thr->th.th_bar;
4493 for (b = 0; b < bs_last_barrier; ++b)
4494 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4497 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4498 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4505 KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
4506 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4511 if (!TCR_4(__kmp_init_monitor)) {
4512 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4513 if (!TCR_4(__kmp_init_monitor)) {
4514 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4515 TCW_4(__kmp_init_monitor, 1);
4516 __kmp_create_monitor(&__kmp_monitor);
4517 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4528 while (TCR_4(__kmp_init_monitor) < 2) {
4531 KF_TRACE(10, (
"after monitor thread has started\n"));
4534 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4541 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4543 : __kmp_hidden_helper_threads_num + 1;
4545 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4547 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4550 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4551 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4556 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4558 new_thr->th.th_nt_strict =
false;
4559 new_thr->th.th_nt_loc = NULL;
4560 new_thr->th.th_nt_sev = severity_fatal;
4561 new_thr->th.th_nt_msg = NULL;
4563 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4565 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4568 __itt_suppress_mark_range(
4569 __itt_suppress_range, __itt_suppress_threading_errors,
4570 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4571 __itt_suppress_mark_range(
4572 __itt_suppress_range, __itt_suppress_threading_errors,
4573 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4575 __itt_suppress_mark_range(
4576 __itt_suppress_range, __itt_suppress_threading_errors,
4577 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4579 __itt_suppress_mark_range(__itt_suppress_range,
4580 __itt_suppress_threading_errors,
4581 &new_thr->th.th_suspend_init_count,
4582 sizeof(new_thr->th.th_suspend_init_count));
4585 __itt_suppress_mark_range(__itt_suppress_range,
4586 __itt_suppress_threading_errors,
4587 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4588 sizeof(new_thr->th.th_bar[0].bb.b_go));
4589 __itt_suppress_mark_range(__itt_suppress_range,
4590 __itt_suppress_threading_errors,
4591 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4592 sizeof(new_thr->th.th_bar[1].bb.b_go));
4593 __itt_suppress_mark_range(__itt_suppress_range,
4594 __itt_suppress_threading_errors,
4595 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4596 sizeof(new_thr->th.th_bar[2].bb.b_go));
4598 if (__kmp_storage_map) {
4599 __kmp_print_thread_storage_map(new_thr, new_gtid);
4604 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4605 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4606 new_thr->th.th_serial_team = serial_team =
4607 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4611 proc_bind_default, &r_icvs,
4612 0 USE_NESTED_HOT_ARG(NULL));
4614 KMP_ASSERT(serial_team);
4615 serial_team->t.t_serialized = 0;
4617 serial_team->t.t_threads[0] = new_thr;
4619 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4623 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4626 __kmp_initialize_fast_memory(new_thr);
4630 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4631 __kmp_initialize_bget(new_thr);
4634 __kmp_init_random(new_thr);
4638 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4639 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4642 kmp_balign_t *balign = new_thr->th.th_bar;
4643 for (b = 0; b < bs_last_barrier; ++b) {
4644 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4645 balign[b].bb.team = NULL;
4646 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4647 balign[b].bb.use_oncore_barrier = 0;
4650 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4651 new_thr->th.th_sleep_loc_type = flag_unset;
4653 new_thr->th.th_spin_here = FALSE;
4654 new_thr->th.th_next_waiting = 0;
4656 new_thr->th.th_blocking =
false;
4659 #if KMP_AFFINITY_SUPPORTED
4660 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4661 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4662 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4663 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4665 new_thr->th.th_def_allocator = __kmp_def_allocator;
4666 new_thr->th.th_prev_level = 0;
4667 new_thr->th.th_prev_num_threads = 1;
4669 TCW_4(new_thr->th.th_in_pool, FALSE);
4670 new_thr->th.th_active_in_pool = FALSE;
4671 TCW_4(new_thr->th.th_active, TRUE);
4673 new_thr->th.th_set_nested_nth = NULL;
4674 new_thr->th.th_set_nested_nth_sz = 0;
4682 if (__kmp_adjust_gtid_mode) {
4683 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4684 if (TCR_4(__kmp_gtid_mode) != 2) {
4685 TCW_4(__kmp_gtid_mode, 2);
4688 if (TCR_4(__kmp_gtid_mode) != 1) {
4689 TCW_4(__kmp_gtid_mode, 1);
4694 #ifdef KMP_ADJUST_BLOCKTIME
4697 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4698 if (__kmp_nth > __kmp_avail_proc) {
4699 __kmp_zero_bt = TRUE;
4704 #if KMP_AFFINITY_SUPPORTED
4706 __kmp_affinity_set_init_mask(new_gtid, FALSE);
4711 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4712 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4714 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4716 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4727 static void __kmp_reinitialize_team(kmp_team_t *team,
4728 kmp_internal_control_t *new_icvs,
4730 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4731 team->t.t_threads[0], team));
4732 KMP_DEBUG_ASSERT(team && new_icvs);
4733 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4734 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4736 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4738 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4739 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4741 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4742 team->t.t_threads[0], team));
4748 static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4749                                   kmp_internal_control_t *new_icvs,
                                      ident_t *loc) {
4751 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4754 KMP_DEBUG_ASSERT(team);
4755 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4756 KMP_DEBUG_ASSERT(team->t.t_threads);
4759 team->t.t_master_tid = 0;
4761 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4762 team->t.t_nproc = new_nproc;
4765 team->t.t_next_pool = NULL;
4769 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4770 team->t.t_invoke = NULL;
4773 team->t.t_sched.sched = new_icvs->sched.sched;
4775 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4776 team->t.t_fp_control_saved = FALSE;
4777 team->t.t_x87_fpu_control_word = 0;
4778 team->t.t_mxcsr = 0;
4781 team->t.t_construct = 0;
4783 team->t.t_ordered.dt.t_value = 0;
4784 team->t.t_master_active = FALSE;
4787 team->t.t_copypriv_data = NULL;
4790 team->t.t_copyin_counter = 0;
4793 team->t.t_control_stack_top = NULL;
4795 __kmp_reinitialize_team(team, new_icvs, loc);
4798 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4801 #if KMP_AFFINITY_SUPPORTED
4802 static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
4803                                           int first, int last, int newp) {
4804 th->th.th_first_place = first;
4805 th->th.th_last_place = last;
4806 th->th.th_new_place = newp;
4807 if (newp != th->th.th_current_place) {
4808 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4809 team->t.t_display_affinity = 1;
4811 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4812 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
4820 static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4822 if (KMP_HIDDEN_HELPER_TEAM(team))
4825 kmp_info_t *master_th = team->t.t_threads[0];
4826 KMP_DEBUG_ASSERT(master_th != NULL);
4827 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4828 int first_place = master_th->th.th_first_place;
4829 int last_place = master_th->th.th_last_place;
4830 int masters_place = master_th->th.th_current_place;
4831 int num_masks = __kmp_affinity.num_masks;
4832 team->t.t_first_place = first_place;
4833 team->t.t_last_place = last_place;
4835 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4836 "bound to place %d partition = [%d,%d]\n",
4837 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4838 team->t.t_id, masters_place, first_place, last_place));
4840 switch (proc_bind) {
4842 case proc_bind_default:
4845 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
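// proc_bind_primary: every worker shares the primary thread's place partition
// and is bound to the primary's place.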
4848 case proc_bind_primary: {
4850 int n_th = team->t.t_nproc;
4851 for (f = 1; f < n_th; f++) {
4852 kmp_info_t *th = team->t.t_threads[f];
4853 KMP_DEBUG_ASSERT(th != NULL);
4854 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
4856 KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4857 "partition = [%d,%d]\n",
4858 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4859 f, masters_place, first_place, last_place));
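// proc_bind_close: pack threads onto places adjacent to the primary's place;
// if there are more threads than places, assign S = n_th / n_places threads
// per place and spread the remainder every 'gap' places.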
4863 case proc_bind_close: {
4865 int n_th = team->t.t_nproc;
4867 if (first_place <= last_place) {
4868 n_places = last_place - first_place + 1;
4870 n_places = num_masks - first_place + last_place + 1;
4872 if (n_th <= n_places) {
4873 int place = masters_place;
4874 for (f = 1; f < n_th; f++) {
4875 kmp_info_t *th = team->t.t_threads[f];
4876 KMP_DEBUG_ASSERT(th != NULL);
4878 if (place == last_place) {
4879 place = first_place;
4880 } else if (place == (num_masks - 1)) {
4885 __kmp_set_thread_place(team, th, first_place, last_place, place);
4887 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4888 "partition = [%d,%d]\n",
4889 __kmp_gtid_from_thread(team->t.t_threads[f]),
4890 team->t.t_id, f, place, first_place, last_place));
4893 int S, rem, gap, s_count;
4894 S = n_th / n_places;
4896 rem = n_th - (S * n_places);
4897 gap = rem > 0 ? n_places / rem : n_places;
4898 int place = masters_place;
4900 for (f = 0; f < n_th; f++) {
4901 kmp_info_t *th = team->t.t_threads[f];
4902 KMP_DEBUG_ASSERT(th != NULL);
4904 __kmp_set_thread_place(team, th, first_place, last_place, place);
4907 if ((s_count == S) && rem && (gap_ct == gap)) {
4909 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4911 if (place == last_place) {
4912 place = first_place;
4913 } else if (place == (num_masks - 1)) {
4921 } else if (s_count == S) {
4922 if (place == last_place) {
4923 place = first_place;
4924 } else if (place == (num_masks - 1)) {
4934 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
4935 "partition = [%d,%d]\n",
4936 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4937 th->th.th_new_place, first_place, last_place));
4939 KMP_DEBUG_ASSERT(place == masters_place);
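// proc_bind_spread: distribute threads evenly across the partition, giving
// each thread its own sub-partition of places when enough places exist.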
4943 case proc_bind_spread: {
4945 int n_th = team->t.t_nproc;
4948 if (first_place <= last_place) {
4949 n_places = last_place - first_place + 1;
4951 n_places = num_masks - first_place + last_place + 1;
4953 if (n_th <= n_places) {
4956 if (n_places != num_masks) {
4957 int S = n_places / n_th;
4958 int s_count, rem, gap, gap_ct;
4960 place = masters_place;
4961 rem = n_places - n_th * S;
4962 gap = rem ? n_th / rem : 1;
4965 if (update_master_only == 1)
4967 for (f = 0; f < thidx; f++) {
4968 kmp_info_t *th = team->t.t_threads[f];
4969 KMP_DEBUG_ASSERT(th != NULL);
4971 int fplace = place, nplace = place;
4973 while (s_count < S) {
4974 if (place == last_place) {
4975 place = first_place;
4976 } else if (place == (num_masks - 1)) {
4983 if (rem && (gap_ct == gap)) {
4984 if (place == last_place) {
4985 place = first_place;
4986 } else if (place == (num_masks - 1)) {
4994 __kmp_set_thread_place(team, th, fplace, place, nplace);
4997 if (place == last_place) {
4998 place = first_place;
4999 } else if (place == (num_masks - 1)) {
5006 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5007 "partition = [%d,%d], num_masks: %u\n",
5008 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5009 f, th->th.th_new_place, th->th.th_first_place,
5010 th->th.th_last_place, num_masks));
5016 double current = static_cast<double>(masters_place);
     double spacing =
5018     (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5023 if (update_master_only == 1)
5025 for (f = 0; f < thidx; f++) {
5026 first = static_cast<int>(current);
5027 last = static_cast<int>(current + spacing) - 1;
5028 KMP_DEBUG_ASSERT(last >= first);
5029 if (first >= n_places) {
5030 if (masters_place) {
5033 if (first == (masters_place + 1)) {
5034 KMP_DEBUG_ASSERT(f == n_th);
5037 if (last == masters_place) {
5038 KMP_DEBUG_ASSERT(f == (n_th - 1));
5042 KMP_DEBUG_ASSERT(f == n_th);
5047 if (last >= n_places) {
5048 last = (n_places - 1);
5053 KMP_DEBUG_ASSERT(0 <= first);
5054 KMP_DEBUG_ASSERT(n_places > first);
5055 KMP_DEBUG_ASSERT(0 <= last);
5056 KMP_DEBUG_ASSERT(n_places > last);
5057 KMP_DEBUG_ASSERT(last_place >= first_place);
5058 th = team->t.t_threads[f];
5059 KMP_DEBUG_ASSERT(th);
5060 __kmp_set_thread_place(team, th, first, last, place);
5062 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5063 "partition = [%d,%d], spacing = %.4f\n",
5064 __kmp_gtid_from_thread(team->t.t_threads[f]),
5065 team->t.t_id, f, th->th.th_new_place,
5066 th->th.th_first_place, th->th.th_last_place, spacing));
5070 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5072 int S, rem, gap, s_count;
5073 S = n_th / n_places;
5075 rem = n_th - (S * n_places);
5076 gap = rem > 0 ? n_places / rem : n_places;
5077 int place = masters_place;
5080 if (update_master_only == 1)
5082 for (f = 0; f < thidx; f++) {
5083 kmp_info_t *th = team->t.t_threads[f];
5084 KMP_DEBUG_ASSERT(th != NULL);
5086 __kmp_set_thread_place(team, th, place, place, place);
5089 if ((s_count == S) && rem && (gap_ct == gap)) {
5091 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5093 if (place == last_place) {
5094 place = first_place;
5095 } else if (place == (num_masks - 1)) {
5103 } else if (s_count == S) {
5104 if (place == last_place) {
5105 place = first_place;
5106 } else if (place == (num_masks - 1)) {
5115 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5116 "partition = [%d,%d]\n",
5117 __kmp_gtid_from_thread(team->t.t_threads[f]),
5118 team->t.t_id, f, th->th.th_new_place,
5119 th->th.th_first_place, th->th.th_last_place));
5121 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5129 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
kmp_team_t *
5137 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
5139 ompt_data_t ompt_parallel_data,
5141 kmp_proc_bind_t new_proc_bind,
5142 kmp_internal_control_t *new_icvs,
5143 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5144 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5147 int use_hot_team = !root->r.r_active;
5149 int do_place_partition = 1;
5151 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5152 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5153 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
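// Prefer reusing the cached "hot" team of this root (or of the nested
// hot-team stack) and resize it in place instead of building a new team.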
5156 #if KMP_NESTED_HOT_TEAMS
5157 kmp_hot_team_ptr_t *hot_teams;
5159 team = master->th.th_team;
5160 level = team->t.t_active_level;
5161 if (master->th.th_teams_microtask) {
5162 if (master->th.th_teams_size.nteams > 1 &&
5165 (microtask_t)__kmp_teams_master ||
5166 master->th.th_teams_level <
5173 if ((master->th.th_teams_size.nteams == 1 &&
5174 master->th.th_teams_level >= team->t.t_level) ||
5175 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5176 do_place_partition = 0;
5178 hot_teams = master->th.th_hot_teams;
5179 if (level < __kmp_hot_teams_max_level && hot_teams &&
5180 hot_teams[level].hot_team) {
5188 KMP_DEBUG_ASSERT(new_nproc == 1);
5192 if (use_hot_team && new_nproc > 1) {
5193 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5194 #if KMP_NESTED_HOT_TEAMS
5195 team = hot_teams[level].hot_team;
5197 team = root->r.r_hot_team;
5200 if (__kmp_tasking_mode != tskm_immediate_exec) {
5201 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5202 "task_team[1] = %p before reinit\n",
5203 team->t.t_task_team[0], team->t.t_task_team[1]));
5207 if (team->t.t_nproc != new_nproc &&
5208 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5210 int old_nthr = team->t.t_nproc;
5211 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5216 if (do_place_partition == 0)
5217 team->t.t_proc_bind = proc_bind_default;
5221 if (team->t.t_nproc == new_nproc) {
5222 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5225 if (team->t.t_size_changed == -1) {
5226 team->t.t_size_changed = 1;
5228 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5232 kmp_r_sched_t new_sched = new_icvs->sched;
5234 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5236 __kmp_reinitialize_team(team, new_icvs,
5237 root->r.r_uber_thread->th.th_ident);
5239 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5240 team->t.t_threads[0], team));
5241 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5243 #if KMP_AFFINITY_SUPPORTED
5244 if ((team->t.t_size_changed == 0) &&
5245 (team->t.t_proc_bind == new_proc_bind)) {
5246 if (new_proc_bind == proc_bind_spread) {
5247 if (do_place_partition) {
5249 __kmp_partition_places(team, 1);
5252 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5253 "proc_bind = %d, partition = [%d,%d]\n",
5254 team->t.t_id, new_proc_bind, team->t.t_first_place,
5255 team->t.t_last_place));
5257 if (do_place_partition) {
5258 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5259 __kmp_partition_places(team);
5263 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5265 } else if (team->t.t_nproc > new_nproc) {
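// The hot team has more threads than requested: release (or park) the
// surplus workers and shrink the team in place.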
5267 KA_TRACE(20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                   new_nproc));
5270 team->t.t_size_changed = 1;
5271 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5274 __kmp_add_threads_to_team(team, new_nproc);
5278 if (__kmp_tasking_mode != tskm_immediate_exec) {
5279 for (f = new_nproc; f < team->t.t_nproc; f++) {
5280 kmp_info_t *th = team->t.t_threads[f];
5281 KMP_DEBUG_ASSERT(th);
5282 th->th.th_task_team = NULL;
5285 #if KMP_NESTED_HOT_TEAMS
5286 if (__kmp_hot_teams_mode == 0) {
5289 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5290 hot_teams[level].hot_team_nth = new_nproc;
5293 for (f = new_nproc; f < team->t.t_nproc; f++) {
5294 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5295 __kmp_free_thread(team->t.t_threads[f]);
5296 team->t.t_threads[f] = NULL;
5298 #if KMP_NESTED_HOT_TEAMS
5303 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5304 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5305 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5306 for (int b = 0; b < bs_last_barrier; ++b) {
5307 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5308 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5310 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5315 team->t.t_nproc = new_nproc;
5317 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5318 __kmp_reinitialize_team(team, new_icvs,
5319 root->r.r_uber_thread->th.th_ident);
5322 for (f = 0; f < new_nproc; ++f) {
5323 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5328 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5329 team->t.t_threads[0], team));
5331 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5334 for (f = 0; f < team->t.t_nproc; f++) {
5335 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5336 team->t.t_threads[f]->th.th_team_nproc ==
5341 if (do_place_partition) {
5342 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5343 #if KMP_AFFINITY_SUPPORTED
5344 __kmp_partition_places(team);
5350 KA_TRACE(20, ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                   new_nproc));
5352 int old_nproc = team->t.t_nproc;
5353 team->t.t_size_changed = 1;
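// The hot team is smaller than requested: first reactivate workers still
// cached in the nested hot team, then allocate new workers for the rest.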
5355 #if KMP_NESTED_HOT_TEAMS
5356 int avail_threads = hot_teams[level].hot_team_nth;
5357 if (new_nproc < avail_threads)
5358 avail_threads = new_nproc;
5359 kmp_info_t **other_threads = team->t.t_threads;
5360 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5364 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5365 for (b = 0; b < bs_last_barrier; ++b) {
5366 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5367 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5369 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5373 if (hot_teams[level].hot_team_nth >= new_nproc) {
5376 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5377 team->t.t_nproc = new_nproc;
5381 team->t.t_nproc = hot_teams[level].hot_team_nth;
5382 hot_teams[level].hot_team_nth = new_nproc;
5384 if (team->t.t_max_nproc < new_nproc) {
5386 __kmp_reallocate_team_arrays(team, new_nproc);
5387 __kmp_reinitialize_team(team, new_icvs, NULL);
5390 #if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5391 KMP_AFFINITY_SUPPORTED
5397 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5401 for (f = team->t.t_nproc; f < new_nproc; f++) {
5402 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5403 KMP_DEBUG_ASSERT(new_worker);
5404 team->t.t_threads[f] = new_worker;
5407 KA_TRACE(20, ("__kmp_allocate_team: team %d init T#%d arrived: "
5408 "join=%llu, plain=%llu\n",
5409 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5410 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5411 team->t.t_bar[bs_plain_barrier].b_arrived));
5415 kmp_balign_t *balign = new_worker->th.th_bar;
5416 for (b = 0; b < bs_last_barrier; ++b) {
5417 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5418 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5419 KMP_BARRIER_PARENT_FLAG);
5421 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5427 #if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5428 KMP_AFFINITY_SUPPORTED
5430 new_temp_affinity.restore();
5432 #if KMP_NESTED_HOT_TEAMS
5435 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5438 __kmp_add_threads_to_team(team, new_nproc);
5442 __kmp_initialize_team(team, new_nproc, new_icvs,
5443 root->r.r_uber_thread->th.th_ident);
5446 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5447 for (f = 0; f < team->t.t_nproc; ++f)
5448 __kmp_initialize_info(team->t.t_threads[f], team, f,
5449 __kmp_gtid_from_tid(f, team));
5452 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5453 for (f = old_nproc; f < team->t.t_nproc; ++f)
5454 team->t.t_threads[f]->th.th_task_state = old_state;
5457 for (f = 0; f < team->t.t_nproc; ++f) {
5458 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5459 team->t.t_threads[f]->th.th_team_nproc ==
5464 if (do_place_partition) {
5465 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5466 #if KMP_AFFINITY_SUPPORTED
5467 __kmp_partition_places(team);
5472 if (master->th.th_teams_microtask) {
5473 for (f = 1; f < new_nproc; ++f) {
5475 kmp_info_t *thr = team->t.t_threads[f];
5476 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5477 thr->th.th_teams_level = master->th.th_teams_level;
5478 thr->th.th_teams_size = master->th.th_teams_size;
5481 #if KMP_NESTED_HOT_TEAMS
5485 for (f = 1; f < new_nproc; ++f) {
5486 kmp_info_t *thr = team->t.t_threads[f];
5488 kmp_balign_t *balign = thr->th.th_bar;
5489 for (b = 0; b < bs_last_barrier; ++b) {
5490 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5491 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5493 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5501 __kmp_alloc_argv_entries(argc, team, TRUE);
5502 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5506 KF_TRACE(10, (" hot_team = %p\n", team));
5509 if (__kmp_tasking_mode != tskm_immediate_exec) {
5510 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
5511 "task_team[1] = %p after reinit\n",
5512 team->t.t_task_team[0], team->t.t_task_team[1]));
5517 __ompt_team_assign_id(team, ompt_parallel_data);
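// No hot team to reuse: try to recycle a team from the free pool that has
// enough capacity for this parallel region.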
5527 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5530 if (team->t.t_max_nproc >= max_nproc) {
5532 __kmp_team_pool = team->t.t_next_pool;
5534 if (max_nproc > 1 &&
5535 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5537 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5542 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5544 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
5545 "task_team[1] %p to NULL\n",
5546 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5547 team->t.t_task_team[0] = NULL;
5548 team->t.t_task_team[1] = NULL;
5551 __kmp_alloc_argv_entries(argc, team, TRUE);
5552 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5555 KA_TRACE(20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5556 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5559 for (b = 0; b < bs_last_barrier; ++b) {
5560 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5562 team->t.t_bar[b].b_master_arrived = 0;
5563 team->t.t_bar[b].b_team_arrived = 0;
5568 team->t.t_proc_bind = new_proc_bind;
5570 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
5574 __ompt_team_assign_id(team, ompt_parallel_data);
5577 team->t.t_nested_nth = NULL;
5588 team = __kmp_reap_team(team);
5589 __kmp_team_pool = team;
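// Nothing suitable was found in the pool, so build a brand-new team.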
5594 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5597 team->t.t_max_nproc = max_nproc;
5598 if (max_nproc > 1 &&
5599 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5601 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5606 __kmp_allocate_team_arrays(team, max_nproc);
5608 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5609 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5611 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5613 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5614 team->t.t_task_team[0] = NULL;
5616 team->t.t_task_team[1] = NULL;
5619 if (__kmp_storage_map) {
5620 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5624 __kmp_alloc_argv_entries(argc, team, FALSE);
5625 team->t.t_argc = argc;
5628 KA_TRACE(20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5629 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5632 for (b = 0; b < bs_last_barrier; ++b) {
5633 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5635 team->t.t_bar[b].b_master_arrived = 0;
5636 team->t.t_bar[b].b_team_arrived = 0;
5641 team->t.t_proc_bind = new_proc_bind;
5644 __ompt_team_assign_id(team, ompt_parallel_data);
5645 team->t.ompt_serialized_team_info = NULL;
5650 team->t.t_nested_nth = NULL;
5652 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5663 void __kmp_free_team(kmp_root_t *root,
5664                      kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5666 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5670 KMP_DEBUG_ASSERT(root);
5671 KMP_DEBUG_ASSERT(team);
5672 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5673 KMP_DEBUG_ASSERT(team->t.t_threads);
5675 int use_hot_team = team == root->r.r_hot_team;
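// Hot teams are kept alive for reuse; only transient (non-hot) teams are
// torn down and returned to the pool below.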
5676 #if KMP_NESTED_HOT_TEAMS
5679 level = team->t.t_active_level - 1;
5680 if (master->th.th_teams_microtask) {
5681 if (master->th.th_teams_size.nteams > 1) {
5685 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5686 master->th.th_teams_level == team->t.t_level) {
5692 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5694 if (level < __kmp_hot_teams_max_level) {
5695 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5702 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5705 team->t.t_copyin_counter = 0;
5710 if (!use_hot_team) {
5711 if (__kmp_tasking_mode != tskm_immediate_exec) {
5713 for (f = 1; f < team->t.t_nproc; ++f) {
5714 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5715 kmp_info_t *th = team->t.t_threads[f];
5716 volatile kmp_uint32 *state = &th->th.th_reap_state;
5717 while (*state != KMP_SAFE_TO_REAP) {
5721 if (!__kmp_is_thread_alive(th, &ecode)) {
5722 *state = KMP_SAFE_TO_REAP;
5727 if (th->th.th_sleep_loc)
5728 __kmp_null_resume_wrapper(th);
5735 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5736 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5737 if (task_team != NULL) {
5738 for (f = 0; f < team->t.t_nproc; ++f) {
5739 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5740 team->t.t_threads[f]->th.th_task_team = NULL;
5744 KA_TRACE(20, ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5745 __kmp_get_gtid(), task_team, team->t.t_id));
5746 #if KMP_NESTED_HOT_TEAMS
5747 __kmp_free_task_team(master, task_team);
5749 team->t.t_task_team[tt_idx] = NULL;
5755 if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
5756 team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
5757 KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
5758 KMP_INTERNAL_FREE(team->t.t_nested_nth);
5760 team->t.t_nested_nth = NULL;
5763 team->t.t_parent = NULL;
5764 team->t.t_level = 0;
5765 team->t.t_active_level = 0;
5768 for (f = 1; f < team->t.t_nproc; ++f) {
5769 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5770 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5771 (void)KMP_COMPARE_AND_STORE_ACQ32(
5772 &(team->t.t_threads[f]->th.th_used_in_team), 1, 2);
5774 __kmp_free_thread(team->t.t_threads[f]);
5777 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5780 team->t.b->go_release();
5781 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5782 for (f = 1; f < team->t.t_nproc; ++f) {
5783 if (team->t.b->sleep[f].sleep) {
5784 __kmp_atomic_resume_64(
5785 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5786 (kmp_atomic_flag_64<> *)NULL);
5791 for (int f = 1; f < team->t.t_nproc; ++f) {
5792 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5798 for (f = 1; f < team->t.t_nproc; ++f) {
5799 team->t.t_threads[f] = NULL;
5802 if (team->t.t_max_nproc > 1 &&
5803 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5804 distributedBarrier::deallocate(team->t.b);
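// Hand the team structure back to the global team pool for later reuse.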
5809 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5810 __kmp_team_pool = (volatile kmp_team_t *)team;
5813 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5814 team->t.t_threads[1]->th.th_cg_roots);
5815 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5817 for (f = 1; f < team->t.t_nproc; ++f) {
5818 kmp_info_t *thr = team->t.t_threads[f];
5819 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5820 thr->th.th_cg_roots->cg_root == thr);
5822 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5823 thr->th.th_cg_roots = tmp->up;
5824 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5825 " up to node %p. cg_nthreads was %d\n",
5826 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5827 int i = tmp->cg_nthreads--;
5832 if (thr->th.th_cg_roots)
5833 thr->th.th_current_task->td_icvs.thread_limit =
5834 thr->th.th_cg_roots->cg_thread_limit;
5843 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5844 kmp_team_t *next_pool = team->t.t_next_pool;
5846 KMP_DEBUG_ASSERT(team);
5847 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5848 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5849 KMP_DEBUG_ASSERT(team->t.t_threads);
5850 KMP_DEBUG_ASSERT(team->t.t_argv);
5855 __kmp_free_team_arrays(team);
5856 if (team->t.t_argv != &team->t.t_inline_argv[0])
5857 __kmp_free((void *)team->t.t_argv);
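// __kmp_free_thread: detach a worker from its team and insert it into the
// thread pool, which is kept sorted by ascending gtid.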
5889 void __kmp_free_thread(kmp_info_t *this_th) {
5893 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5894 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5896 KMP_DEBUG_ASSERT(this_th);
5901 kmp_balign_t *balign = this_th->th.th_bar;
5902 for (b = 0; b < bs_last_barrier; ++b) {
5903 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5904 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5905 balign[b].bb.team = NULL;
5906 balign[b].bb.leaf_kids = 0;
5908 this_th->th.th_task_state = 0;
5909 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5912 TCW_PTR(this_th->th.th_team, NULL);
5913 TCW_PTR(this_th->th.th_root, NULL);
5914 TCW_PTR(this_th->th.th_dispatch, NULL);
5916 while (this_th->th.th_cg_roots) {
5917 this_th->th.th_cg_roots->cg_nthreads--;
5918 KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5919 " %p of thread %p to %d\n",
5920 this_th, this_th->th.th_cg_roots,
5921 this_th->th.th_cg_roots->cg_root,
5922 this_th->th.th_cg_roots->cg_nthreads));
5923 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5924 if (tmp->cg_root == this_th) {
5925 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5927 KA_TRACE(5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5928 this_th->th.th_cg_roots = tmp->up;
5931 if (tmp->cg_nthreads == 0) {
5934 this_th->th.th_cg_roots = NULL;
5944 __kmp_free_implicit_task(this_th);
5945 this_th->th.th_current_task = NULL;
5949 gtid = this_th->th.th_info.ds.ds_gtid;
5950 if (__kmp_thread_pool_insert_pt != NULL) {
5951 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5952 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5953 __kmp_thread_pool_insert_pt = NULL;
5962 if (__kmp_thread_pool_insert_pt != NULL) {
5963 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5965 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5967 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5968 scan = &((*scan)->th.th_next_pool))
5973 TCW_PTR(this_th->th.th_next_pool, *scan);
5974 __kmp_thread_pool_insert_pt = *scan = this_th;
5975 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5976 (this_th->th.th_info.ds.ds_gtid <
5977 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5978 TCW_4(this_th->th.th_in_pool, TRUE);
5979 __kmp_suspend_initialize_thread(this_th);
5980 __kmp_lock_suspend_mx(this_th);
5981 if (this_th->th.th_active == TRUE) {
5982 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5983 this_th->th.th_active_in_pool = TRUE;
5987 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5990 __kmp_unlock_suspend_mx(this_th);
5992 TCW_4(__kmp_nth, __kmp_nth - 1);
5994 #ifdef KMP_ADJUST_BLOCKTIME
5997 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5998 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5999 if (__kmp_nth <= __kmp_avail_proc) {
6000 __kmp_zero_bt = FALSE;
6010 void *__kmp_launch_thread(kmp_info_t *this_thr) {
6011 #if OMP_PROFILING_SUPPORT
6012 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
6014 if (ProfileTraceFile)
6015 llvm::timeTraceProfilerInitialize(500, "libomptarget");
6018 int gtid = this_thr->th.th_info.ds.ds_gtid;
6020 kmp_team_t **volatile pteam;
6023 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
6025 if (__kmp_env_consistency_check) {
6026 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6030 if (ompd_state & OMPD_ENABLE_BP)
6031 ompd_bp_thread_begin();
6035 ompt_data_t *thread_data = nullptr;
6036 if (ompt_enabled.enabled) {
6037 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6038 *thread_data = ompt_data_none;
6040 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6041 this_thr->th.ompt_thread_info.wait_id = 0;
6042 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6043 this_thr->th.ompt_thread_info.parallel_flags = 0;
6044 if (ompt_enabled.ompt_callback_thread_begin) {
6045 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6046 ompt_thread_worker, thread_data);
6048 this_thr->th.ompt_thread_info.state = ompt_state_idle;
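// Worker main loop: park at the fork barrier until work arrives, run the
// team's microtask, then synchronize at the join barrier, until shutdown.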
6053 while (!TCR_4(__kmp_global.g.g_done)) {
6054 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6058 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
6061 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6064 if (ompt_enabled.enabled) {
6065 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6069 pteam = &this_thr->th.th_team;
6072 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6074 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6077 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6078 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6079 (*pteam)->t.t_pkfn));
6081 updateHWFPControl(*pteam);
6084 if (ompt_enabled.enabled) {
6085 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6089 rc = (*pteam)->t.t_invoke(gtid);
6093 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6094 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6095 (*pteam)->t.t_pkfn));
6098 if (ompt_enabled.enabled) {
6100 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6102 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6106 __kmp_join_barrier(gtid);
6111 if (ompd_state & OMPD_ENABLE_BP)
6112 ompd_bp_thread_end();
6116 if (ompt_enabled.ompt_callback_thread_end) {
6117 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6121 this_thr->th.th_task_team = NULL;
6123 __kmp_common_destroy_gtid(gtid);
6125 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
6128 #if OMP_PROFILING_SUPPORT
6129 llvm::timeTraceProfilerFinishThread();
6136 void __kmp_internal_end_dest(void *specific_gtid) {
     int gtid;
6139 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6141 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6145 __kmp_internal_end_thread(gtid);
6148 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6150 __attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6151 __kmp_internal_end_atexit();
6158 void __kmp_internal_end_atexit(void) {
6159 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6183 __kmp_internal_end_library(-1);
6185 __kmp_close_console();
6189 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6194 KMP_DEBUG_ASSERT(thread != NULL);
6196 gtid = thread->th.th_info.ds.ds_gtid;
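// If the worker may be sleeping at the fork barrier, release it so that it
// can exit and be joined below.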
6199 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6202 KA_TRACE(20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
                   gtid));
6204 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6206 while (!KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
       KMP_CPU_PAUSE();
6208 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6212 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6214 __kmp_release_64(&flag);
6219 __kmp_reap_worker(thread);
6231 if (thread->th.th_active_in_pool) {
6232 thread->th.th_active_in_pool = FALSE;
6233 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6234 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6238 __kmp_free_implicit_task(thread);
6242 __kmp_free_fast_memory(thread);
6245 __kmp_suspend_uninitialize_thread(thread);
6247 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6248 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6253 #ifdef KMP_ADJUST_BLOCKTIME
6256 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6257 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6258 if (__kmp_nth <= __kmp_avail_proc) {
6259 __kmp_zero_bt = FALSE;
6265 if (__kmp_env_consistency_check) {
6266 if (thread->th.th_cons) {
6267 __kmp_free_cons_stack(thread->th.th_cons);
6268 thread->th.th_cons = NULL;
6272 if (thread->th.th_pri_common != NULL) {
6273 __kmp_free(thread->th.th_pri_common);
6274 thread->th.th_pri_common = NULL;
6278 if (thread->th.th_local.bget_data != NULL) {
6279 __kmp_finalize_bget(thread);
6283 #if KMP_AFFINITY_SUPPORTED
6284 if (thread->th.th_affin_mask != NULL) {
6285 KMP_CPU_FREE(thread->th.th_affin_mask);
6286 thread->th.th_affin_mask = NULL;
6290 #if KMP_USE_HIER_SCHED
6291 if (thread->th.th_hier_bar_data != NULL) {
6292 __kmp_free(thread->th.th_hier_bar_data);
6293 thread->th.th_hier_bar_data = NULL;
6297 __kmp_reap_team(thread->th.th_serial_team);
6298 thread->th.th_serial_team = NULL;
6305 static void __kmp_itthash_clean(kmp_info_t *th) {
6307 if (__kmp_itt_region_domains.count > 0) {
6308 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6309 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6311 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6312 __kmp_thread_free(th, bucket);
6317 if (__kmp_itt_barrier_domains.count > 0) {
6318 for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6319 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6321 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6322 __kmp_thread_free(th, bucket);
6330 static void __kmp_internal_end(void) {
6334 __kmp_unregister_library();
6341 __kmp_reclaim_dead_roots();
6345 for (i = 0; i < __kmp_threads_capacity; i++)
6347 if (__kmp_root[i]->r.r_active)
6350 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6352 if (i < __kmp_threads_capacity) {
6364 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6365 if (TCR_4(__kmp_init_monitor)) {
6366 __kmp_reap_monitor(&__kmp_monitor);
6367 TCW_4(__kmp_init_monitor, 0);
6369 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6370 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6376 for (i = 0; i < __kmp_threads_capacity; i++) {
6377 if (__kmp_root[i]) {
6380 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6389 while (__kmp_thread_pool != NULL) {
6391 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6392 __kmp_thread_pool = thread->th.th_next_pool;
6394 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6395 thread->th.th_next_pool = NULL;
6396 thread->th.th_in_pool = FALSE;
6397 __kmp_reap_thread(thread, 0);
6399 __kmp_thread_pool_insert_pt = NULL;
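// Reap every team still parked in the team free pool.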
6402 while (__kmp_team_pool != NULL) {
6404 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6405 __kmp_team_pool = team->t.t_next_pool;
6407 team->t.t_next_pool = NULL;
6408 __kmp_reap_team(team);
6411 __kmp_reap_task_teams();
6418 for (i = 0; i < __kmp_threads_capacity; i++) {
6419 kmp_info_t *thr = __kmp_threads[i];
6420 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6425 for (i = 0; i < __kmp_threads_capacity; ++i) {
6432 TCW_SYNC_4(__kmp_init_common, FALSE);
6434 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6442 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6443 if (TCR_4(__kmp_init_monitor)) {
6444 __kmp_reap_monitor(&__kmp_monitor);
6445 TCW_4(__kmp_init_monitor, 0);
6447 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6448 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6451 TCW_4(__kmp_init_gtid, FALSE);
6460 void __kmp_internal_end_library(int gtid_req) {
6467 if (__kmp_global.g.g_abort) {
6468 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
6472 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6473 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
6478 if (TCR_4(__kmp_init_hidden_helper) &&
6479 !TCR_4(__kmp_hidden_helper_team_done)) {
6480 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6482 __kmp_hidden_helper_main_thread_release();
6484 __kmp_hidden_helper_threads_deinitz_wait();
6490 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6492 KA_TRACE(10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6493 if (gtid == KMP_GTID_SHUTDOWN) {
6494 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
6495               "already shutdown\n"));
6497 } else if (gtid == KMP_GTID_MONITOR) {
6498 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
6499               "registered, or system shutdown\n"));
6501 } else if (gtid == KMP_GTID_DNE) {
6502 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                   "shutdown\n"));
6505 } else if (KMP_UBER_GTID(gtid)) {
6507 if (__kmp_root[gtid]->r.r_active) {
6508 __kmp_global.g.g_abort = -1;
6509 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6510 __kmp_unregister_library();
6512 KA_TRACE(10, ("__kmp_internal_end_library: root still active, abort T#%d\n",
                   gtid));
6516 __kmp_itthash_clean(__kmp_threads[gtid]);
6519 KA_TRACE(10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6520 __kmp_unregister_root_current_thread(gtid);
6527 #ifdef DUMP_DEBUG_ON_EXIT
6528 if (__kmp_debug_buf)
6529 __kmp_dump_debug_buffer();
6534 __kmp_unregister_library();
6539 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6542 if (__kmp_global.g.g_abort) {
6543 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6545 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6548 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6549 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6558 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6561 __kmp_internal_end();
6563 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6564 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6566 KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6568 #ifdef DUMP_DEBUG_ON_EXIT
6569 if (__kmp_debug_buf)
6570 __kmp_dump_debug_buffer();
6574 __kmp_close_console();
6577 __kmp_fini_allocator();
6581 void __kmp_internal_end_thread(int gtid_req) {
6590 if (__kmp_global.g.g_abort) {
6591 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
6595 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6596 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
6601 if (TCR_4(__kmp_init_hidden_helper) &&
6602 !TCR_4(__kmp_hidden_helper_team_done)) {
6603 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6605 __kmp_hidden_helper_main_thread_release();
6607 __kmp_hidden_helper_threads_deinitz_wait();
6614 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6616 KA_TRACE(10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6617 if (gtid == KMP_GTID_SHUTDOWN) {
6618 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
6619               "already shutdown\n"));
6621 } else if (gtid == KMP_GTID_MONITOR) {
6622 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
6623               "registered, or system shutdown\n"));
6625 } else if (gtid == KMP_GTID_DNE) {
6626 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                   "shutdown\n"));
6630 } else if (KMP_UBER_GTID(gtid)) {
6632 if (__kmp_root[gtid]->r.r_active) {
6633 __kmp_global.g.g_abort = -1;
6634 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6636 KA_TRACE(10, ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                   gtid));
6640 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                   gtid));
6642 __kmp_unregister_root_current_thread(gtid);
6646 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6649 __kmp_threads[gtid]->th.th_task_team = NULL;
6653 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                   gtid));
6659 if (__kmp_pause_status != kmp_hard_paused)
6663 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6668 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6671 if (__kmp_global.g.g_abort) {
6672 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6674 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6677 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6678 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6689 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6691 for (i = 0; i < __kmp_threads_capacity; ++i) {
6692 if (KMP_UBER_GTID(i)) {
6695 KA_TRACE(10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6696 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6697 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6704 __kmp_internal_end();
6706 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6707 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6709 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6711 #ifdef DUMP_DEBUG_ON_EXIT
6712 if (__kmp_debug_buf)
6713 __kmp_dump_debug_buffer();
6720 static long __kmp_registration_flag = 0;
6722 static char *__kmp_registration_str = NULL;
6725 static inline char *__kmp_reg_status_name() {
6731 #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6732 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                             (int)getuid());
     #else
6735 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6739 #if defined(KMP_USE_SHM)
6740 bool __kmp_shm_available = false;
6741 bool __kmp_tmp_available = false;
6743 char *temp_reg_status_file_name = nullptr;
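// Library registration: the runtime publishes "<flag address>-<flag value>-
// <library name>" via POSIX shared memory, a /tmp file, or an environment
// variable so a second OpenMP runtime loaded into the same process can
// detect the duplicate.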
6746 void __kmp_register_library_startup(void) {
6748 char *name = __kmp_reg_status_name();
6754 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6755 __kmp_initialize_system_tick();
6757 __kmp_read_system_time(&time.dtime);
6758 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6759 __kmp_registration_str =
6760 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6761                  __kmp_registration_flag, KMP_LIBRARY_FILE);
6763 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
6764 __kmp_registration_str));
6770 #if defined(KMP_USE_SHM)
6771 char *shm_name = nullptr;
6772 char *data1 = nullptr;
6773 __kmp_shm_available = __kmp_detect_shm();
6774 if (__kmp_shm_available) {
6776 shm_name = __kmp_str_format("/%s", name);
6777 int shm_preexist = 0;
6778 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6779 if ((fd1 == -1) && (errno == EEXIST)) {
6782 fd1 = shm_open(shm_name, O_RDWR, 0600);
6784 KMP_WARNING(FunctionError, "Can't open SHM");
6785 __kmp_shm_available = false;
6790 if (__kmp_shm_available && shm_preexist == 0) {
6791 if (ftruncate(fd1, SHM_SIZE) == -1) {
6792 KMP_WARNING(FunctionError, "Can't set size of SHM");
6793 __kmp_shm_available = false;
6796 if (__kmp_shm_available) {
6797 data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6799 if (data1 == MAP_FAILED) {
6800 KMP_WARNING(FunctionError, "Can't map SHM");
6801 __kmp_shm_available = false;
6804 if (__kmp_shm_available) {
6805 if (shm_preexist == 0) {
6806 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6809 value = __kmp_str_format("%s", data1);
6810 munmap(data1, SHM_SIZE);
6815 if (!__kmp_shm_available)
6816 __kmp_tmp_available = __kmp_detect_tmp();
6817 if (!__kmp_shm_available && __kmp_tmp_available) {
6824 temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
6825 int tmp_preexist = 0;
6826 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6827 if ((fd1 == -1) && (errno == EEXIST)) {
6830 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6832 KMP_WARNING(FunctionError, "Can't open TEMP");
6833 __kmp_tmp_available = false;
6838 if (__kmp_tmp_available && tmp_preexist == 0) {
6840 if (ftruncate(fd1, SHM_SIZE) == -1) {
6841 KMP_WARNING(FunctionError, "Can't set size of /tmp file");
6842 __kmp_tmp_available = false;
6845 if (__kmp_tmp_available) {
6846 data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6848 if (data1 == MAP_FAILED) {
6849 KMP_WARNING(FunctionError, "Can't map /tmp");
6850 __kmp_tmp_available = false;
6853 if (__kmp_tmp_available) {
6854 if (tmp_preexist == 0) {
6855 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6858 value = __kmp_str_format("%s", data1);
6859 munmap(data1, SHM_SIZE);
6864 if (!__kmp_shm_available && !__kmp_tmp_available) {
6867 __kmp_env_set(name, __kmp_registration_str, 0);
6869 value = __kmp_env_get(name);
6873 __kmp_env_set(name, __kmp_registration_str, 0);
6875 value = __kmp_env_get(name);
6878 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6885 char *flag_addr_str = NULL;
6886 char *flag_val_str = NULL;
6887 char const *file_name = NULL;
6888 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6889 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6892 unsigned long *flag_addr = 0;
6893 unsigned long flag_val = 0;
6894 KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
6895 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6896 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6900 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6914 file_name = "unknown library";
6919 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6920 if (!__kmp_str_match_true(duplicate_ok)) {
6922 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6923 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6925 KMP_INTERNAL_FREE(duplicate_ok);
6926 __kmp_duplicate_library_ok = 1;
6931 #if defined(KMP_USE_SHM)
6932 if (__kmp_shm_available) {
6933 shm_unlink(shm_name);
6934 } else if (__kmp_tmp_available) {
6935 unlink(temp_reg_status_file_name);
6938 __kmp_env_unset(name);
6942 __kmp_env_unset(name);
6946 KMP_DEBUG_ASSERT(0);
6950 KMP_INTERNAL_FREE((void *)value);
6951 #if defined(KMP_USE_SHM)
6953 KMP_INTERNAL_FREE((void *)shm_name);
6956 KMP_INTERNAL_FREE((void *)name);
6960 void __kmp_unregister_library(void) {
6962 char *name = __kmp_reg_status_name();
6965 #if defined(KMP_USE_SHM)
6966 char *shm_name = nullptr;
6968 if (__kmp_shm_available) {
6969 shm_name = __kmp_str_format("/%s", name);
6970 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6972 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6973 if (data1 != MAP_FAILED) {
6974 value = __kmp_str_format("%s", data1);
6975 munmap(data1, SHM_SIZE);
6979 } else if (__kmp_tmp_available) {
6980 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6982 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6983 if (data1 != MAP_FAILED) {
6984 value = __kmp_str_format("%s", data1);
6985 munmap(data1, SHM_SIZE);
6990 value = __kmp_env_get(name);
6993 value = __kmp_env_get(name);
6996 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6997 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6998 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
7000 #if defined(KMP_USE_SHM)
7001 if (__kmp_shm_available) {
7002 shm_unlink(shm_name);
7003 } else if (__kmp_tmp_available) {
7004 unlink(temp_reg_status_file_name);
7006 __kmp_env_unset(name);
7009 __kmp_env_unset(name);
7013 #if defined(KMP_USE_SHM)
7015 KMP_INTERNAL_FREE(shm_name);
7016 if (temp_reg_status_file_name)
7017 KMP_INTERNAL_FREE(temp_reg_status_file_name);
7020 KMP_INTERNAL_FREE(__kmp_registration_str);
7021 KMP_INTERNAL_FREE(value);
7022 KMP_INTERNAL_FREE(name);
7024 __kmp_registration_flag = 0;
7025 __kmp_registration_str = NULL;
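// Detect the Intel Xeon Phi (MIC) generation via CPUID; mic2 gets
// hierarchical barrier defaults later in serial initialization.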
7032 #if KMP_MIC_SUPPORTED
7034 static void __kmp_check_mic_type() {
7035 kmp_cpuid_t cpuid_state = {0};
7036 kmp_cpuid_t *cs_p = &cpuid_state;
7037 __kmp_x86_cpuid(1, 0, cs_p);
7039 if ((cs_p->eax & 0xff0) == 0xB10) {
7040 __kmp_mic_type = mic2;
7041 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
7042 __kmp_mic_type = mic3;
7044 __kmp_mic_type = non_mic;
7051 static void __kmp_user_level_mwait_init() {
7052 struct kmp_cpuid buf;
7053 __kmp_x86_cpuid(7, 0, &buf);
7054 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
7055 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
7056 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
7057 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
7058 __kmp_umwait_enabled));
7060 #elif KMP_HAVE_MWAIT
7061 #ifndef AT_INTELPHIUSERMWAIT
7064 #define AT_INTELPHIUSERMWAIT 10000
7069 unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7070 unsigned long getauxval(unsigned long) { return 0; }
7072 static void __kmp_user_level_mwait_init() {
7077 if (__kmp_mic_type == mic3) {
7078 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7079 if ((res & 0x1) || __kmp_user_level_mwait) {
7080 __kmp_mwait_enabled = TRUE;
7081 if (__kmp_user_level_mwait) {
7082 KMP_INFORM(EnvMwaitWarn);
7085 __kmp_mwait_enabled = FALSE;
7088 KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7089               "__kmp_mwait_enabled = %d\n",
7090               __kmp_mic_type, __kmp_mwait_enabled));
7094 static void __kmp_do_serial_initialize(void) {
7098 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
7100 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
7101 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
7102 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
7103 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
7104 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7114 __kmp_validate_locks();
7116 #if ENABLE_LIBOMPTARGET
7118 __kmp_init_omptarget();
7122 __kmp_init_allocator();
7128 if (__kmp_need_register_serial)
7129 __kmp_register_library_startup();
7132 if (TCR_4(__kmp_global.g.g_done)) {
7133 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
7136 __kmp_global.g.g_abort = 0;
7137 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7140 #if KMP_USE_ADAPTIVE_LOCKS
7141 #if KMP_DEBUG_ADAPTIVE_LOCKS
7142 __kmp_init_speculative_stats();
7145 #if KMP_STATS_ENABLED
7148 __kmp_init_lock(&__kmp_global_lock);
7149 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7150 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7151 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7152 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7153 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7154 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7155 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7156 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7157 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7158 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7159 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7160 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7161 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7162 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7163 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7165 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7167 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7171 __kmp_runtime_initialize();
7173 #if KMP_MIC_SUPPORTED
7174 __kmp_check_mic_type();
7181 __kmp_abort_delay = 0;
7185 __kmp_dflt_team_nth_ub = __kmp_xproc;
7186 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7187 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7189 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7190 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7192 __kmp_max_nth = __kmp_sys_max_nth;
7193 __kmp_cg_max_nth = __kmp_sys_max_nth;
7194 __kmp_teams_max_nth = __kmp_xproc;
7195 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7196 __kmp_teams_max_nth = __kmp_sys_max_nth;
7201 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7203 __kmp_monitor_wakeups =
7204 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7205 __kmp_bt_intervals =
7206 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7209 __kmp_library = library_throughput;
7211 __kmp_static = kmp_sch_static_balanced;
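// Establish default branch bits and patterns for every barrier type; the
// reduction barrier then overrides them with its own settings.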
7218 #if KMP_FAST_REDUCTION_BARRIER
7219 #define kmp_reduction_barrier_gather_bb ((int)1)
7220 #define kmp_reduction_barrier_release_bb ((int)1)
7221 #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7222 #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7224 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7225 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7226 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7227 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7228 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7229 #if KMP_FAST_REDUCTION_BARRIER
7230 if (i == bs_reduction_barrier) {
7232 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7233 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7234 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7235 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7239 #if KMP_FAST_REDUCTION_BARRIER
7240 #undef kmp_reduction_barrier_release_pat
7241 #undef kmp_reduction_barrier_gather_pat
7242 #undef kmp_reduction_barrier_release_bb
7243 #undef kmp_reduction_barrier_gather_bb
7245 #if KMP_MIC_SUPPORTED
7246 if (__kmp_mic_type == mic2) {
7248 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7249 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7251 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7252 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7254 #if KMP_FAST_REDUCTION_BARRIER
7255 if (__kmp_mic_type == mic2) {
7256 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7257 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7264 __kmp_env_checks = TRUE;
7266 __kmp_env_checks = FALSE;
7270 __kmp_foreign_tp = TRUE;
7272 __kmp_global.g.g_dynamic = FALSE;
7273 __kmp_global.g.g_dynamic_mode = dynamic_default;
7275 __kmp_init_nesting_mode();
7277 __kmp_env_initialize(NULL);
7279 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7280 __kmp_user_level_mwait_init();
7284 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
7285 if (__kmp_str_match_true(val)) {
7286 kmp_str_buf_t buffer;
7287 __kmp_str_buf_init(&buffer);
7288 __kmp_i18n_dump_catalog(&buffer);
7289 __kmp_printf("%s", buffer.str);
7290 __kmp_str_buf_free(&buffer);
7292 __kmp_env_free(&val);
7295 __kmp_threads_capacity =
7296 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7298 __kmp_tp_capacity = __kmp_default_tp_capacity(
7299 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7304 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7305 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7306 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7307 __kmp_thread_pool = NULL;
7308 __kmp_thread_pool_insert_pt = NULL;
7309 __kmp_team_pool = NULL;
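// __kmp_threads and __kmp_root are carved out of a single allocation;
// __kmp_root starts right after the __kmp_threads pointer array.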
     size =
7316     (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
         CACHE_LINE;
7318 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7319 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
7320                              sizeof(kmp_info_t *) * __kmp_threads_capacity);
7323 KMP_DEBUG_ASSERT(__kmp_all_nth == 0);
7325 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7330 gtid = __kmp_register_root(TRUE);
7331 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7332 KMP_ASSERT(KMP_UBER_GTID(gtid));
7333 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7337 __kmp_common_initialize();
7341 __kmp_register_atfork();
7344 #if !KMP_DYNAMIC_LIB || \
7345 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7350 int rc = atexit(__kmp_internal_end_atexit);
7352 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
7358 #if KMP_HANDLE_SIGNALS
7364 __kmp_install_signals(FALSE);
7367 __kmp_install_signals(TRUE);
7372 __kmp_init_counter++;
7374 __kmp_init_serial = TRUE;
7376 if (__kmp_version) {
7377 __kmp_print_version_1();
7380 if (__kmp_settings) {
7384 if (__kmp_display_env || __kmp_display_env_verbose) {
7385 __kmp_env_print_2();
7394 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
7397 void __kmp_serial_initialize(void) {
7398 if (__kmp_init_serial) {
7401 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7402 if (__kmp_init_serial) {
7403 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7406 __kmp_do_serial_initialize();
7407 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7410 static void __kmp_do_middle_initialize(void) {
7412 int prev_dflt_team_nth;
7414 if (!__kmp_init_serial) {
7415 __kmp_do_serial_initialize();
7418 KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7420 if (UNLIKELY(!__kmp_need_register_serial)) {
7423 __kmp_register_library_startup();
7428 prev_dflt_team_nth = __kmp_dflt_team_nth;
7430 #if KMP_AFFINITY_SUPPORTED
7433 __kmp_affinity_initialize(__kmp_affinity);
7437 KMP_ASSERT(__kmp_xproc > 0);
7438 if (__kmp_avail_proc == 0) {
7439 __kmp_avail_proc = __kmp_xproc;
7445 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7446 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7451 if (__kmp_dflt_team_nth == 0) {
7452 #ifdef KMP_DFLT_NTH_CORES
7454 __kmp_dflt_team_nth = __kmp_ncores;
7455 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7456 "__kmp_ncores (%d)\n",
7457 __kmp_dflt_team_nth));
7460 __kmp_dflt_team_nth = __kmp_avail_proc;
7461 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7462 "__kmp_avail_proc(%d)\n",
7463 __kmp_dflt_team_nth));
7467 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7468 __kmp_dflt_team_nth = KMP_MIN_NTH;
7470 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7471 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7474 if (__kmp_nesting_mode > 0)
7475 __kmp_set_nesting_mode_threads();
7479 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7481 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7486 for (i = 0; i < __kmp_threads_capacity; i++) {
7487 kmp_info_t *thread = __kmp_threads[i];
7490 if (thread->th.th_current_task->td_icvs.nproc != 0)
7493 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7498 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7499 __kmp_dflt_team_nth));
7501 #ifdef KMP_ADJUST_BLOCKTIME
7503 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7504 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7505 if (__kmp_nth > __kmp_avail_proc) {
7506 __kmp_zero_bt = TRUE;
7512 TCW_SYNC_4(__kmp_init_middle, TRUE);
7514 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
7517 void __kmp_middle_initialize(
void) {
7518 if (__kmp_init_middle) {
7521 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7522 if (__kmp_init_middle) {
7523 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7526 __kmp_do_middle_initialize();
7527 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7530 void __kmp_parallel_initialize(
void) {
7531 int gtid = __kmp_entry_gtid();
7534 if (TCR_4(__kmp_init_parallel))
7536 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7537 if (TCR_4(__kmp_init_parallel)) {
7538 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7543 if (TCR_4(__kmp_global.g.g_done)) {
7546 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7547 __kmp_infinite_loop();
7553 if (!__kmp_init_middle) {
7554 __kmp_do_middle_initialize();
7556 __kmp_assign_root_init_mask();
7557 __kmp_resume_if_hard_paused();
7560 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7561 KMP_ASSERT(KMP_UBER_GTID(gtid));
7563 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7566 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7567 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7568 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7572 #if KMP_HANDLE_SIGNALS
7574 __kmp_install_signals(TRUE);
7578 __kmp_suspend_initialize();
7580 #if defined(USE_LOAD_BALANCE)
7581 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7582 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7585 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7586 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7590 if (__kmp_version) {
7591 __kmp_print_version_2();
7595 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7598 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7600 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
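/* __kmp_hidden_helper_initialize below brings up the hidden helper team on
   demand: it requires parallel initialization first, optionally initializes
   the dedicated hidden-helper affinity settings, zeroes the counter of
   unexecuted hidden helper tasks, spawns the helper threads, and waits until
   they are ready before publishing __kmp_init_hidden_helper. */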
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before we initialize hidden helper
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check should not be placed before
  // __kmp_parallel_initialize as it will cause deadlock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we're initializing hidden helper team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait for the hidden helper teams to finish initializing
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
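/* ------------------------------------------------------------------------ */
/* The next two helpers bracket the execution of an implicit task on each
   team member: __kmp_run_before_invoked_task resets per-thread construct
   state and the dispatch buffer indices and pushes the parallel frame for
   consistency checking; __kmp_run_after_invoked_task pops that frame and
   finishes the implicit task. They are called from __kmp_invoke_task_func
   and __kmp_invoke_teams_master below. */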
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0;
  dispatch->th_doacross_buf_idx = 0;
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}

int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
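/* __kmp_teams_master runs on each primary thread of the league created for a
   `teams` construct; it records this thread as a new contention-group root
   and then forks the nested parallel region that executes the teams body.
   Illustrative user code that reaches this path, assuming the compiler lowers
   the construct through the __kmpc_fork_teams entry point (illustrative only,
   not part of this file):

     #pragma omp teams num_teams(4) thread_limit(8)
     #pragma omp parallel
     { ... }
*/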
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when league primary threads were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last parameter eliminates the join barrier, which would not work
  // because worker threads are in a fork barrier waiting for more parallel
  // regions.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
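/* The __kmp_push_* routines below stage clause values on the encountering
   thread (th_set_nproc, th_set_nested_nth, th_set_proc_bind, ...); the next
   fork consumes and clears them. Illustrative lowering, assuming the usual
   compiler-generated sequence through the __kmpc_* entry points (a sketch,
   not part of this file):

     // #pragma omp parallel num_threads(4)
     __kmpc_push_num_threads(loc, gtid, 4);   // ends up in __kmp_push_num_threads
     __kmpc_fork_call(loc, nargs, microtask, ...);
*/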
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
                                 int *num_threads_list) {
  kmp_info_t *thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(list_length > 1);

  if (num_threads_list[0] > 0)
    thr->th.th_set_nproc = num_threads_list[0];
  thr->th.th_set_nested_nth =
      (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
  for (kmp_uint32 i = 0; i < list_length; ++i)
    thr->th.th_set_nested_nth[i] = num_threads_list[i];
  thr->th.th_set_nested_nth_sz = list_length;
}

void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
                                  const char *msg) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_nt_strict = true;
  thr->th.th_nt_loc = loc;
  // if sev is unset make fatal
  if (sev == severity_warning)
    thr->th.th_nt_sev = sev;
  else
    thr->th.th_nt_sev = severity_fatal;
  // if msg is unset, use an appropriate message
  if (msg)
    thr->th.th_nt_msg = msg;
  else
    thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
                        "strict num_threads clause.";
}
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without warning, as it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var).
    // No thread_limit clause was specified, so thread-limit-var is unchanged.
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
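/* Worked example of the clamping above (illustrative values only): with
   num_teams = 8, no num_threads/thread_limit clause, KMP_TEAMS_THREAD_LIMIT
   unset and __kmp_avail_proc = 64, the helper starts from 64 / 8 = 8 threads
   per team, then clamps against nthreads-var, the current thread-limit-var,
   and __kmp_teams_max_nth (so that num_teams * num_threads never exceeds the
   overall cap), falling back to 1 if the result would be 0. */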
/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive,
    // but users can pass any value, so check it.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered
   (OpenMP 5.1 variant with a lower and upper bound on num_teams). */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled &&
      (ompt_state == ompt_state_wait_barrier_teams ||
       ompt_state == ompt_state_wait_barrier_implicit_parallel)) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
    if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
      sync_kind = ompt_sync_region_barrier_teams;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          sync_kind, ompt_scope_end, NULL, task_data, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
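/* The load-balance mode below sizes the next team from the observed system
   load: it computes retval = __kmp_avail_proc - system_active +
   team_curr_active and clamps it to [KMP_MIN_NTH, set_nproc]. Illustrative
   arithmetic (made-up values): with __kmp_avail_proc = 16, system_active = 10
   and team_curr_active = 5 (pool + hot team + this thread), the heuristic
   proposes 16 - 10 + 5 = 11 threads, which is then capped at the requested
   set_nproc. */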
#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count the primary thread
  }

  // Skip the primary thread; it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root, and the currently executing thread (to become the primary
  // thread) are available to add to the new team, but are currently
  // contributing to the system load, and must be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set the dynamic mode to
    // dynamic_thread_limit, we should not wind up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the #active omp threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so there
  // is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;

  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}

/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in the serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized ( 1 team of 1 thread ).
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
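/* The table and parser below implement the OpenMP affinity-format field
   syntax used by OMP_AFFINITY_FORMAT / omp_capture_affinity(): each field is
   "%" followed by optional modifiers (0, ., a width) and either a short name
   from the table or a long name in braces, e.g. "%0.8L" or "%{thread_num}".
   An illustrative format string built only from fields defined below (example
   only, not the runtime's default):

     "host=%H pid=%P thread=%n affinity={%A}"
*/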
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> nesting level
  const char *long_name; // from spec e.g., thread_num
  char field_format; // how to format the field in printf style
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Return the number of characters it takes to hold field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to the spec, if the implementation has no info for a field
    // type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
/* Return the number of characters needed to hold the affinity string (not
   including the null byte). The resulting string is printed to buffer, which
   the caller then handles. */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
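/* __kmp_aux_set_blocktime below backs the kmp_set_blocktime() entry point: it
   clamps the requested value to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and
   applies it to both the thread's slot in the current team and slot 0 of its
   serial team, also marking the blocktime as explicitly set (bt_set). */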
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}

void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
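/* __kmp_determine_reduction_method below chooses among the critical-section,
   atomic, and tree reduction code paths for a `reduction` clause. Two
   predicates drive the choice: FAST_REDUCTION_ATOMIC_METHOD_GENERATED (the
   compiler marked the location with KMP_IDENT_ATOMIC_REDUCE) and
   FAST_REDUCTION_TREE_METHOD_GENERATED (reduce_data and reduce_func were
   supplied), with per-architecture/OS tuning and an optional override via
   __kmp_force_reduction_method. The result is a packed value whose low bits
   encode the barrier variant, which is why __kmp_get_reduce_method() further
   below shifts right by 8 to recover just the method. */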
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method. If fast reduction is
  // turned on, the atomic or tree method can be selected depending on
  // machine/test characteristics, or forced via __kmp_force_reduction_method.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic deref) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||             \
    KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HAIKU ||       \
    KMP_OS_HURD || KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS ||       \
    KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HAIKU || KMP_OS_HURD ||         \
    KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}

// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may be sleeping soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// Returns 0 if the requested state change succeeded, 1 otherwise.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // runtime is not paused, so cannot resume
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused || level == kmp_stop_tool_paused) {
    // requesting hard pause or stop_tool pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so the distributed barrier must be modified.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust
  // the size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If a thread is still transitioning to the in-use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see the transition status 2 and move to 0, but they may
  // need to be woken up first.
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
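/* th_used_in_team acts as a small per-thread state machine during distributed
   barrier resizing: 1 means the thread is in the team, 2 means it has been
   asked to leave (set by __kmp_resize_dist_barrier above; the worker drops it
   to 0 itself), and 3 means it has been asked to rejoin (set by
   __kmp_add_threads_to_team below; the worker raises it to 1 once it has
   transitioned back). Resumes are only needed when blocktime is finite, since
   the worker may be sleeping. */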
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  // Add the threads back to the team
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. Set th_used_in_team to 3 to tell each thread to
  // transition itself back into the team, and wake it up if blocktime is not
  // infinite (it could be sleeping).
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    (void)KMP_COMPARE_AND_STORE_ACQ32(
        &(team->t.t_threads[f]->th.th_used_in_team), 0, 3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // Wait until all threads have transitioned into the team and have set
  // th_used_in_team to 1.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}

// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads, in case
  // a regular thread pushes a hidden helper task to a helper thread that has
  // not been initialized yet. Wait here until all initialization is finished.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If main thread, then wait for the signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
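/* Nesting mode (KMP_NESTING_MODE) support: __kmp_init_nesting_mode allocates
   a per-topology-level thread-count array sized to KMP_HW_LAST and grows
   __kmp_nested_nth to match; __kmp_set_nesting_mode_threads later fills it
   from the topology ratios (or a two-level guess when no topology is
   available). Illustrative outcome (made-up machine): on 2 sockets with 16
   cores each and 2 hardware threads per core, the per-level counts might
   become {2, 16, 2}, giving a nested nthreads list of 2, 16, 2. */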
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}

// Set #threads for the top levels of nesting, based on hardware topology
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't exceed max levels
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guess
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max-active-levels was set, use it for the nesting mode levels too
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
// Empty symbols to export when the corresponding feature is disabled
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif