14 #include "kmp_affinity.h"
15 #include "kmp_atomic.h"
16 #include "kmp_environment.h"
17 #include "kmp_error.h"
21 #include "kmp_settings.h"
22 #include "kmp_stats.h"
24 #include "kmp_wait_release.h"
25 #include "kmp_wrapper_getpid.h"
26 #include "kmp_dispatch.h"
27 #if KMP_USE_HIER_SCHED
28 #include "kmp_dispatch_hier.h"
32 #include "ompt-specific.h"
35 #include "ompd-specific.h"
38 #if OMP_PROFILING_SUPPORT
39 #include "llvm/Support/TimeProfiler.h"
40 static char *ProfileTraceFile = nullptr;
44 #define KMP_USE_PRCTL 0
59 #if defined(KMP_GOMP_COMPAT)
60 char const __kmp_version_alt_comp[] =
61     KMP_VERSION_PREFIX "alternative compiler support: yes";
64 char const __kmp_version_omp_api[] =
65     KMP_VERSION_PREFIX "API version: 5.0 (201611)";
68 char const __kmp_version_lock[] =
69     KMP_VERSION_PREFIX "lock type: run time selectable";
72 #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
77 kmp_info_t __kmp_monitor;
82 void __kmp_cleanup(void);
84 static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, int gtid);
86 static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
87 kmp_internal_control_t *new_icvs, ident_t *loc);
89 #if KMP_AFFINITY_SUPPORTED
90 static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
93 static void __kmp_do_serial_initialize(void);
94 void __kmp_fork_barrier(int gtid, int tid);
95 void __kmp_join_barrier(int gtid);
96 void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
97 kmp_internal_control_t *new_icvs, ident_t *loc);
99 #ifdef USE_LOAD_BALANCE
100 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
103 static int __kmp_expand_threads(int nNeed);
105 static int __kmp_unregister_root_other_thread(int gtid);
107 static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
108 kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
110 void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads);
112 void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
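/* __kmp_get_global_thread_id() resolves the calling thread's global thread id
   (gtid). Depending on __kmp_gtid_mode it reads the thread-local gtid (TDATA),
   uses keyed TLS, or falls back to an internal search that matches the current
   stack address against the registered stack extents of the known threads. */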
117 int __kmp_get_global_thread_id() {
119 kmp_info_t **other_threads;
127 ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
128 __kmp_nth, __kmp_all_nth));
135 if (!TCR_4(__kmp_init_gtid))
138 #ifdef KMP_TDATA_GTID
139 if (TCR_4(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
144 if (TCR_4(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
146 return __kmp_gtid_get_specific();
148 KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));
150 stack_addr = (char *)&stack_data;
151 other_threads = __kmp_threads;
164 for (i = 0; i < __kmp_threads_capacity; i++) {
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
171 stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
178 if (stack_diff <= stack_size) {
185 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
186 __kmp_gtid_get_specific() == i);
194 ("*** __kmp_get_global_thread_id: internal alg. failed to find "
195 "thread, using TLS\n"));
196 i = __kmp_gtid_get_specific();
207 if (!TCR_SYNC_PTR(other_threads[i]))
212 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
213 KMP_FATAL(StackOverflow, i);
216 stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
217 if (stack_addr > stack_base) {
218 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
219 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
220 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
223 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
224 stack_base - stack_addr);
228 if (__kmp_storage_map) {
229 char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
230 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
231 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
232 other_threads[i]->th.th_info.ds.ds_stacksize,
233 "th_%d stack (refinement)", i);
238 int __kmp_get_global_thread_id_reg() {
241 if (!__kmp_init_serial) {
244 #ifdef KMP_TDATA_GTID
245 if (TCR_4(__kmp_gtid_mode) >= 3) {
246 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
250 if (TCR_4(__kmp_gtid_mode) >= 2) {
251 KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
252 gtid = __kmp_gtid_get_specific();
255 ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
256 gtid = __kmp_get_global_thread_id();
260 if (gtid == KMP_GTID_DNE) {
262 ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
263 "Registering a new gtid.\n"));
264 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
265 if (!__kmp_init_serial) {
266 __kmp_do_serial_initialize();
267 gtid = __kmp_gtid_get_specific();
269 gtid = __kmp_register_root(FALSE);
271 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
275 KMP_DEBUG_ASSERT(gtid >= 0);
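/* __kmp_check_stack_overlap() optionally prints the caller's stack extent to
   the storage map and, when __kmp_env_checks is set and the thread is not an
   uber (root) thread, scans all registered threads for overlapping stack
   ranges, raising a fatal StackOverlap error if one is found. */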
281 void __kmp_check_stack_overlap(kmp_info_t *th) {
283 char *stack_beg = NULL;
284 char *stack_end = NULL;
287 KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
288 if (__kmp_storage_map) {
289 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
290 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
292 gtid = __kmp_gtid_from_thread(th);
294 if (gtid == KMP_GTID_MONITOR) {
295 __kmp_print_storage_map_gtid(
296 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
297 "th_%s stack (%s)",
"mon",
298 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
300 __kmp_print_storage_map_gtid(
301 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
302 "th_%d stack (%s)", gtid,
303 (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
309 gtid = __kmp_gtid_from_thread(th);
310 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
312 ("__kmp_check_stack_overlap: performing extensive checking\n"));
313 if (stack_beg == NULL) {
314 stack_end = (char *)th->th.th_info.ds.ds_stackbase;
315 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
318 for (f = 0; f < __kmp_threads_capacity; f++) {
319 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
321 if (f_th && f_th != th) {
322 char *other_stack_end =
323 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
324 char *other_stack_beg =
325 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
326 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
327 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
330 if (__kmp_storage_map)
331 __kmp_print_storage_map_gtid(
332 -1, other_stack_beg, other_stack_end,
333 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
334 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
336 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
342 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
347 void __kmp_infinite_loop(void) {
348 static int done = FALSE;
355 #define MAX_MESSAGE 512
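/* __kmp_print_storage_map_gtid() formats one "OMP storage map" line describing
   the range [p1, p2] for the given gtid and, when KMP_PRINT_DATA_PLACEMENT is
   enabled, additionally reports the NUMA node of each page in the range.
   Output is serialized with __kmp_stdio_lock. */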
357 void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
358 char const *format, ...) {
359 char buffer[MAX_MESSAGE];
362 va_start(ap, format);
363 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
364 p2, (unsigned long)size, format);
365 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
366 __kmp_vprintf(kmp_err, buffer, ap);
367 #if KMP_PRINT_DATA_PLACEMENT
370 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
371 if (__kmp_storage_map_verbose) {
372 node = __kmp_get_host_node(p1);
374 __kmp_storage_map_verbose = FALSE;
378 int localProc = __kmp_get_cpu_from_gtid(gtid);
380 const int page_size = KMP_GET_PAGE_SIZE();
382 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
383 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
385 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
388 __kmp_printf_no_lock(" GTID %d\n", gtid);
397 (char *)p1 += page_size;
398 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
399 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
403 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
404 (char *)p1 + (page_size - 1),
405 __kmp_get_host_node(p1));
407 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
408 (char *)p2 + (page_size - 1),
409 __kmp_get_host_node(p2));
415 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
418 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
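/* __kmp_warn() emits an "OMP warning:" line to kmp_err unless warnings are
   disabled (__kmp_generate_warnings == kmp_warnings_off); output is serialized
   with __kmp_stdio_lock. */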
423 void __kmp_warn(char const *format, ...) {
424 char buffer[MAX_MESSAGE];
427 if (__kmp_generate_warnings == kmp_warnings_off) {
431 va_start(ap, format);
433 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
434 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
435 __kmp_vprintf(kmp_err, buffer, ap);
436 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
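/* __kmp_abort_process() aborts the whole process: it dumps the debug buffer if
   one is enabled, records SIGABRT in __kmp_global.g.g_abort on Windows, and
   unregisters the library before terminating. */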
441 void __kmp_abort_process() {
443 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
445 if (__kmp_debug_buf) {
446 __kmp_dump_debug_buffer();
449 if (KMP_OS_WINDOWS) {
452 __kmp_global.g.g_abort = SIGABRT;
466 __kmp_unregister_library();
470 __kmp_infinite_loop();
471 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
475 void __kmp_abort_thread(void) {
478 __kmp_infinite_loop();
484 static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
485 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
488 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
489 sizeof(kmp_desc_t), "th_%d.th_info", gtid);
491 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
492 sizeof(kmp_local_t), "th_%d.th_local", gtid);
494 __kmp_print_storage_map_gtid(
495 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
496 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);
498 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
499 &thr->th.th_bar[bs_plain_barrier + 1],
500 sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
503 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
504 &thr->th.th_bar[bs_forkjoin_barrier + 1],
505 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
508 #if KMP_FAST_REDUCTION_BARRIER
509 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
510 &thr->th.th_bar[bs_reduction_barrier + 1],
511 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
519 static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
520 int team_id, int num_thr) {
521 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
522 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
525 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
526 &team->t.t_bar[bs_last_barrier],
527 sizeof(kmp_balign_team_t) * bs_last_barrier,
528 "%s_%d.t_bar", header, team_id);
530 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
531 &team->t.t_bar[bs_plain_barrier + 1],
532 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
535 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
536 &team->t.t_bar[bs_forkjoin_barrier + 1],
537 sizeof(kmp_balign_team_t),
538 "%s_%d.t_bar[forkjoin]", header, team_id);
540 #if KMP_FAST_REDUCTION_BARRIER
541 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
542 &team->t.t_bar[bs_reduction_barrier + 1],
543 sizeof(kmp_balign_team_t),
544 "%s_%d.t_bar[reduction]", header, team_id);
547 __kmp_print_storage_map_gtid(
548 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
549 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);
551 __kmp_print_storage_map_gtid(
552 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
553 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);
555 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
556 &team->t.t_disp_buffer[num_disp_buff],
557 sizeof(dispatch_shared_info_t) * num_disp_buff,
558 "%s_%d.t_disp_buffer", header, team_id);
561 static void __kmp_init_allocator() {
562 __kmp_init_memkind();
563 __kmp_init_target_mem();
565 static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
569 #if ENABLE_LIBOMPTARGET
570 static void __kmp_init_omptarget() {
571 __kmp_init_target_task();
580 BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
585 case DLL_PROCESS_ATTACH:
586 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
590 case DLL_PROCESS_DETACH:
591 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
604 if (lpReserved == NULL)
605 __kmp_internal_end_library(__kmp_gtid_get_specific());
609 case DLL_THREAD_ATTACH:
610 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
616 case DLL_THREAD_DETACH:
617 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
619 __kmp_internal_end_thread(__kmp_gtid_get_specific());
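/* __kmp_parallel_deo() / __kmp_parallel_dxo() implement entry to and exit from
   an "ordered" region. With BUILD_PARALLEL_ORDERED the entering thread waits
   until team->t.t_ordered.dt.t_value equals its tid, and on exit the value is
   advanced to (tid + 1) % nproc to release the next thread in order. */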
630 void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
631 int gtid = *gtid_ref;
632 #ifdef BUILD_PARALLEL_ORDERED
633 kmp_team_t *team = __kmp_team_from_gtid(gtid);
636 if (__kmp_env_consistency_check) {
637 if (__kmp_threads[gtid]->th.th_root->r.r_active)
638 #if KMP_USE_DYNAMIC_LOCK
639 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
641 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
644 #ifdef BUILD_PARALLEL_ORDERED
645 if (!team->t.t_serialized) {
647 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
655 void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
656 int gtid = *gtid_ref;
657 #ifdef BUILD_PARALLEL_ORDERED
658 int tid = __kmp_tid_from_gtid(gtid);
659 kmp_team_t *team = __kmp_team_from_gtid(gtid);
662 if (__kmp_env_consistency_check) {
663 if (__kmp_threads[gtid]->th.th_root->r.r_active)
664 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
666 #ifdef BUILD_PARALLEL_ORDERED
667 if (!team->t.t_serialized) {
672 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
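/* __kmp_enter_single() decides whether the calling thread executes a SINGLE
   construct: the winner is the thread that atomically advances
   team->t.t_construct past its previous value. Consistency checks push or
   check the workshare when enabled; __kmp_exit_single() pops it again. */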
682 int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
687 if (!TCR_4(__kmp_init_parallel))
688 __kmp_parallel_initialize();
689 __kmp_resume_if_soft_paused();
691 th = __kmp_threads[gtid];
692 team = th->th.th_team;
695 th->th.th_ident = id_ref;
697 if (team->t.t_serialized) {
700 kmp_int32 old_this = th->th.th_local.this_construct;
702 ++th->th.th_local.this_construct;
706 if (team->t.t_construct == old_this) {
707 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
708 th->th.th_local.this_construct);
711 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
712 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
713 team->t.t_active_level == 1) {
715 __kmp_itt_metadata_single(id_ref);
720 if (__kmp_env_consistency_check) {
721 if (status && push_ws) {
722 __kmp_push_workshare(gtid, ct_psingle, id_ref);
724 __kmp_check_workshare(gtid, ct_psingle, id_ref);
729 __kmp_itt_single_start(gtid);
735 void __kmp_exit_single(int gtid) {
737 __kmp_itt_single_end(gtid);
739 if (__kmp_env_consistency_check)
740 __kmp_pop_workshare(gtid, ct_psingle, NULL);
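/* __kmp_reserve_threads() determines how many threads a new team can actually
   get. Starting from the requested set_nthreads it applies, in order: the
   dynamic adjustment mode (load balance, thread limit, or random),
   KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth), the contention-group limit
   (OMP_THREAD_LIMIT), and finally the capacity of the __kmp_threads array,
   expanding it when possible. A result of 1 means the region is serialized. */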
749 static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
750 int master_tid, int set_nthreads, int enter_teams) {
754 KMP_DEBUG_ASSERT(__kmp_init_serial);
755 KMP_DEBUG_ASSERT(root && parent_team);
756 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
760 new_nthreads = set_nthreads;
761 if (!get__dynamic_2(parent_team, master_tid)) {
764 #ifdef USE_LOAD_BALANCE
765 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
766 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
767 if (new_nthreads == 1) {
768 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
769 "reservation to 1 thread\n",
773 if (new_nthreads < set_nthreads) {
774 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
775 "reservation to %d threads\n",
776 master_tid, new_nthreads));
780 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
781 new_nthreads = __kmp_avail_proc - __kmp_nth +
782 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
783 if (new_nthreads <= 1) {
784 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
785 "reservation to 1 thread\n",
789 if (new_nthreads < set_nthreads) {
790 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
791 "reservation to %d threads\n",
792 master_tid, new_nthreads));
794 new_nthreads = set_nthreads;
796 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
797 if (set_nthreads > 2) {
798 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
799 new_nthreads = (new_nthreads % set_nthreads) + 1;
800 if (new_nthreads == 1) {
801 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
802 "reservation to 1 thread\n",
806 if (new_nthreads < set_nthreads) {
807 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
808 "reservation to %d threads\n",
809 master_tid, new_nthreads));
817 if (__kmp_nth + new_nthreads -
818 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
820 int tl_nthreads = __kmp_max_nth - __kmp_nth +
821 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
822 if (tl_nthreads <= 0) {
827 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
828 __kmp_reserve_warn = 1;
829 __kmp_msg(kmp_ms_warning,
830 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
831 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
833 if (tl_nthreads == 1) {
834 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
835 "reduced reservation to 1 thread\n",
839 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
840 "reservation to %d threads\n",
841 master_tid, tl_nthreads));
842 new_nthreads = tl_nthreads;
846 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
847 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
848 if (cg_nthreads + new_nthreads -
849 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
851 int tl_nthreads = max_cg_threads - cg_nthreads +
852 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
853 if (tl_nthreads <= 0) {
858 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
859 __kmp_reserve_warn = 1;
860 __kmp_msg(kmp_ms_warning,
861 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
862 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
864 if (tl_nthreads == 1) {
865 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
866 "reduced reservation to 1 thread\n",
870 KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
871 "reservation to %d threads\n",
872 master_tid, tl_nthreads));
873 new_nthreads = tl_nthreads;
879 capacity = __kmp_threads_capacity;
880 if (TCR_PTR(__kmp_threads[0]) == NULL) {
886 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
887 capacity -= __kmp_hidden_helper_threads_num;
889 if (__kmp_nth + new_nthreads -
890 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
893 int slotsRequired = __kmp_nth + new_nthreads -
894 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
896 int slotsAdded = __kmp_expand_threads(slotsRequired);
897 if (slotsAdded < slotsRequired) {
899 new_nthreads -= (slotsRequired - slotsAdded);
900 KMP_ASSERT(new_nthreads >= 1);
903 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
904 __kmp_reserve_warn = 1;
905 if (__kmp_tp_cached) {
906 __kmp_msg(kmp_ms_warning,
907 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
908 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
909 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
911 __kmp_msg(kmp_ms_warning,
912 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
913 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
920 if (new_nthreads == 1) {
922 ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
923 "dead roots and rechecking; requested %d threads\n",
924 __kmp_get_gtid(), set_nthreads));
926 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
928 __kmp_get_gtid(), new_nthreads, set_nthreads));
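/* __kmp_fork_team_threads() installs the primary thread in slot 0 of the new
   team, allocates (or reuses, for hot teams) the remaining workers, copies the
   teams-construct state and barrier arrival counters to each worker, and, when
   affinity is supported, partitions places across the team. */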
937 static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
938 kmp_info_t *master_th, int master_gtid,
939 int fork_teams_workers) {
943 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
944 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
948 master_th->th.th_info.ds.ds_tid = 0;
949 master_th->th.th_team = team;
950 master_th->th.th_team_nproc = team->t.t_nproc;
951 master_th->th.th_team_master = master_th;
952 master_th->th.th_team_serialized = FALSE;
953 master_th->th.th_dispatch = &team->t.t_dispatch[0];
956 #if KMP_NESTED_HOT_TEAMS
958 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
961 int level = team->t.t_active_level - 1;
962 if (master_th->th.th_teams_microtask) {
963 if (master_th->th.th_teams_size.nteams > 1) {
967 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
968 master_th->th.th_teams_level == team->t.t_level) {
973 if (level < __kmp_hot_teams_max_level) {
974 if (hot_teams[level].hot_team) {
976 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
980 hot_teams[level].hot_team = team;
981 hot_teams[level].hot_team_nth = team->t.t_nproc;
988 use_hot_team = team == root->r.r_hot_team;
993 team->t.t_threads[0] = master_th;
994 __kmp_initialize_info(master_th, team, 0, master_gtid);
997 for (i = 1; i < team->t.t_nproc; i++) {
1000 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1001 team->t.t_threads[i] = thr;
1002 KMP_DEBUG_ASSERT(thr);
1003 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1005 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1006 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1007 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1008 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1009 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1010 team->t.t_bar[bs_plain_barrier].b_arrived));
1011 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1012 thr->th.th_teams_level = master_th->th.th_teams_level;
1013 thr->th.th_teams_size = master_th->th.th_teams_size;
1016 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1017 for (b = 0; b < bs_last_barrier; ++b) {
1018 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1019 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1021 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1027 #if KMP_AFFINITY_SUPPORTED
1031 if (!fork_teams_workers) {
1032 __kmp_partition_places(team);
1036 if (team->t.t_nproc > 1 &&
1037 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1038 team->t.b->update_num_threads(team->t.t_nproc);
1039 __kmp_add_threads_to_team(team, team->t.t_nproc);
1043 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1044 for (i = 0; i < team->t.t_nproc; i++) {
1045 kmp_info_t *thr = team->t.t_threads[i];
1046 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1047 thr->th.th_prev_level != team->t.t_level) {
1048 team->t.t_display_affinity = 1;
1057 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
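/* propagateFPControl() captures the primary thread's x87 control word and MXCSR
   (masked with KMP_X86_MXCSR_MASK) into the team when __kmp_inherit_fp_control
   is set; updateHWFPControl() later restores those saved values on a worker
   whose current FP state differs. On other architectures both are no-ops. */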
1061 inline static void propagateFPControl(kmp_team_t *team) {
1062 if (__kmp_inherit_fp_control) {
1063 kmp_int16 x87_fpu_control_word;
1067 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1068 __kmp_store_mxcsr(&mxcsr);
1069 mxcsr &= KMP_X86_MXCSR_MASK;
1080 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1081 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1084 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1088 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
1094 inline static void updateHWFPControl(kmp_team_t *team) {
1095 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1098 kmp_int16 x87_fpu_control_word;
1100 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1101 __kmp_store_mxcsr(&mxcsr);
1102 mxcsr &= KMP_X86_MXCSR_MASK;
1104 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1105 __kmp_clear_x87_fpu_status_word();
1106 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1109 if (team->t.t_mxcsr != mxcsr) {
1110 __kmp_load_mxcsr(&team->t.t_mxcsr);
1115 #define propagateFPControl(x) ((void)0)
1116 #define updateHWFPControl(x) ((void)0)
1119 static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
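/* __kmp_serialized_parallel() runs a parallel region serialized on the calling
   thread: it allocates or reuses the thread's serial team, pushes the current
   task and ICVs, bumps t_serialized/t_level for nested cases, and drives the
   OMPT parallel-begin and implicit-task callbacks when enabled. */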
1124 void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
1125 kmp_info_t *this_thr;
1126 kmp_team_t *serial_team;
1128 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1135 if (!TCR_4(__kmp_init_parallel))
1136 __kmp_parallel_initialize();
1137 __kmp_resume_if_soft_paused();
1139 this_thr = __kmp_threads[global_tid];
1140 serial_team = this_thr->th.th_serial_team;
1143 KMP_DEBUG_ASSERT(serial_team);
1146 if (__kmp_tasking_mode != tskm_immediate_exec) {
1148 this_thr->th.th_task_team ==
1149 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1150 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1152 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1153 "team %p, new task_team = NULL\n",
1154 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1155 this_thr->th.th_task_team = NULL;
1158 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1159 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1160 proc_bind = proc_bind_false;
1161 } else if (proc_bind == proc_bind_default) {
1164 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1167 this_thr->th.th_set_proc_bind = proc_bind_default;
1170 this_thr->th.th_set_nproc = 0;
1173 ompt_data_t ompt_parallel_data = ompt_data_none;
1174 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1175 if (ompt_enabled.enabled &&
1176 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1178 ompt_task_info_t *parent_task_info;
1179 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1181 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1182 if (ompt_enabled.ompt_callback_parallel_begin) {
1185 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1186 &(parent_task_info->task_data), &(parent_task_info->frame),
1187 &ompt_parallel_data, team_size,
1188 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1193 if (this_thr->th.th_team != serial_team) {
1195 int level = this_thr->th.th_team->t.t_level;
1197 if (serial_team->t.t_serialized) {
1200 kmp_team_t *new_team;
1202 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1205 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1209 proc_bind, &this_thr->th.th_current_task->td_icvs,
1210 0 USE_NESTED_HOT_ARG(NULL));
1211 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1212 KMP_ASSERT(new_team);
1215 new_team->t.t_threads[0] = this_thr;
1216 new_team->t.t_parent = this_thr->th.th_team;
1217 serial_team = new_team;
1218 this_thr->th.th_serial_team = serial_team;
1222 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1223 global_tid, serial_team));
1231 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1232 global_tid, serial_team));
1236 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1237 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1238 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1239 serial_team->t.t_ident = loc;
1240 serial_team->t.t_serialized = 1;
1241 serial_team->t.t_nproc = 1;
1242 serial_team->t.t_parent = this_thr->th.th_team;
1243 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1244 this_thr->th.th_team = serial_team;
1245 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1247 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1248 this_thr->th.th_current_task));
1249 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1250 this_thr->th.th_current_task->td_flags.executing = 0;
1252 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1257 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1258 &this_thr->th.th_current_task->td_parent->td_icvs);
1262 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1263 this_thr->th.th_current_task->td_icvs.nproc =
1264 __kmp_nested_nth.nth[level + 1];
1267 if (__kmp_nested_proc_bind.used &&
1268 (level + 1 < __kmp_nested_proc_bind.used)) {
1269 this_thr->th.th_current_task->td_icvs.proc_bind =
1270 __kmp_nested_proc_bind.bind_types[level + 1];
1274 serial_team->t.t_pkfn = (microtask_t)(~0);
1276 this_thr->th.th_info.ds.ds_tid = 0;
1279 this_thr->th.th_team_nproc = 1;
1280 this_thr->th.th_team_master = this_thr;
1281 this_thr->th.th_team_serialized = 1;
1283 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1284 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1285 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1287 propagateFPControl(serial_team);
1290 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1291 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1292 serial_team->t.t_dispatch->th_disp_buffer =
1293 (dispatch_private_info_t *)__kmp_allocate(
1294 sizeof(dispatch_private_info_t));
1296 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1303 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1304 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1305 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1306 ++serial_team->t.t_serialized;
1307 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1310 int level = this_thr->th.th_team->t.t_level;
1313 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1314 this_thr->th.th_current_task->td_icvs.nproc =
1315 __kmp_nested_nth.nth[level + 1];
1317 serial_team->t.t_level++;
1318 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
1319 "of serial team %p to %d\n",
1320 global_tid, serial_team, serial_team->t.t_level));
1323 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1325 dispatch_private_info_t *disp_buffer =
1326 (dispatch_private_info_t *)__kmp_allocate(
1327 sizeof(dispatch_private_info_t));
1328 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1329 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1331 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1335 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1339 if (__kmp_display_affinity) {
1340 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1341 this_thr->th.th_prev_num_threads != 1) {
1343 __kmp_aux_display_affinity(global_tid, NULL);
1344 this_thr->th.th_prev_level = serial_team->t.t_level;
1345 this_thr->th.th_prev_num_threads = 1;
1349 if (__kmp_env_consistency_check)
1350 __kmp_push_parallel(global_tid, NULL);
1352 serial_team->t.ompt_team_info.master_return_address = codeptr;
1353 if (ompt_enabled.enabled &&
1354 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1355 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1356 OMPT_GET_FRAME_ADDRESS(0);
1358 ompt_lw_taskteam_t lw_taskteam;
1359 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1360 &ompt_parallel_data, codeptr);
1362 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1366 if (ompt_enabled.ompt_callback_implicit_task) {
1367 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1368 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1369 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1370 ompt_task_implicit);
1371 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1372 __kmp_tid_from_gtid(global_tid);
1376 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1377 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1378 OMPT_GET_FRAME_ADDRESS(0);
1384 static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1385 microtask_t microtask, int level,
1386 int teams_level, kmp_va_list ap) {
1387 return (master_th->th.th_teams_microtask && ap &&
1388 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
1393 static inline bool __kmp_is_entering_teams(int active_level, int level,
1394 int teams_level, kmp_va_list ap) {
1395 return ((ap == NULL && active_level == 0) ||
1396 (ap && teams_level > 0 && teams_level == level));
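/* __kmp_fork_in_teams() handles a fork that occurs inside a teams construct:
   the parent (teams) team is reused, its argv and thread count are refreshed
   from the primary thread's settings, proc-bind is re-evaluated, and the
   region is either invoked directly (serialized parent) or started through
   __kmp_internal_fork(). */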
1403 __kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
1404 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1405 enum fork_context_e call_context, microtask_t microtask,
1406 launch_t invoker, int master_set_numthreads, int level,
1408 ompt_data_t ompt_parallel_data, void *return_address,
1414 parent_team->t.t_ident = loc;
1415 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1416 parent_team->t.t_argc = argc;
1417 argv = (void **)parent_team->t.t_argv;
1418 for (i = argc - 1; i >= 0; --i) {
1419 *argv++ = va_arg(kmp_va_deref(ap), void *);
1422 if (parent_team == master_th->th.th_serial_team) {
1425 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1427 if (call_context == fork_context_gnu) {
1430 parent_team->t.t_serialized--;
1435 parent_team->t.t_pkfn = microtask;
1440 void **exit_frame_p;
1441 ompt_data_t *implicit_task_data;
1442 ompt_lw_taskteam_t lw_taskteam;
1444 if (ompt_enabled.enabled) {
1445 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1446 &ompt_parallel_data, return_address);
1447 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1449 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1453 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1454 if (ompt_enabled.ompt_callback_implicit_task) {
1455 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1456 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1457 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1458 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1462 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1464 exit_frame_p = &dummy;
1470 parent_team->t.t_serialized--;
1473 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1474 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1475 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1484 if (ompt_enabled.enabled) {
1485 *exit_frame_p = NULL;
1486 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1487 if (ompt_enabled.ompt_callback_implicit_task) {
1488 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1489 ompt_scope_end, NULL, implicit_task_data, 1,
1490 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1492 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1493 __ompt_lw_taskteam_unlink(master_th);
1494 if (ompt_enabled.ompt_callback_parallel_end) {
1495 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1496 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1497 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1499 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1505 parent_team->t.t_pkfn = microtask;
1506 parent_team->t.t_invoke = invoker;
1507 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1508 parent_team->t.t_active_level++;
1509 parent_team->t.t_level++;
1510 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
1517 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
1520 if (ompt_enabled.enabled) {
1521 ompt_lw_taskteam_t lw_taskteam;
1522 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1524 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1529 if (master_set_numthreads) {
1530 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1532 kmp_info_t **other_threads = parent_team->t.t_threads;
1535 int old_proc = master_th->th.th_teams_size.nth;
1536 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1537 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1538 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1540 parent_team->t.t_nproc = master_set_numthreads;
1541 for (i = 0; i < master_set_numthreads; ++i) {
1542 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1546 master_th->th.th_set_nproc = 0;
1550 if (__kmp_debugging) {
1551 int nth = __kmp_omp_num_threads(loc);
1553 master_set_numthreads = nth;
1559 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1561 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1562 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1563 proc_bind = proc_bind_false;
1566 if (proc_bind == proc_bind_default) {
1567 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1573 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1574 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1575 master_th->th.th_current_task->td_icvs.proc_bind)) {
1576 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1579 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1581 if (proc_bind_icv != proc_bind_default &&
1582 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1583 kmp_info_t **other_threads = parent_team->t.t_threads;
1584 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1585 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
1589 master_th->th.th_set_proc_bind = proc_bind_default;
1591 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1592 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1594 __kmp_forkjoin_frames_mode == 3 &&
1595 parent_team->t.t_active_level == 1
1596 && master_th->th.th_teams_size.nteams == 1) {
1597 kmp_uint64 tmp_time = __itt_get_timestamp();
1598 master_th->th.th_frame_time = tmp_time;
1599 parent_team->t.t_region_time = tmp_time;
1601 if (__itt_stack_caller_create_ptr) {
1602 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1604 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1607 #if KMP_AFFINITY_SUPPORTED
1608 __kmp_partition_places(parent_team);
1611 KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1612 "master_th=%p, gtid=%d\n",
1613 root, parent_team, master_th, gtid));
1614 __kmp_internal_fork(loc, gtid, parent_team);
1615 KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1616 "master_th=%p, gtid=%d\n",
1617 root, parent_team, master_th, gtid));
1619 if (call_context == fork_context_gnu)
1623 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1624 parent_team->t.t_id, parent_team->t.t_pkfn));
1626 if (!parent_team->t.t_invoke(gtid)) {
1627 KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
1629 KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1630 parent_team->t.t_id, parent_team->t.t_pkfn));
1633 KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
1640 __kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
1641 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1642 kmp_info_t *master_th, kmp_team_t *parent_team,
1644 ompt_data_t *ompt_parallel_data, void **return_address,
1645 ompt_data_t **parent_task_data,
1653 #if KMP_OS_LINUX && \
1654 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1657 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
1662 20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
1667 master_th->th.th_serial_team->t.t_pkfn = microtask;
1670 if (call_context == fork_context_intel) {
1672 master_th->th.th_serial_team->t.t_ident = loc;
1675 master_th->th.th_serial_team->t.t_level--;
1680 void **exit_frame_p;
1681 ompt_task_info_t *task_info;
1682 ompt_lw_taskteam_t lw_taskteam;
1684 if (ompt_enabled.enabled) {
1685 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1686 ompt_parallel_data, *return_address);
1688 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1690 task_info = OMPT_CUR_TASK_INFO(master_th);
1691 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1692 if (ompt_enabled.ompt_callback_implicit_task) {
1693 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1694 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1695 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1696 &(task_info->task_data), 1,
1697 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1701 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1703 exit_frame_p = &dummy;
1708 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1709 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1710 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1719 if (ompt_enabled.enabled) {
1720 *exit_frame_p = NULL;
1721 if (ompt_enabled.ompt_callback_implicit_task) {
1722 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1723 ompt_scope_end, NULL, &(task_info->task_data), 1,
1724 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1726 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1727 __ompt_lw_taskteam_unlink(master_th);
1728 if (ompt_enabled.ompt_callback_parallel_end) {
1729 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1730 ompt_parallel_data, *parent_task_data,
1731 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1733 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1736 } else if (microtask == (microtask_t)__kmp_teams_master) {
1737 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1738 team = master_th->th.th_team;
1740 team->t.t_invoke = invoker;
1741 __kmp_alloc_argv_entries(argc, team, TRUE);
1742 team->t.t_argc = argc;
1743 argv = (void **)team->t.t_argv;
1745 for (i = argc - 1; i >= 0; --i)
1746 *argv++ = va_arg(kmp_va_deref(ap), void *);
1748 for (i = 0; i < argc; ++i)
1750 argv[i] = parent_team->t.t_argv[i];
1758 if (ompt_enabled.enabled) {
1759 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1760 if (ompt_enabled.ompt_callback_implicit_task) {
1761 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1762 ompt_scope_end, NULL, &(task_info->task_data), 0,
1763 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1765 if (ompt_enabled.ompt_callback_parallel_end) {
1766 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1767 ompt_parallel_data, *parent_task_data,
1768 OMPT_INVOKER(call_context) | ompt_parallel_league,
1771 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1776 for (i = argc - 1; i >= 0; --i)
1777 *argv++ = va_arg(kmp_va_deref(ap), void *);
1782 void **exit_frame_p;
1783 ompt_task_info_t *task_info;
1784 ompt_lw_taskteam_t lw_taskteam;
1785 ompt_data_t *implicit_task_data;
1787 if (ompt_enabled.enabled) {
1788 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1789 ompt_parallel_data, *return_address);
1790 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1792 task_info = OMPT_CUR_TASK_INFO(master_th);
1793 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1796 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1797 if (ompt_enabled.ompt_callback_implicit_task) {
1798 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1799 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1800 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1801 ompt_task_implicit);
1802 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1806 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1808 exit_frame_p = &dummy;
1813 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1814 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1815 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1824 if (ompt_enabled.enabled) {
1825 *exit_frame_p = NULL;
1826 if (ompt_enabled.ompt_callback_implicit_task) {
1827 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1828 ompt_scope_end, NULL, &(task_info->task_data), 1,
1829 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1832 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1833 __ompt_lw_taskteam_unlink(master_th);
1834 if (ompt_enabled.ompt_callback_parallel_end) {
1835 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1836 ompt_parallel_data, *parent_task_data,
1837 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1839 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1843 } else if (call_context == fork_context_gnu) {
1845 if (ompt_enabled.enabled) {
1846 ompt_lw_taskteam_t lwt;
1847 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1850 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1851 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1857 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1860 KMP_ASSERT2(call_context < fork_context_last,
1861 "__kmp_serial_fork_call: unknown fork_context parameter");
1864 KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1871 int __kmp_fork_call(ident_t *loc, int gtid,
1872 enum fork_context_e call_context,
1873 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1878 int master_this_cons;
1880 kmp_team_t *parent_team;
1881 kmp_info_t *master_th;
1885 int master_set_numthreads;
1886 int task_thread_limit = 0;
1890 #if KMP_NESTED_HOT_TEAMS
1891 kmp_hot_team_ptr_t **p_hot_teams;
1894 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1897 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1898 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1901 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1903 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1904 __kmp_stkpadding += (short)((kmp_int64)dummy);
1910 if (!TCR_4(__kmp_init_parallel))
1911 __kmp_parallel_initialize();
1912 __kmp_resume_if_soft_paused();
1917 master_th = __kmp_threads[gtid];
1919 parent_team = master_th->th.th_team;
1920 master_tid = master_th->th.th_info.ds.ds_tid;
1921 master_this_cons = master_th->th.th_local.this_construct;
1922 root = master_th->th.th_root;
1923 master_active = root->r.r_active;
1924 master_set_numthreads = master_th->th.th_set_nproc;
1926 master_th->th.th_current_task->td_icvs.task_thread_limit;
1929 ompt_data_t ompt_parallel_data = ompt_data_none;
1930 ompt_data_t *parent_task_data;
1931 ompt_frame_t *ompt_frame;
1932 void *return_address = NULL;
1934 if (ompt_enabled.enabled) {
1935 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1937 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1942 __kmp_assign_root_init_mask();
1945 level = parent_team->t.t_level;
1947 active_level = parent_team->t.t_active_level;
1949 teams_level = master_th->th.th_teams_level;
1950 #if KMP_NESTED_HOT_TEAMS
1951 p_hot_teams = &master_th->th.th_hot_teams;
1952 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1953 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1954 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1955 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1957 (*p_hot_teams)[0].hot_team_nth = 1;
1962 if (ompt_enabled.enabled) {
1963 if (ompt_enabled.ompt_callback_parallel_begin) {
1964 int team_size = master_set_numthreads
1965 ? master_set_numthreads
1966 : get__nproc_2(parent_team, master_tid);
1967 int flags = OMPT_INVOKER(call_context) |
1968 ((microtask == (microtask_t)__kmp_teams_master)
1969 ? ompt_parallel_league
1970 : ompt_parallel_team);
1971 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1972 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1975 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1979 master_th->th.th_ident = loc;
1982 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
1983 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1984 call_context, microtask, invoker,
1985 master_set_numthreads, level,
1987 ompt_parallel_data, return_address,
1993 if (__kmp_tasking_mode != tskm_immediate_exec) {
1994 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1995 parent_team->t.t_task_team[master_th->th.th_task_state]);
2005 __kmp_is_entering_teams(active_level, level, teams_level, ap);
2006 if ((!enter_teams &&
2007 (parent_team->t.t_active_level >=
2008 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
2009 (__kmp_library == library_serial)) {
2010 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
2013 nthreads = master_set_numthreads
2014 ? master_set_numthreads
2016 : get__nproc_2(parent_team, master_tid);
2019 nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
2026 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2031 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2032 nthreads, enter_teams);
2033 if (nthreads == 1) {
2037 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2041 KMP_DEBUG_ASSERT(nthreads > 0);
2044 master_th->th.th_set_nproc = 0;
2046 if (nthreads == 1) {
2047 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2048 invoker, master_th, parent_team,
2050 &ompt_parallel_data, &return_address,
2058 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2059 "curtask=%p, curtask_max_aclevel=%d\n",
2060 parent_team->t.t_active_level, master_th,
2061 master_th->th.th_current_task,
2062 master_th->th.th_current_task->td_icvs.max_active_levels));
2066 master_th->th.th_current_task->td_flags.executing = 0;
2068 if (!master_th->th.th_teams_microtask || level > teams_level) {
2070 KMP_ATOMIC_INC(&root->r.r_in_parallel);
2074 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2075 if ((level + 1 < __kmp_nested_nth.used) &&
2076 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2077 nthreads_icv = __kmp_nested_nth.nth[level + 1];
2083 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2085 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2086 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2087 proc_bind = proc_bind_false;
2091 if (proc_bind == proc_bind_default) {
2092 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2095 if (master_th->th.th_teams_microtask &&
2096 microtask == (microtask_t)__kmp_teams_master) {
2097 proc_bind = __kmp_teams_proc_bind;
2103 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2104 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2105 master_th->th.th_current_task->td_icvs.proc_bind)) {
2108 if (!master_th->th.th_teams_microtask ||
2109 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2110 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2115 master_th->th.th_set_proc_bind = proc_bind_default;
2117 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2118 kmp_internal_control_t new_icvs;
2119 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2120 new_icvs.next = NULL;
2121 if (nthreads_icv > 0) {
2122 new_icvs.nproc = nthreads_icv;
2124 if (proc_bind_icv != proc_bind_default) {
2125 new_icvs.proc_bind = proc_bind_icv;
2129 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2130 team = __kmp_allocate_team(root, nthreads, nthreads,
2134 proc_bind, &new_icvs,
2135 argc USE_NESTED_HOT_ARG(master_th));
2136 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2137 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2140 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
2141 team = __kmp_allocate_team(root, nthreads, nthreads,
2146 &master_th->th.th_current_task->td_icvs,
2147 argc USE_NESTED_HOT_ARG(master_th));
2148 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2149 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2150 &master_th->th.th_current_task->td_icvs);
2153 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2156 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2157 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2158 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2159 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2160 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2162 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2165 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
2167 if (!master_th->th.th_teams_microtask || level > teams_level) {
2168 int new_level = parent_team->t.t_level + 1;
2169 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2170 new_level = parent_team->t.t_active_level + 1;
2171 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2174 int new_level = parent_team->t.t_level;
2175 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2176 new_level = parent_team->t.t_active_level;
2177 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2179 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2181 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2183 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2184 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2187 propagateFPControl(team);
2189 if (ompd_state & OMPD_ENABLE_BP)
2190 ompd_bp_parallel_begin();
2193 if (__kmp_tasking_mode != tskm_immediate_exec) {
2196 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2197 parent_team->t.t_task_team[master_th->th.th_task_state]);
2198 KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2199 "%p, new task_team %p / team %p\n",
2200 __kmp_gtid_from_thread(master_th),
2201 master_th->th.th_task_team, parent_team,
2202 team->t.t_task_team[master_th->th.th_task_state], team));
2204 if (active_level || master_th->th.th_task_team) {
2206 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2207 if (master_th->th.th_task_state_top >=
2208 master_th->th.th_task_state_stack_sz) {
2209 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2210 kmp_uint8 *old_stack, *new_stack;
2212 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2213 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2214 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2216 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2220 old_stack = master_th->th.th_task_state_memo_stack;
2221 master_th->th.th_task_state_memo_stack = new_stack;
2222 master_th->th.th_task_state_stack_sz = new_size;
2223 __kmp_free(old_stack);
2227 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2228 master_th->th.th_task_state;
2229 master_th->th.th_task_state_top++;
2230 #if KMP_NESTED_HOT_TEAMS
2231 if (master_th->th.th_hot_teams &&
2232 active_level < __kmp_hot_teams_max_level &&
2233 team == master_th->th.th_hot_teams[active_level].hot_team) {
2235 master_th->th.th_task_state =
2237 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2240 master_th->th.th_task_state = 0;
2241 #if KMP_NESTED_HOT_TEAMS
2245 #if !KMP_NESTED_HOT_TEAMS
2246 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2247 (team == root->r.r_hot_team));
2253 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2254 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2256 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2257 (team->t.t_master_tid == 0 &&
2258 (team->t.t_parent == root->r.r_root_team ||
2259 team->t.t_parent->t.t_serialized)));
2263 argv = (void **)team->t.t_argv;
2265 for (i = argc - 1; i >= 0; --i) {
2266 void *new_argv = va_arg(kmp_va_deref(ap), void *);
2267 KMP_CHECK_UPDATE(*argv, new_argv);
2271 for (i = 0; i < argc; ++i) {
2273 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2278 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2279 if (!root->r.r_active)
2280 root->r.r_active = TRUE;
2282 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2283 __kmp_setup_icv_copy(team, nthreads,
2284 &master_th->th.th_current_task->td_icvs, loc);
2287 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2290 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2293 if (team->t.t_active_level == 1
2294 && !master_th->th.th_teams_microtask) {
2296 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2297 (__kmp_forkjoin_frames_mode == 3 ||
2298 __kmp_forkjoin_frames_mode == 1)) {
2299 kmp_uint64 tmp_time = 0;
2300 if (__itt_get_timestamp_ptr)
2301 tmp_time = __itt_get_timestamp();
2303 master_th->th.th_frame_time = tmp_time;
2304 if (__kmp_forkjoin_frames_mode == 3)
2305 team->t.t_region_time = tmp_time;
2309 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2310 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2312 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2318 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2321 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2322 root, team, master_th, gtid));
2325 if (__itt_stack_caller_create_ptr) {
2328 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2329 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2330 } else if (parent_team->t.t_serialized) {
2335 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2336 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2344 __kmp_internal_fork(loc, gtid, team);
2345 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
2346 "master_th=%p, gtid=%d\n",
2347 root, team, master_th, gtid));
2350 if (call_context == fork_context_gnu) {
2351 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2356 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2357 team->t.t_id, team->t.t_pkfn));
2360 #if KMP_STATS_ENABLED
2364 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
2372 #if KMP_STATS_ENABLED
2375 KMP_SET_THREAD_STATE(previous_state);
2379 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2380 team->t.t_id, team->t.t_pkfn));
2383 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2385 if (ompt_enabled.enabled) {
2386 master_th->th.ompt_thread_info.state = ompt_state_overhead;
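// OMPT helpers for the join path: restore the thread's work state once the
// region is over, and fire the parallel-end callback before the team is torn
// down.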
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
2405 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2406 if (ompt_enabled.ompt_callback_parallel_end) {
2407 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2408 parallel_data, &(task_info->task_data), flags, codeptr);
2411 task_info->frame.enter_frame = ompt_data_none;
2412 __kmp_join_restore_state(thread, team);
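// Join the team at the end of a parallel region: wait for the workers at the
// join barrier, finish ITT/OMPT bookkeeping, and restore the primary thread's
// team, task team and task state from the parent team.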
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *parent_team;
  kmp_info_t *master_th;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2433 master_th = __kmp_threads[gtid];
2434 root = master_th->th.th_root;
2435 team = master_th->th.th_team;
2436 parent_team = team->t.t_parent;
2438 master_th->th.th_ident = loc;
  void *team_microtask = (void *)team->t.t_pkfn;
2445 if (ompt_enabled.enabled &&
2446 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2447 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2452 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
2458 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2459 team->t.t_task_team[master_th->th.th_task_state]);
2463 if (team->t.t_serialized) {
2464 if (master_th->th.th_teams_microtask) {
2466 int level = team->t.t_level;
2467 int tlevel = master_th->th.th_teams_level;
2468 if (level == tlevel) {
      } else if (level == tlevel + 1) {
        team->t.t_serialized++;
2482 if (ompt_enabled.enabled) {
2483 if (fork_context == fork_context_gnu) {
2484 __ompt_lw_taskteam_unlink(master_th);
2486 __kmp_join_restore_state(master_th, parent_team);
2493 master_active = team->t.t_master_active;
2498 __kmp_internal_join(loc, gtid, team);
2500 if (__itt_stack_caller_create_ptr) {
2501 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2503 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2504 team->t.t_stack_id = NULL;
2508 master_th->th.th_task_state =
2511 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2512 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2516 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2517 parent_team->t.t_stack_id = NULL;
2525 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2526 void *codeptr = team->t.ompt_team_info.master_return_address;
2531 if (team->t.t_active_level == 1 &&
2532 (!master_th->th.th_teams_microtask ||
2533 master_th->th.th_teams_size.nteams == 1)) {
2534 master_th->th.th_ident = loc;
2537 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2538 __kmp_forkjoin_frames_mode == 3)
2539 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2540 master_th->th.th_frame_time, 0, loc,
2541 master_th->th.th_team_nproc, 1);
2542 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2543 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2544 __kmp_itt_region_joined(gtid);
2548 #if KMP_AFFINITY_SUPPORTED
2551 master_th->th.th_first_place = team->t.t_first_place;
2552 master_th->th.th_last_place = team->t.t_last_place;
2556 if (master_th->th.th_teams_microtask && !exit_teams &&
2557 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2558 team->t.t_level == master_th->th.th_teams_level + 1) {
2563 ompt_data_t ompt_parallel_data = ompt_data_none;
2564 if (ompt_enabled.enabled) {
2565 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2566 if (ompt_enabled.ompt_callback_implicit_task) {
2567 int ompt_team_size = team->t.t_nproc;
2568 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2569 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2570 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2572 task_info->frame.exit_frame = ompt_data_none;
2573 task_info->task_data = ompt_data_none;
2574 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2575 __ompt_lw_taskteam_unlink(master_th);
2580 team->t.t_active_level--;
2581 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2587 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2588 int old_num = master_th->th.th_team_nproc;
2589 int new_num = master_th->th.th_teams_size.nth;
2590 kmp_info_t **other_threads = team->t.t_threads;
2591 team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      for (int i = old_num; i < new_num; ++i) {
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
2601 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2602 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2604 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2607 if (__kmp_tasking_mode != tskm_immediate_exec) {
2609 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2615 if (ompt_enabled.enabled) {
2616 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2617 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2625 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2626 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2628 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2633 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2635 if (!master_th->th.th_teams_microtask ||
2636 team->t.t_level > master_th->th.th_teams_level) {
2638 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2640 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2643 if (ompt_enabled.enabled) {
2644 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2645 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
2649 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2650 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2651 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2652 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2654 task_info->frame.exit_frame = ompt_data_none;
2655 task_info->task_data = ompt_data_none;
2659 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2661 __kmp_pop_current_task_from_thread(master_th);
2663 master_th->th.th_def_allocator = team->t.t_def_allocator;
2666 if (ompd_state & OMPD_ENABLE_BP)
2667 ompd_bp_parallel_end();
2669 updateHWFPControl(team);
2671 if (root->r.r_active != master_active)
2672 root->r.r_active = master_active;
2674 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2682 master_th->th.th_team = parent_team;
2683 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2684 master_th->th.th_team_master = parent_team->t.t_threads[0];
2685 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2688 if (parent_team->t.t_serialized &&
2689 parent_team != master_th->th.th_serial_team &&
2690 parent_team != root->r.r_root_team) {
2691 __kmp_free_team(root,
2692 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2693 master_th->th.th_serial_team = parent_team;
2696 if (__kmp_tasking_mode != tskm_immediate_exec) {
2697 if (master_th->th.th_task_state_top >
2699 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2701 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2702 master_th->th.th_task_state;
2703 --master_th->th.th_task_state_top;
2705 master_th->th.th_task_state =
2707 .th_task_state_memo_stack[master_th->th.th_task_state_top];
    } else if (team != root->r.r_hot_team) {
      master_th->th.th_task_state = 0;
2716 master_th->th.th_task_team =
2717 parent_team->t.t_task_team[master_th->th.th_task_state];
2719 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2720 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2727 master_th->th.th_current_task->td_flags.executing = 1;
2729 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2731 #if KMP_AFFINITY_SUPPORTED
2732 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2733 __kmp_reset_root_init_mask(gtid);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
2753 void __kmp_save_internal_controls(kmp_info_t *thread) {
2755 if (thread->th.th_team != thread->th.th_serial_team) {
2758 if (thread->th.th_team->t.t_serialized > 1) {
2761 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2764 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2765 thread->th.th_team->t.t_serialized) {
2770 kmp_internal_control_t *control =
2771 (kmp_internal_control_t *)__kmp_allocate(
2772 sizeof(kmp_internal_control_t));
2774 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2776 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2778 control->next = thread->th.th_team->t.t_control_stack_top;
2779 thread->th.th_team->t.t_control_stack_top = control;
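// Runtime side of omp_set_num_threads(): clamp the request to __kmp_max_nth,
// update the nproc ICV for the current task, and, when the runtime is idle,
// release surplus hot-team threads back to the thread pool.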
void __kmp_set_num_threads(int new_nth, int gtid) {
  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2790 KMP_DEBUG_ASSERT(__kmp_init_serial);
2794 else if (new_nth > __kmp_max_nth)
2795 new_nth = __kmp_max_nth;
2798 thread = __kmp_threads[gtid];
2799 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2802 __kmp_save_internal_controls(thread);
2804 set__nproc(thread, new_nth);
2809 root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
2819 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2821 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2822 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2825 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2826 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2827 if (__kmp_tasking_mode != tskm_immediate_exec) {
2830 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2832 __kmp_free_thread(hot_team->t.t_threads[f]);
2833 hot_team->t.t_threads[f] = NULL;
2835 hot_team->t.t_nproc = new_nth;
2836 #if KMP_NESTED_HOT_TEAMS
2837 if (thread->th.th_hot_teams) {
2838 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2839 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2843 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2844 hot_team->t.b->update_num_threads(new_nth);
2845 __kmp_add_threads_to_team(hot_team, new_nth);
2848 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2851 for (f = 0; f < new_nth; f++) {
2852 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2853 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2856 hot_team->t.t_size_changed = -1;
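// Runtime side of omp_set_max_active_levels(): negative values are ignored,
// values above KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped, and the validated
// value is stored in the current task's ICVs.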
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  KF_TRACE(10,
           ("__kmp_set_max_active_levels: new max_active_levels for thread "
            "%d = (%d)\n",
            gtid, max_active_levels));
2867 KMP_DEBUG_ASSERT(__kmp_init_serial);
2870 if (max_active_levels < 0) {
2871 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2876 KF_TRACE(10, (
"__kmp_set_max_active_levels: the call is ignored: new "
2877 "max_active_levels for thread %d = (%d)\n",
2878 gtid, max_active_levels));
2881 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2886 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2887 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2888 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2894 KF_TRACE(10, (
"__kmp_set_max_active_levels: after validation: new "
2895 "max_active_levels for thread %d = (%d)\n",
2896 gtid, max_active_levels));
2898 thread = __kmp_threads[gtid];
2900 __kmp_save_internal_controls(thread);
2902 set__max_active_levels(thread, max_active_levels);
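// Runtime side of omp_get_max_active_levels(): read the ICV back from the
// current task.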
int __kmp_get_max_active_levels(int gtid) {
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2910 KMP_DEBUG_ASSERT(__kmp_init_serial);
2912 thread = __kmp_threads[gtid];
2913 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2914 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, "
2915 "curtask_maxaclevel=%d\n",
2916 gtid, thread->th.th_current_task,
2917 thread->th.th_current_task->td_icvs.max_active_levels));
2918 return thread->th.th_current_task->td_icvs.max_active_levels;
void __kmp_set_num_teams(int num_teams) {
  __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
void __kmp_set_teams_thread_limit(int limit) {
  __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
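// Runtime side of omp_set_schedule(): strip/reapply the monotonic modifiers,
// map the standard kmp_sched_t kind onto the internal sched_type via
// __kmp_sch_map, and store the kind and chunk in the current task's ICVs.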
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
2945 KMP_DEBUG_ASSERT(__kmp_init_serial);
2952 kind = __kmp_sched_without_mods(kind);
2954 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2955 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
2964 thread = __kmp_threads[gtid];
2966 __kmp_save_internal_controls(thread);
2968 if (kind < kmp_sched_upper_std) {
2969 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2972 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2974 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2975 __kmp_sch_map[kind - kmp_sched_lower - 1];
2980 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2981 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2982 kmp_sched_lower - 2];
2984 __kmp_sched_apply_mods_intkind(
2985 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2986 if (kind == kmp_sched_auto || chunk < 1) {
2988 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2990 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
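// Runtime side of omp_get_schedule(): translate the internal sched_type of the
// current task back into a standard kind/chunk pair.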
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
3000 KMP_DEBUG_ASSERT(__kmp_init_serial);
3002 thread = __kmp_threads[gtid];
3004 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
3005 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
3007 case kmp_sch_static_greedy:
3008 case kmp_sch_static_balanced:
3009 *kind = kmp_sched_static;
3010 __kmp_sched_apply_mods_stdkind(kind, th_type);
3013 case kmp_sch_static_chunked:
3014 *kind = kmp_sched_static;
3016 case kmp_sch_dynamic_chunked:
3017 *kind = kmp_sched_dynamic;
3020 case kmp_sch_guided_iterative_chunked:
3021 case kmp_sch_guided_analytical_chunked:
3022 *kind = kmp_sched_guided;
3025 *kind = kmp_sched_auto;
3027 case kmp_sch_trapezoidal:
3028 *kind = kmp_sched_trapezoidal;
3030 #if KMP_STATIC_STEAL_ENABLED
3031 case kmp_sch_static_steal:
3032 *kind = kmp_sched_static_steal;
3036 KMP_FATAL(UnknownSchedulingType, th_type);
3039 __kmp_sched_apply_mods_stdkind(kind, th_type);
3040 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
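// omp_get_ancestor_thread_num(): walk up the team hierarchy, counting
// serialized levels, until the requested level is reached and return the
// thread number the caller had at that level.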
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3050 KMP_DEBUG_ASSERT(__kmp_init_serial);
3057 thr = __kmp_threads[gtid];
3058 team = thr->th.th_team;
3059 ii = team->t.t_level;
3063 if (thr->th.th_teams_microtask) {
3065 int tlevel = thr->th.th_teams_level;
3068 KMP_DEBUG_ASSERT(ii >= tlevel);
3080 return __kmp_tid_from_gtid(gtid);
3082 dd = team->t.t_serialized;
3084 while (ii > level) {
3085 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3087 if ((team->t.t_serialized) && (!dd)) {
3088 team = team->t.t_parent;
3092 team = team->t.t_parent;
3093 dd = team->t.t_serialized;
3098 return (dd > 1) ? (0) : (team->t.t_master_tid);
int __kmp_get_team_size(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3108 KMP_DEBUG_ASSERT(__kmp_init_serial);
3115 thr = __kmp_threads[gtid];
3116 team = thr->th.th_team;
3117 ii = team->t.t_level;
3121 if (thr->th.th_teams_microtask) {
3123 int tlevel = thr->th.th_teams_level;
3126 KMP_DEBUG_ASSERT(ii >= tlevel);
3137 while (ii > level) {
3138 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3140 if (team->t.t_serialized && (!dd)) {
3141 team = team->t.t_parent;
3145 team = team->t.t_parent;
3150 return team->t.t_nproc;
3153 kmp_r_sched_t __kmp_get_schedule_global() {
3158 kmp_r_sched_t r_sched;
3164 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3165 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3168 r_sched.r_sched_type = __kmp_static;
3171 r_sched.r_sched_type = __kmp_guided;
3173 r_sched.r_sched_type = __kmp_sched;
3175 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3177 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3179 r_sched.chunk = KMP_DEFAULT_CHUNK;
3181 r_sched.chunk = __kmp_chunk;
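// Make sure team->t.t_argv can hold argc entries: small argument counts reuse
// the team's inline buffer, larger ones get a page-allocated array.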
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3191 KMP_DEBUG_ASSERT(team);
3192 if (!realloc || argc > team->t.t_max_argc) {
    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);
3201 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3203 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
3217 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3218 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3220 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3222 team->t.t_id, team->t.t_max_argc));
3224 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3225 if (__kmp_storage_map) {
3226 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3227 &team->t.t_argv[team->t.t_max_argc],
3228 sizeof(
void *) * team->t.t_max_argc,
3229 "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3246 team->t.t_max_nproc = max_nth;
3249 for (i = 0; i < num_disp_buff; ++i) {
3250 team->t.t_disp_buffer[i].buffer_index = i;
3251 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3255 static void __kmp_free_team_arrays(kmp_team_t *team) {
3258 for (i = 0; i < team->t.t_max_nproc; ++i) {
3259 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3260 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3261 team->t.t_dispatch[i].th_disp_buffer = NULL;
3264 #if KMP_USE_HIER_SCHED
3265 __kmp_dispatch_free_hierarchies(team);
3267 __kmp_free(team->t.t_threads);
3268 __kmp_free(team->t.t_disp_buffer);
3269 __kmp_free(team->t.t_dispatch);
3270 __kmp_free(team->t.t_implicit_task_taskdata);
3271 team->t.t_threads = NULL;
3272 team->t.t_disp_buffer = NULL;
3273 team->t.t_dispatch = NULL;
3274 team->t.t_implicit_task_taskdata = 0;
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
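// Build the set of global ICVs (dynamic mode, blocktime, default team size,
// max_active_levels, proc-bind type, default device, ...) used whenever a new
// root or team is created.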
static kmp_internal_control_t __kmp_get_global_icvs(void) {
3293 kmp_r_sched_t r_sched =
3294 __kmp_get_schedule_global();
3296 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3298 kmp_internal_control_t g_icvs = {
3300 (kmp_int8)__kmp_global.g.g_dynamic,
3302 (kmp_int8)__kmp_env_blocktime,
3304 __kmp_dflt_blocktime,
3309 __kmp_dflt_team_nth,
3315 __kmp_dflt_max_active_levels,
3319 __kmp_nested_proc_bind.bind_types[0],
3320 __kmp_default_device,
3327 static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3329 kmp_internal_control_t gx_icvs;
3330 gx_icvs.serial_nesting_level =
3332 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3333 gx_icvs.next = NULL;
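// Initialize a freshly allocated root: reset its flags and blocktime, then
// create the single-threaded root team and the hot team that will service this
// root's parallel regions.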
3338 static void __kmp_initialize_root(kmp_root_t *root) {
3340 kmp_team_t *root_team;
3341 kmp_team_t *hot_team;
3342 int hot_team_max_nth;
3343 kmp_r_sched_t r_sched =
3344 __kmp_get_schedule_global();
3345 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3346 KMP_DEBUG_ASSERT(root);
3347 KMP_ASSERT(!root->r.r_begin);
3350 __kmp_init_lock(&root->r.r_begin_lock);
3351 root->r.r_begin = FALSE;
3352 root->r.r_active = FALSE;
3353 root->r.r_in_parallel = 0;
3354 root->r.r_blocktime = __kmp_dflt_blocktime;
3355 #if KMP_AFFINITY_SUPPORTED
3356 root->r.r_affinity_assigned = FALSE;
3361 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3364 __kmp_allocate_team(root,
3370 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3372 USE_NESTED_HOT_ARG(NULL)
3377 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3380 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3382 root->r.r_root_team = root_team;
3383 root_team->t.t_control_stack_top = NULL;
3386 root_team->t.t_threads[0] = NULL;
3387 root_team->t.t_nproc = 1;
3388 root_team->t.t_serialized = 1;
3390 root_team->t.t_sched.sched = r_sched.sched;
3393 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3394 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3398 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3401 __kmp_allocate_team(root,
3403 __kmp_dflt_team_nth_ub * 2,
3407 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3409 USE_NESTED_HOT_ARG(NULL)
3411 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3413 root->r.r_hot_team = hot_team;
3414 root_team->t.t_control_stack_top = NULL;
3417 hot_team->t.t_parent = root_team;
3420 hot_team_max_nth = hot_team->t.t_max_nproc;
3421 for (f = 0; f < hot_team_max_nth; ++f) {
3422 hot_team->t.t_threads[f] = NULL;
3424 hot_team->t.t_nproc = 1;
3426 hot_team->t.t_sched.sched = r_sched.sched;
3427 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
3438 static void __kmp_print_structure_team_accum(
3439 kmp_team_list_t list,
3440 kmp_team_p
const *team
3450 KMP_DEBUG_ASSERT(list != NULL);
3455 __kmp_print_structure_team_accum(list, team->t.t_parent);
3456 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3460 while (l->next != NULL && l->entry != team) {
3463 if (l->next != NULL) {
3469 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3475 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3476 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3504 void __kmp_print_structure(
void) {
3506 kmp_team_list_t list;
3510 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3514 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3515 "Table\n------------------------------\n");
3518 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3519 __kmp_printf(
"%2d", gtid);
3520 if (__kmp_threads != NULL) {
3521 __kmp_printf(
" %p", __kmp_threads[gtid]);
3523 if (__kmp_root != NULL) {
3524 __kmp_printf(
" %p", __kmp_root[gtid]);
3531 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3533 if (__kmp_threads != NULL) {
3535 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3536 kmp_info_t
const *thread = __kmp_threads[gtid];
3537 if (thread != NULL) {
3538 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3539 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3540 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3541 __kmp_print_structure_team(
" Serial Team: ",
3542 thread->th.th_serial_team);
3543 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3544 __kmp_print_structure_thread(
" Primary: ",
3545 thread->th.th_team_master);
3546 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3547 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3548 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3549 __kmp_print_structure_thread(
" Next in pool: ",
3550 thread->th.th_next_pool);
3552 __kmp_print_structure_team_accum(list, thread->th.th_team);
3553 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3557 __kmp_printf(
"Threads array is not allocated.\n");
3561 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3563 if (__kmp_root != NULL) {
3565 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3566 kmp_root_t
const *root = __kmp_root[gtid];
3568 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3569 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3570 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3571 __kmp_print_structure_thread(
" Uber Thread: ",
3572 root->r.r_uber_thread);
3573 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3574 __kmp_printf(
" In Parallel: %2d\n",
3575 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3577 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3578 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3582 __kmp_printf(
"Ubers array is not allocated.\n");
3585 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3587 while (list->next != NULL) {
3588 kmp_team_p
const *team = list->entry;
3590 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3591 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3592 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3593 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3594 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3595 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3596 for (i = 0; i < team->t.t_nproc; ++i) {
3597 __kmp_printf(
" Thread %2d: ", i);
3598 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3600 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3606 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3608 __kmp_print_structure_thread(
"Thread pool: ",
3609 CCAST(kmp_info_t *, __kmp_thread_pool));
3610 __kmp_print_structure_team(
"Team pool: ",
3611 CCAST(kmp_team_t *, __kmp_team_pool));
3615 while (list != NULL) {
3616 kmp_team_list_item_t *item = list;
3618 KMP_INTERNAL_FREE(item);
3627 static const unsigned __kmp_primes[] = {
3628 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3629 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3630 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3631 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3632 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3633 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3634 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3635 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3636 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3637 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3638 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
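// Simple per-thread linear congruential generator: th_a is a per-thread
// multiplier picked from __kmp_primes and th_x is the current state; the high
// 16 bits of the state are returned as the random value.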
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3668 static int __kmp_reclaim_dead_roots(
void) {
3671 for (i = 0; i < __kmp_threads_capacity; ++i) {
3672 if (KMP_UBER_GTID(i) &&
3673 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3676 r += __kmp_unregister_root_other_thread(i);
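// Grow __kmp_threads and __kmp_root together (doubling up to __kmp_sys_max_nth)
// so that at least nNeed additional slots are available; the old arrays are
// kept on __kmp_old_threads_list because other threads may still be reading
// them.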
static int __kmp_expand_threads(int nNeed) {
3703 int minimumRequiredCapacity;
3705 kmp_info_t **newThreads;
3706 kmp_root_t **newRoot;
3712 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3715 added = __kmp_reclaim_dead_roots();
3744 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3747 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3751 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3753 newCapacity = __kmp_threads_capacity;
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3762 KMP_MEMCPY(newThreads, __kmp_threads,
3763 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3764 KMP_MEMCPY(newRoot, __kmp_root,
3765 __kmp_threads_capacity *
sizeof(kmp_root_t *));
3768 kmp_old_threads_list_t *node =
3769 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3770 node->threads = __kmp_threads;
3771 node->next = __kmp_old_threads_list;
3772 __kmp_old_threads_list = node;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
3779 if (newCapacity > __kmp_tp_capacity) {
3780 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3781 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3782 __kmp_threadprivate_resize_cache(newCapacity);
3784 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3786 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
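// Register the calling thread as a new OpenMP root (initial) thread: find a
// free gtid slot, allocate and initialize the kmp_root_t with its root, hot
// and serial teams plus the per-thread bookkeeping, then report the thread to
// OMPT/OMPD.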
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;

  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3818 capacity = __kmp_threads_capacity;
3819 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3826 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3827 capacity -= __kmp_hidden_helper_threads_num;
3831 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3832 if (__kmp_tp_cached) {
3833 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3834 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3835 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3837 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3847 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3850 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3851 gtid <= __kmp_hidden_helper_threads_num;
3854 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3855 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3856 "hidden helper thread: T#%d\n",
3862 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3865 for (gtid = __kmp_hidden_helper_threads_num + 1;
3866 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3870 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3871 KMP_ASSERT(gtid < __kmp_threads_capacity);
3876 TCW_4(__kmp_nth, __kmp_nth + 1);
3880 if (__kmp_adjust_gtid_mode) {
3881 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3882 if (TCR_4(__kmp_gtid_mode) != 2) {
3883 TCW_4(__kmp_gtid_mode, 2);
3886 if (TCR_4(__kmp_gtid_mode) != 1) {
3887 TCW_4(__kmp_gtid_mode, 1);
3892 #ifdef KMP_ADJUST_BLOCKTIME
3895 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3896 if (__kmp_nth > __kmp_avail_proc) {
3897 __kmp_zero_bt = TRUE;
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3905 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3908 #if KMP_STATS_ENABLED
3910 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3911 __kmp_stats_thread_ptr->startLife();
3912 KMP_SET_THREAD_STATE(SERIAL_REGION);
3915 __kmp_initialize_root(root);
3918 if (root->r.r_uber_thread) {
3919 root_thread = root->r.r_uber_thread;
3921 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3922 if (__kmp_storage_map) {
3923 __kmp_print_thread_storage_map(root_thread, gtid);
3925 root_thread->th.th_info.ds.ds_gtid = gtid;
3927 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3929 root_thread->th.th_root = root;
3930 if (__kmp_env_consistency_check) {
3931 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3934 __kmp_initialize_fast_memory(root_thread);
3938 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3939 __kmp_initialize_bget(root_thread);
3941 __kmp_init_random(root_thread);
3945 if (!root_thread->th.th_serial_team) {
3946 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3947 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3948 root_thread->th.th_serial_team = __kmp_allocate_team(
3953 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3955 KMP_ASSERT(root_thread->th.th_serial_team);
3956 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3957 root_thread->th.th_serial_team));
3960 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3962 root->r.r_root_team->t.t_threads[0] = root_thread;
3963 root->r.r_hot_team->t.t_threads[0] = root_thread;
3964 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3966 root_thread->th.th_serial_team->t.t_serialized = 0;
3967 root->r.r_uber_thread = root_thread;
3970 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3971 TCW_4(__kmp_init_gtid, TRUE);
3974 __kmp_gtid_set_specific(gtid);
3977 __kmp_itt_thread_name(gtid);
3980 #ifdef KMP_TDATA_GTID
3983 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3984 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
3993 for (b = 0; b < bs_last_barrier; ++b) {
3994 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3996 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4000 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4001 KMP_INIT_BARRIER_STATE);
4003 #if KMP_AFFINITY_SUPPORTED
4004 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4005 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4006 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4007 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4009 root_thread->th.th_def_allocator = __kmp_def_allocator;
4010 root_thread->th.th_prev_level = 0;
4011 root_thread->th.th_prev_num_threads = 1;
4013 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
4014 tmp->cg_root = root_thread;
4015 tmp->cg_thread_limit = __kmp_cg_max_nth;
4016 tmp->cg_nthreads = 1;
4017 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
4018 " cg_nthreads init to 1\n",
4021 root_thread->th.th_cg_roots = tmp;
4023 __kmp_root_counter++;
4026 if (!initial_thread && ompt_enabled.enabled) {
4028 kmp_info_t *root_thread = ompt_get_thread();
4030 ompt_set_thread_state(root_thread, ompt_state_overhead);
4032 if (ompt_enabled.ompt_callback_thread_begin) {
4033 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4034 ompt_thread_initial, __ompt_get_thread_data_internal());
4036 ompt_data_t *task_data;
4037 ompt_data_t *parallel_data;
4038 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4040 if (ompt_enabled.ompt_callback_implicit_task) {
4041 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4042 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4045 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4049 if (ompd_state & OMPD_ENABLE_BP)
4050 ompd_bp_thread_begin();
4054 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4059 #if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
4063 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4064 if (!hot_teams || !hot_teams[level].hot_team) {
4067 KMP_DEBUG_ASSERT(level < max_level);
4068 kmp_team_t *team = hot_teams[level].hot_team;
4069 nth = hot_teams[level].hot_team_nth;
4071 if (level < max_level - 1) {
4072 for (i = 0; i < nth; ++i) {
4073 kmp_info_t *th = team->t.t_threads[i];
4074 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4075 if (i > 0 && th->th.th_hot_teams) {
4076 __kmp_free(th->th.th_hot_teams);
4077 th->th.th_hot_teams = NULL;
4081 __kmp_free_team(root, team, NULL);
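// Tear a root down: detach and free its root and hot teams (including nested
// hot teams), release the uber thread's resources, and report the thread end
// to OMPT/OMPD; n tracks how many hot-team threads are being released.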
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4089 kmp_team_t *root_team = root->r.r_root_team;
4090 kmp_team_t *hot_team = root->r.r_hot_team;
4091 int n = hot_team->t.t_nproc;
4094 KMP_DEBUG_ASSERT(!root->r.r_active);
4096 root->r.r_root_team = NULL;
4097 root->r.r_hot_team = NULL;
4100 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4101 #if KMP_NESTED_HOT_TEAMS
4102 if (__kmp_hot_teams_max_level >
4104 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4105 kmp_info_t *th = hot_team->t.t_threads[i];
4106 if (__kmp_hot_teams_max_level > 1) {
4107 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4109 if (th->th.th_hot_teams) {
4110 __kmp_free(th->th.th_hot_teams);
4111 th->th.th_hot_teams = NULL;
4116 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4121 if (__kmp_tasking_mode != tskm_immediate_exec) {
4122 __kmp_wait_to_unref_task_teams();
4128 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4130 (LPVOID) & (root->r.r_uber_thread->th),
4131 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4132 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4136 if (ompd_state & OMPD_ENABLE_BP)
4137 ompd_bp_thread_end();
4141 ompt_data_t *task_data;
4142 ompt_data_t *parallel_data;
4143 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4145 if (ompt_enabled.ompt_callback_implicit_task) {
4146 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4147 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4149 if (ompt_enabled.ompt_callback_thread_end) {
4150 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4151 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4157 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4158 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4160 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4161 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4164 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4165 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4166 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4167 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4168 root->r.r_uber_thread->th.th_cg_roots = NULL;
4170 __kmp_reap_thread(root->r.r_uber_thread, 1);
4174 root->r.r_uber_thread = NULL;
4176 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4186 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4187 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4188 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4191 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4194 kmp_root_t *root = __kmp_root[gtid];
4196 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4197 KMP_ASSERT(KMP_UBER_GTID(gtid));
4198 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4199 KMP_ASSERT(root->r.r_active == FALSE);
4203 kmp_info_t *thread = __kmp_threads[gtid];
4204 kmp_team_t *team = thread->th.th_team;
4205 kmp_task_team_t *task_team = thread->th.th_task_team;
4208 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4209 task_team->tt.tt_hidden_helper_task_encountered)) {
4212 thread->th.ompt_thread_info.state = ompt_state_undefined;
4214 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4217 __kmp_reset_root(gtid, root);
4221 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4223 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4230 static int __kmp_unregister_root_other_thread(
int gtid) {
4231 kmp_root_t *root = __kmp_root[gtid];
4234 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4235 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4236 KMP_ASSERT(KMP_UBER_GTID(gtid));
4237 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4238 KMP_ASSERT(root->r.r_active == FALSE);
4240 r = __kmp_reset_root(gtid, root);
4242 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4248 void __kmp_task_info() {
4250 kmp_int32 gtid = __kmp_entry_gtid();
4251 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4252 kmp_info_t *this_thr = __kmp_threads[gtid];
4253 kmp_team_t *steam = this_thr->th.th_serial_team;
4254 kmp_team_t *team = this_thr->th.th_team;
4257 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4259 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4260 team->t.t_implicit_task_taskdata[tid].td_parent);
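// (Re)bind a kmp_info_t to a team slot: set its tid and team pointers,
// implicit task, dispatch buffer, CG root and task-state stack so it is ready
// to run as thread 'tid' of 'team'.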
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
4272 KMP_DEBUG_ASSERT(this_thr != NULL);
4273 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4274 KMP_DEBUG_ASSERT(team);
4275 KMP_DEBUG_ASSERT(team->t.t_threads);
4276 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4277 kmp_info_t *master = team->t.t_threads[0];
4278 KMP_DEBUG_ASSERT(master);
4279 KMP_DEBUG_ASSERT(master->th.th_root);
4283 TCW_SYNC_PTR(this_thr->th.th_team, team);
4285 this_thr->th.th_info.ds.ds_tid = tid;
4286 this_thr->th.th_set_nproc = 0;
4287 if (__kmp_tasking_mode != tskm_immediate_exec)
4290 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4292 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4293 this_thr->th.th_set_proc_bind = proc_bind_default;
4294 #if KMP_AFFINITY_SUPPORTED
4295 this_thr->th.th_new_place = this_thr->th.th_current_place;
4297 this_thr->th.th_root = master->th.th_root;
4300 this_thr->th.th_team_nproc = team->t.t_nproc;
4301 this_thr->th.th_team_master = master;
4302 this_thr->th.th_team_serialized = team->t.t_serialized;
4304 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4306 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4307 tid, gtid, this_thr, this_thr->th.th_current_task));
4309 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4312 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4313 tid, gtid, this_thr, this_thr->th.th_current_task));
4318 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4320 this_thr->th.th_local.this_construct = 0;
  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4330 this_thr->th.th_pri_head = NULL;
4333 if (this_thr != master &&
4334 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4336 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4337 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4340 int i = tmp->cg_nthreads--;
4341 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4342 " on node %p of thread %p to %d\n",
4343 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4348 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4350 this_thr->th.th_cg_roots->cg_nthreads++;
4351 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4352 " node %p of thread %p to %d\n",
4353 this_thr, this_thr->th.th_cg_roots,
4354 this_thr->th.th_cg_roots->cg_root,
4355 this_thr->th.th_cg_roots->cg_nthreads));
4356 this_thr->th.th_current_task->td_icvs.thread_limit =
4357 this_thr->th.th_cg_roots->cg_thread_limit;
  volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
  size_t disp_size =
      sizeof(dispatch_private_info_t) *
      (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4367 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4368 team->t.t_max_nproc));
4369 KMP_ASSERT(dispatch);
4370 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4371 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4373 dispatch->th_disp_index = 0;
4374 dispatch->th_doacross_buf_idx = 0;
4375 if (!dispatch->th_disp_buffer) {
4376 dispatch->th_disp_buffer =
4377 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4379 if (__kmp_storage_map) {
4380 __kmp_print_storage_map_gtid(
4381 gtid, &dispatch->th_disp_buffer[0],
4382 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4384 : __kmp_dispatch_num_buffers],
4386 "th_%d.th_dispatch.th_disp_buffer "
4387 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4388 gtid, team->t.t_id, gtid);
4391 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4394 dispatch->th_dispatch_pr_current = 0;
4395 dispatch->th_dispatch_sh_current = 0;
4397 dispatch->th_deo_fcn = 0;
4398 dispatch->th_dxo_fcn = 0;
4401 this_thr->th.th_next_pool = NULL;
  if (!this_thr->th.th_task_state_memo_stack) {
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz; ++i)
      this_thr->th.th_task_state_memo_stack[i] = 0;
4414 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4415 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
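// Get a worker for 'team': reuse a thread from __kmp_thread_pool when one is
// available, otherwise allocate a new kmp_info_t, assign it a gtid and a
// serial team, and start its OS worker thread.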
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4432 KMP_DEBUG_ASSERT(root && team);
4433 #if !KMP_NESTED_HOT_TEAMS
4434 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4439 if (__kmp_thread_pool) {
4440 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4441 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4442 if (new_thr == __kmp_thread_pool_insert_pt) {
4443 __kmp_thread_pool_insert_pt = NULL;
4445 TCW_4(new_thr->th.th_in_pool, FALSE);
4446 __kmp_suspend_initialize_thread(new_thr);
4447 __kmp_lock_suspend_mx(new_thr);
4448 if (new_thr->th.th_active_in_pool == TRUE) {
4449 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4450 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4451 new_thr->th.th_active_in_pool = FALSE;
4453 __kmp_unlock_suspend_mx(new_thr);
4455 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4456 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4457 KMP_ASSERT(!new_thr->th.th_team);
4458 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4461 __kmp_initialize_info(new_thr, team, new_tid,
4462 new_thr->th.th_info.ds.ds_gtid);
4463 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4465 TCW_4(__kmp_nth, __kmp_nth + 1);
4467 new_thr->th.th_task_state = 0;
4468 new_thr->th.th_task_state_top = 0;
4469 new_thr->th.th_task_state_stack_sz = 4;
4471 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4473 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4477 #ifdef KMP_ADJUST_BLOCKTIME
4480 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4481 if (__kmp_nth > __kmp_avail_proc) {
4482 __kmp_zero_bt = TRUE;
4491 kmp_balign_t *balign = new_thr->th.th_bar;
4492 for (b = 0; b < bs_last_barrier; ++b)
4493 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4496 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4497 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4504 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4505 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4510 if (!TCR_4(__kmp_init_monitor)) {
4511 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4512 if (!TCR_4(__kmp_init_monitor)) {
4513 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4514 TCW_4(__kmp_init_monitor, 1);
4515 __kmp_create_monitor(&__kmp_monitor);
4516 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4527 while (TCR_4(__kmp_init_monitor) < 2) {
4530 KF_TRACE(10, (
"after monitor thread has started\n"));
4533 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4540 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4542 : __kmp_hidden_helper_threads_num + 1;
4544 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4546 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4549 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4550 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4555 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4557 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4559 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4573 __itt_suppress_mark_range(__itt_suppress_range,
4574 __itt_suppress_threading_errors,
4575 &new_thr->th.th_suspend_init_count,
4576 sizeof(new_thr->th.th_suspend_init_count));
4579 __itt_suppress_mark_range(__itt_suppress_range,
4580 __itt_suppress_threading_errors,
4581 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4582 sizeof(new_thr->th.th_bar[0].bb.b_go));
4583 __itt_suppress_mark_range(__itt_suppress_range,
4584 __itt_suppress_threading_errors,
4585 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4586 sizeof(new_thr->th.th_bar[1].bb.b_go));
4587 __itt_suppress_mark_range(__itt_suppress_range,
4588 __itt_suppress_threading_errors,
4589 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4590 sizeof(new_thr->th.th_bar[2].bb.b_go));
4592 if (__kmp_storage_map) {
4593 __kmp_print_thread_storage_map(new_thr, new_gtid);
4598 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4599 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4600 new_thr->th.th_serial_team = serial_team =
4601 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4605 proc_bind_default, &r_icvs,
4606 0 USE_NESTED_HOT_ARG(NULL));
4608 KMP_ASSERT(serial_team);
4609 serial_team->t.t_serialized = 0;
4611 serial_team->t.t_threads[0] = new_thr;
4613 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4617 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4620 __kmp_initialize_fast_memory(new_thr);
4624 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4625 __kmp_initialize_bget(new_thr);
4628 __kmp_init_random(new_thr);
4632 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4633 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4636 kmp_balign_t *balign = new_thr->th.th_bar;
4637 for (b = 0; b < bs_last_barrier; ++b) {
4638 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4639 balign[b].bb.team = NULL;
4640 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4641 balign[b].bb.use_oncore_barrier = 0;
4644 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4645 new_thr->th.th_sleep_loc_type = flag_unset;
4647 new_thr->th.th_spin_here = FALSE;
4648 new_thr->th.th_next_waiting = 0;
4650 new_thr->th.th_blocking =
false;
4653 #if KMP_AFFINITY_SUPPORTED
4654 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4655 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4656 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4657 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4659 new_thr->th.th_def_allocator = __kmp_def_allocator;
4660 new_thr->th.th_prev_level = 0;
4661 new_thr->th.th_prev_num_threads = 1;
4663 TCW_4(new_thr->th.th_in_pool, FALSE);
4664 new_thr->th.th_active_in_pool = FALSE;
4665 TCW_4(new_thr->th.th_active, TRUE);
4673 if (__kmp_adjust_gtid_mode) {
4674 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4675 if (TCR_4(__kmp_gtid_mode) != 2) {
4676 TCW_4(__kmp_gtid_mode, 2);
4679 if (TCR_4(__kmp_gtid_mode) != 1) {
4680 TCW_4(__kmp_gtid_mode, 1);
4685 #ifdef KMP_ADJUST_BLOCKTIME
4688 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4689 if (__kmp_nth > __kmp_avail_proc) {
4690 __kmp_zero_bt = TRUE;
4695 #if KMP_AFFINITY_SUPPORTED
4697 __kmp_affinity_set_init_mask(new_gtid, FALSE);
4702 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4703 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4705 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4707 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
4723 KMP_DEBUG_ASSERT(team && new_icvs);
4724 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4725 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4727 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4729 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4730 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4732 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4733 team->t.t_threads[0], team));
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4745 KMP_DEBUG_ASSERT(team);
4746 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4747 KMP_DEBUG_ASSERT(team->t.t_threads);
4750 team->t.t_master_tid = 0;
4752 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4753 team->t.t_nproc = new_nproc;
4756 team->t.t_next_pool = NULL;
4760 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4761 team->t.t_invoke = NULL;
4764 team->t.t_sched.sched = new_icvs->sched.sched;
4766 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4767 team->t.t_fp_control_saved = FALSE;
4768 team->t.t_x87_fpu_control_word = 0;
4769 team->t.t_mxcsr = 0;
4772 team->t.t_construct = 0;
4774 team->t.t_ordered.dt.t_value = 0;
4775 team->t.t_master_active = FALSE;
4778 team->t.t_copypriv_data = NULL;
4781 team->t.t_copyin_counter = 0;
4784 team->t.t_control_stack_top = NULL;
4786 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
4792 #if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
4795 th->th.th_first_place = first;
4796 th->th.th_last_place = last;
4797 th->th.th_new_place = newp;
4798 if (newp != th->th.th_current_place) {
4799 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4800 team->t.t_display_affinity = 1;
4802 th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
4803 th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
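// Distribute the threads of a team over the affinity places in the primary
// thread's partition, according to the team's proc_bind policy (primary,
// close, or spread). With update_master_only set, only the primary thread's
// placement is recomputed.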
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4813 if (KMP_HIDDEN_HELPER_TEAM(team))
4816 kmp_info_t *master_th = team->t.t_threads[0];
4817 KMP_DEBUG_ASSERT(master_th != NULL);
4818 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4819 int first_place = master_th->th.th_first_place;
4820 int last_place = master_th->th.th_last_place;
4821 int masters_place = master_th->th.th_current_place;
4822 int num_masks = __kmp_affinity.num_masks;
4823 team->t.t_first_place = first_place;
4824 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
4831 switch (proc_bind) {
4833 case proc_bind_default:
4836 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4839 case proc_bind_primary: {
4841 int n_th = team->t.t_nproc;
4842 for (f = 1; f < n_th; f++) {
4843 kmp_info_t *th = team->t.t_threads[f];
4844 KMP_DEBUG_ASSERT(th != NULL);
4845 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
4854 case proc_bind_close: {
4856 int n_th = team->t.t_nproc;
4858 if (first_place <= last_place) {
4859 n_places = last_place - first_place + 1;
4861 n_places = num_masks - first_place + last_place + 1;
4863 if (n_th <= n_places) {
4864 int place = masters_place;
4865 for (f = 1; f < n_th; f++) {
4866 kmp_info_t *th = team->t.t_threads[f];
4867 KMP_DEBUG_ASSERT(th != NULL);
4869 if (place == last_place) {
4870 place = first_place;
4871 }
else if (place == (num_masks - 1)) {
4876 __kmp_set_thread_place(team, th, first_place, last_place, place);
4878 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4879 "partition = [%d,%d]\n",
4880 __kmp_gtid_from_thread(team->t.t_threads[f]),
4881 team->t.t_id, f, place, first_place, last_place));
4884 int S, rem, gap, s_count;
4885 S = n_th / n_places;
4887 rem = n_th - (S * n_places);
4888 gap = rem > 0 ? n_places / rem : n_places;
4889 int place = masters_place;
4891 for (f = 0; f < n_th; f++) {
4892 kmp_info_t *th = team->t.t_threads[f];
4893 KMP_DEBUG_ASSERT(th != NULL);
4895 __kmp_set_thread_place(team, th, first_place, last_place, place);
4898 if ((s_count == S) && rem && (gap_ct == gap)) {
4900 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4902 if (place == last_place) {
4903 place = first_place;
4904 }
else if (place == (num_masks - 1)) {
4912 }
else if (s_count == S) {
4913 if (place == last_place) {
4914 place = first_place;
4915 }
else if (place == (num_masks - 1)) {
4925 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4926 "partition = [%d,%d]\n",
4927 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4928 th->th.th_new_place, first_place, last_place));
4930 KMP_DEBUG_ASSERT(place == masters_place);
4934 case proc_bind_spread: {
4936 int n_th = team->t.t_nproc;
4939 if (first_place <= last_place) {
4940 n_places = last_place - first_place + 1;
4942 n_places = num_masks - first_place + last_place + 1;
4944 if (n_th <= n_places) {
4947 if (n_places != num_masks) {
4948 int S = n_places / n_th;
4949 int s_count, rem, gap, gap_ct;
4951 place = masters_place;
4952 rem = n_places - n_th * S;
4953 gap = rem ? n_th / rem : 1;
4956 if (update_master_only == 1)
4958 for (f = 0; f < thidx; f++) {
4959 kmp_info_t *th = team->t.t_threads[f];
4960 KMP_DEBUG_ASSERT(th != NULL);
4962 int fplace = place, nplace = place;
4964 while (s_count < S) {
4965 if (place == last_place) {
4966 place = first_place;
4967 }
else if (place == (num_masks - 1)) {
4974 if (rem && (gap_ct == gap)) {
4975 if (place == last_place) {
4976 place = first_place;
4977 }
else if (place == (num_masks - 1)) {
4985 __kmp_set_thread_place(team, th, fplace, place, nplace);
4988 if (place == last_place) {
4989 place = first_place;
4990 }
else if (place == (num_masks - 1)) {
4997 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4998 "partition = [%d,%d], num_masks: %u\n",
4999 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5000 f, th->th.th_new_place, th->th.th_first_place,
5001 th->th.th_last_place, num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5014 if (update_master_only == 1)
5016 for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
5019 KMP_DEBUG_ASSERT(last >= first);
5020 if (first >= n_places) {
5021 if (masters_place) {
5024 if (first == (masters_place + 1)) {
5025 KMP_DEBUG_ASSERT(f == n_th);
5028 if (last == masters_place) {
5029 KMP_DEBUG_ASSERT(f == (n_th - 1));
5033 KMP_DEBUG_ASSERT(f == n_th);
5038 if (last >= n_places) {
5039 last = (n_places - 1);
5044 KMP_DEBUG_ASSERT(0 <= first);
5045 KMP_DEBUG_ASSERT(n_places > first);
5046 KMP_DEBUG_ASSERT(0 <= last);
5047 KMP_DEBUG_ASSERT(n_places > last);
5048 KMP_DEBUG_ASSERT(last_place >= first_place);
5049 th = team->t.t_threads[f];
5050 KMP_DEBUG_ASSERT(th);
5051 __kmp_set_thread_place(team, th, first, last, place);
5053 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5054 "partition = [%d,%d], spacing = %.4f\n",
5055 __kmp_gtid_from_thread(team->t.t_threads[f]),
5056 team->t.t_id, f, th->th.th_new_place,
5057 th->th.th_first_place, th->th.th_last_place, spacing));
5061 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5063 int S, rem, gap, s_count;
5064 S = n_th / n_places;
5066 rem = n_th - (S * n_places);
5067 gap = rem > 0 ? n_places / rem : n_places;
5068 int place = masters_place;
5071 if (update_master_only == 1)
5073 for (f = 0; f < thidx; f++) {
5074 kmp_info_t *th = team->t.t_threads[f];
5075 KMP_DEBUG_ASSERT(th != NULL);
5077 __kmp_set_thread_place(team, th, place, place, place);
5080 if ((s_count == S) && rem && (gap_ct == gap)) {
5082 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5084 if (place == last_place) {
5085 place = first_place;
5086 }
else if (place == (num_masks - 1)) {
5094 }
else if (s_count == S) {
5095 if (place == last_place) {
5096 place = first_place;
5097 }
else if (place == (num_masks - 1)) {
5106 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5107 "partition = [%d,%d]\n",
5108 __kmp_gtid_from_thread(team->t.t_threads[f]),
5109 team->t.t_id, f, th->th.th_new_place,
5110 th->th.th_first_place, th->th.th_last_place));
5112 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5120 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5135 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5138 int use_hot_team = !root->r.r_active;
5140 int do_place_partition = 1;
  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5143 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5144 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5147 #if KMP_NESTED_HOT_TEAMS
5148 kmp_hot_team_ptr_t *hot_teams;
5150 team = master->th.th_team;
5151 level = team->t.t_active_level;
5152 if (master->th.th_teams_microtask) {
5153 if (master->th.th_teams_size.nteams > 1 &&
5156 (microtask_t)__kmp_teams_master ||
5157 master->th.th_teams_level <
5164 if ((master->th.th_teams_size.nteams == 1 &&
5165 master->th.th_teams_level >= team->t.t_level) ||
5166 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5167 do_place_partition = 0;
5169 hot_teams = master->th.th_hot_teams;
5170 if (level < __kmp_hot_teams_max_level && hot_teams &&
5171 hot_teams[level].hot_team) {
5179 KMP_DEBUG_ASSERT(new_nproc == 1);
5183 if (use_hot_team && new_nproc > 1) {
5184 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5185 #if KMP_NESTED_HOT_TEAMS
5186 team = hot_teams[level].hot_team;
5188 team = root->r.r_hot_team;
5191 if (__kmp_tasking_mode != tskm_immediate_exec) {
5192 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5193 "task_team[1] = %p before reinit\n",
5194 team->t.t_task_team[0], team->t.t_task_team[1]));
5198 if (team->t.t_nproc != new_nproc &&
5199 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5201 int old_nthr = team->t.t_nproc;
5202 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5207 if (do_place_partition == 0)
5208 team->t.t_proc_bind = proc_bind_default;
5212 if (team->t.t_nproc == new_nproc) {
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
5216 if (team->t.t_size_changed == -1) {
5217 team->t.t_size_changed = 1;
5219 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5223 kmp_r_sched_t new_sched = new_icvs->sched;
5225 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5227 __kmp_reinitialize_team(team, new_icvs,
5228 root->r.r_uber_thread->th.th_ident);
      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5232 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5234 #if KMP_AFFINITY_SUPPORTED
5235 if ((team->t.t_size_changed == 0) &&
5236 (team->t.t_proc_bind == new_proc_bind)) {
5237 if (new_proc_bind == proc_bind_spread) {
5238 if (do_place_partition) {
5240 __kmp_partition_places(team, 1);
5243 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5244 "proc_bind = %d, partition = [%d,%d]\n",
5245 team->t.t_id, new_proc_bind, team->t.t_first_place,
5246 team->t.t_last_place));
5248 if (do_place_partition) {
5249 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5250 __kmp_partition_places(team);
5254 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5256 }
else if (team->t.t_nproc > new_nproc) {
5258 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5261 team->t.t_size_changed = 1;
5262 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5265 __kmp_add_threads_to_team(team, new_nproc);
5267 #if KMP_NESTED_HOT_TEAMS
5268 if (__kmp_hot_teams_mode == 0) {
5271 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5272 hot_teams[level].hot_team_nth = new_nproc;
5275 for (f = new_nproc; f < team->t.t_nproc; f++) {
5276 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5277 if (__kmp_tasking_mode != tskm_immediate_exec) {
5280 team->t.t_threads[f]->th.th_task_team = NULL;
5282 __kmp_free_thread(team->t.t_threads[f]);
5283 team->t.t_threads[f] = NULL;
5285 #if KMP_NESTED_HOT_TEAMS
5290 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5291 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5294 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5295 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5297 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5302 team->t.t_nproc = new_nproc;
5304 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5305 __kmp_reinitialize_team(team, new_icvs,
5306 root->r.r_uber_thread->th.th_ident);
5309 for (f = 0; f < new_nproc; ++f) {
5310 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5315 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5316 team->t.t_threads[0], team));
5318 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5321 for (f = 0; f < team->t.t_nproc; f++) {
5322 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5323 team->t.t_threads[f]->th.th_team_nproc ==
5328 if (do_place_partition) {
5329 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5330 #if KMP_AFFINITY_SUPPORTED
5331 __kmp_partition_places(team);
5337 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5339 int old_nproc = team->t.t_nproc;
5340 team->t.t_size_changed = 1;
5342 #if KMP_NESTED_HOT_TEAMS
5343 int avail_threads = hot_teams[level].hot_team_nth;
5344 if (new_nproc < avail_threads)
5345 avail_threads = new_nproc;
5346 kmp_info_t **other_threads = team->t.t_threads;
5347 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5351 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5352 for (b = 0; b < bs_last_barrier; ++b) {
5353 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5354 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5356 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5360 if (hot_teams[level].hot_team_nth >= new_nproc) {
5363 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5364 team->t.t_nproc = new_nproc;
5368 team->t.t_nproc = hot_teams[level].hot_team_nth;
5369 hot_teams[level].hot_team_nth = new_nproc;
5371 if (team->t.t_max_nproc < new_nproc) {
5373 __kmp_reallocate_team_arrays(team, new_nproc);
5374 __kmp_reinitialize_team(team, new_icvs, NULL);
5377 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5383 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5387 for (f = team->t.t_nproc; f < new_nproc; f++) {
5388 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5389 KMP_DEBUG_ASSERT(new_worker);
5390 team->t.t_threads[f] = new_worker;
5393 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5394 "join=%llu, plain=%llu\n",
5395 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5396 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5397 team->t.t_bar[bs_plain_barrier].b_arrived));
5401 kmp_balign_t *balign = new_worker->th.th_bar;
5402 for (b = 0; b < bs_last_barrier; ++b) {
5403 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5404 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5405 KMP_BARRIER_PARENT_FLAG);
5407 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5413 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5415 new_temp_affinity.restore();
5417 #if KMP_NESTED_HOT_TEAMS
5420 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5423 __kmp_add_threads_to_team(team, new_nproc);
5427 __kmp_initialize_team(team, new_nproc, new_icvs,
5428 root->r.r_uber_thread->th.th_ident);
5431 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5432 for (f = 0; f < team->t.t_nproc; ++f)
5433 __kmp_initialize_info(team->t.t_threads[f], team, f,
5434 __kmp_gtid_from_tid(f, team));
5437 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5438 for (f = old_nproc; f < team->t.t_nproc; ++f)
5439 team->t.t_threads[f]->th.th_task_state = old_state;
5442 for (f = 0; f < team->t.t_nproc; ++f) {
5443 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5444 team->t.t_threads[f]->th.th_team_nproc ==
5449 if (do_place_partition) {
5450 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5451 #if KMP_AFFINITY_SUPPORTED
5452 __kmp_partition_places(team);
5457 kmp_info_t *master = team->t.t_threads[0];
5458 if (master->th.th_teams_microtask) {
5459 for (f = 1; f < new_nproc; ++f) {
5461 kmp_info_t *thr = team->t.t_threads[f];
5462 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5463 thr->th.th_teams_level = master->th.th_teams_level;
5464 thr->th.th_teams_size = master->th.th_teams_size;
5467 #if KMP_NESTED_HOT_TEAMS
5471 for (f = 1; f < new_nproc; ++f) {
5472 kmp_info_t *thr = team->t.t_threads[f];
5474 kmp_balign_t *balign = thr->th.th_bar;
5475 for (b = 0; b < bs_last_barrier; ++b) {
5476 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5477 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5479 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5487 __kmp_alloc_argv_entries(argc, team, TRUE);
5488 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5492 KF_TRACE(10, (
" hot_team = %p\n", team));
5495 if (__kmp_tasking_mode != tskm_immediate_exec) {
5496 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5497 "task_team[1] = %p after reinit\n",
5498 team->t.t_task_team[0], team->t.t_task_team[1]));
5503 __ompt_team_assign_id(team, ompt_parallel_data);
5513 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5516 if (team->t.t_max_nproc >= max_nproc) {
5518 __kmp_team_pool = team->t.t_next_pool;
5520 if (max_nproc > 1 &&
5521 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5523 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5528 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5530 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5531 "task_team[1] %p to NULL\n",
5532 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5533 team->t.t_task_team[0] = NULL;
5534 team->t.t_task_team[1] = NULL;
5537 __kmp_alloc_argv_entries(argc, team, TRUE);
5538 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5541 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5542 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5545 for (b = 0; b < bs_last_barrier; ++b) {
5546 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5548 team->t.t_bar[b].b_master_arrived = 0;
5549 team->t.t_bar[b].b_team_arrived = 0;
5554 team->t.t_proc_bind = new_proc_bind;
5556 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5560 __ompt_team_assign_id(team, ompt_parallel_data);
5572 team = __kmp_reap_team(team);
5573 __kmp_team_pool = team;
5578 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5581 team->t.t_max_nproc = max_nproc;
5582 if (max_nproc > 1 &&
5583 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5585 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5590 __kmp_allocate_team_arrays(team, max_nproc);
5592 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5593 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5595 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5597 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5598 team->t.t_task_team[0] = NULL;
5600 team->t.t_task_team[1] = NULL;
5603 if (__kmp_storage_map) {
5604 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5608 __kmp_alloc_argv_entries(argc, team, FALSE);
5609 team->t.t_argc = argc;
5612 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5613 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5616 for (b = 0; b < bs_last_barrier; ++b) {
5617 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5619 team->t.t_bar[b].b_master_arrived = 0;
5620 team->t.t_bar[b].b_team_arrived = 0;
5625 team->t.t_proc_bind = new_proc_bind;
5628 __ompt_team_assign_id(team, ompt_parallel_data);
5629 team->t.ompt_serialized_team_info = NULL;
5634 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5652 KMP_DEBUG_ASSERT(root);
5653 KMP_DEBUG_ASSERT(team);
5654 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5655 KMP_DEBUG_ASSERT(team->t.t_threads);
5657 int use_hot_team = team == root->r.r_hot_team;
5658 #if KMP_NESTED_HOT_TEAMS
5661 level = team->t.t_active_level - 1;
5662 if (master->th.th_teams_microtask) {
5663 if (master->th.th_teams_size.nteams > 1) {
5667 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5668 master->th.th_teams_level == team->t.t_level) {
5674 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5676 if (level < __kmp_hot_teams_max_level) {
5677 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5684 TCW_SYNC_PTR(team->t.t_pkfn,
5687 team->t.t_copyin_counter = 0;
5692 if (!use_hot_team) {
5693 if (__kmp_tasking_mode != tskm_immediate_exec) {
5695 for (f = 1; f < team->t.t_nproc; ++f) {
5696 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5697 kmp_info_t *th = team->t.t_threads[f];
5698 volatile kmp_uint32 *state = &th->th.th_reap_state;
5699 while (*state != KMP_SAFE_TO_REAP) {
5703 if (!__kmp_is_thread_alive(th, &ecode)) {
5704 *state = KMP_SAFE_TO_REAP;
5709 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5710 if (fl.is_sleeping())
5711 fl.resume(__kmp_gtid_from_thread(th));
5718 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5719 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5720 if (task_team != NULL) {
5721 for (f = 0; f < team->t.t_nproc; ++f) {
5722 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5723 team->t.t_threads[f]->th.th_task_team = NULL;
5727 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5728 __kmp_get_gtid(), task_team, team->t.t_id));
5729 #if KMP_NESTED_HOT_TEAMS
5730 __kmp_free_task_team(master, task_team);
5732 team->t.t_task_team[tt_idx] = NULL;
5738 team->t.t_parent = NULL;
5739 team->t.t_level = 0;
5740 team->t.t_active_level = 0;
5743 for (f = 1; f < team->t.t_nproc; ++f) {
5744 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5745 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5746 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5749 __kmp_free_thread(team->t.t_threads[f]);
5752 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5755 team->t.b->go_release();
5756 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5757 for (f = 1; f < team->t.t_nproc; ++f) {
5758 if (team->t.b->sleep[f].sleep) {
5759 __kmp_atomic_resume_64(
5760 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5761 (kmp_atomic_flag_64<> *)NULL);
    for (int f = 1; f < team->t.t_nproc; ++f) {
5767 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5773 for (f = 1; f < team->t.t_nproc; ++f) {
5774 team->t.t_threads[f] = NULL;
5777 if (team->t.t_max_nproc > 1 &&
5778 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5779 distributedBarrier::deallocate(team->t.b);
5784 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5785 __kmp_team_pool = (
volatile kmp_team_t *)team;
5788 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5789 team->t.t_threads[1]->th.th_cg_roots);
5790 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5792 for (f = 1; f < team->t.t_nproc; ++f) {
5793 kmp_info_t *thr = team->t.t_threads[f];
5794 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5795 thr->th.th_cg_roots->cg_root == thr);
5797 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5798 thr->th.th_cg_roots = tmp->up;
5799 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5800 " up to node %p. cg_nthreads was %d\n",
5801 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5802 int i = tmp->cg_nthreads--;
5807 if (thr->th.th_cg_roots)
5808 thr->th.th_current_task->td_icvs.thread_limit =
5809 thr->th.th_cg_roots->cg_thread_limit;
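// Free all memory owned by a pooled team (dispatch buffers, thread and argv
// arrays) and return the next team in the pool so the caller can keep walking
// the free list.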
5818 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5819 kmp_team_t *next_pool = team->t.t_next_pool;
5821 KMP_DEBUG_ASSERT(team);
5822 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5823 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5824 KMP_DEBUG_ASSERT(team->t.t_threads);
5825 KMP_DEBUG_ASSERT(team->t.t_argv);
5830 __kmp_free_team_arrays(team);
5831 if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
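// Put a worker thread back on the global thread pool, keeping the pool sorted
// by gtid. Barrier bookkeeping and the thread's CG root chain are unwound
// first, and the pool-insertion hint (__kmp_thread_pool_insert_pt) is updated.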
5864 void __kmp_free_thread(kmp_info_t *this_th) {
  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5871 KMP_DEBUG_ASSERT(this_th);
5876 kmp_balign_t *balign = this_th->th.th_bar;
5877 for (b = 0; b < bs_last_barrier; ++b) {
5878 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5879 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5880 balign[b].bb.team = NULL;
5881 balign[b].bb.leaf_kids = 0;
5883 this_th->th.th_task_state = 0;
5884 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5887 TCW_PTR(this_th->th.th_team, NULL);
5888 TCW_PTR(this_th->th.th_root, NULL);
5889 TCW_PTR(this_th->th.th_dispatch, NULL);
5891 while (this_th->th.th_cg_roots) {
5892 this_th->th.th_cg_roots->cg_nthreads--;
5893 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5894 " %p of thread %p to %d\n",
5895 this_th, this_th->th.th_cg_roots,
5896 this_th->th.th_cg_roots->cg_root,
5897 this_th->th.th_cg_roots->cg_nthreads));
5898 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5899 if (tmp->cg_root == this_th) {
5900 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5902 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5903 this_th->th.th_cg_roots = tmp->up;
5906 if (tmp->cg_nthreads == 0) {
5909 this_th->th.th_cg_roots = NULL;
5919 __kmp_free_implicit_task(this_th);
5920 this_th->th.th_current_task = NULL;
5924 gtid = this_th->th.th_info.ds.ds_gtid;
5925 if (__kmp_thread_pool_insert_pt != NULL) {
5926 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5927 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5928 __kmp_thread_pool_insert_pt = NULL;
5937 if (__kmp_thread_pool_insert_pt != NULL) {
5938 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5940 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5942 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5943 scan = &((*scan)->th.th_next_pool))
5948 TCW_PTR(this_th->th.th_next_pool, *scan);
5949 __kmp_thread_pool_insert_pt = *scan = this_th;
5950 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5951 (this_th->th.th_info.ds.ds_gtid <
5952 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5953 TCW_4(this_th->th.th_in_pool, TRUE);
5954 __kmp_suspend_initialize_thread(this_th);
5955 __kmp_lock_suspend_mx(this_th);
5956 if (this_th->th.th_active == TRUE) {
5957 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5958 this_th->th.th_active_in_pool = TRUE;
5962 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5965 __kmp_unlock_suspend_mx(this_th);
5967 TCW_4(__kmp_nth, __kmp_nth - 1);
5969 #ifdef KMP_ADJUST_BLOCKTIME
5972 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5973 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5974 if (__kmp_nth <= __kmp_avail_proc) {
5975 __kmp_zero_bt = FALSE;
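// Main loop of an OpenMP worker thread: wait at the fork barrier for work,
// invoke the microtask of the team it was assigned to, join, and repeat until
// library shutdown. OMPT/OMPD hooks bracket the thread's lifetime when enabled.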
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;
5998 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
6000 if (__kmp_env_consistency_check) {
6001 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6005 if (ompd_state & OMPD_ENABLE_BP)
6006 ompd_bp_thread_begin();
  ompt_data_t *thread_data = nullptr;
6011 if (ompt_enabled.enabled) {
6012 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6013 *thread_data = ompt_data_none;
6015 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6016 this_thr->th.ompt_thread_info.wait_id = 0;
6017 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6018 this_thr->th.ompt_thread_info.parallel_flags = 0;
6019 if (ompt_enabled.ompt_callback_thread_begin) {
6020 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6021 ompt_thread_worker, thread_data);
6023 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6028 while (!TCR_4(__kmp_global.g.g_done)) {
6029 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6033 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6036 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6039 if (ompt_enabled.enabled) {
6040 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6044 pteam = &this_thr->th.th_team;
6047 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6049 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6052 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6053 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6054 (*pteam)->t.t_pkfn));
6056 updateHWFPControl(*pteam);
6059 if (ompt_enabled.enabled) {
6060 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6064 rc = (*pteam)->t.t_invoke(gtid);
6068 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6069 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6070 (*pteam)->t.t_pkfn));
6073 if (ompt_enabled.enabled) {
6075 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6077 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6081 __kmp_join_barrier(gtid);
6086 if (ompd_state & OMPD_ENABLE_BP)
6087 ompd_bp_thread_end();
6091 if (ompt_enabled.ompt_callback_thread_end) {
6092 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6096 this_thr->th.th_task_team = NULL;
6098 __kmp_common_destroy_gtid(gtid);
6100 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6103 #if OMP_PROFILING_SUPPORT
6104 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
6116 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6120 __kmp_internal_end_thread(gtid);
6123 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6126 __kmp_internal_end_atexit();
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6158 __kmp_internal_end_library(-1);
6160 __kmp_close_console();
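// Reap a single worker: release it from the fork barrier so it can exit, join
// the underlying OS thread, then free all per-thread resources (implicit task,
// fast memory, bget data, affinity mask, serialized team, etc.).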
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6169 KMP_DEBUG_ASSERT(thread != NULL);
6171 gtid = thread->th.th_info.ds.ds_gtid;
6174 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6177 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6179 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6181 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6183 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6187 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6189 __kmp_release_64(&flag);
6194 __kmp_reap_worker(thread);
6206 if (thread->th.th_active_in_pool) {
6207 thread->th.th_active_in_pool = FALSE;
6208 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6209 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6213 __kmp_free_implicit_task(thread);
6217 __kmp_free_fast_memory(thread);
6220 __kmp_suspend_uninitialize_thread(thread);
6222 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6223 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6228 #ifdef KMP_ADJUST_BLOCKTIME
6231 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6232 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6233 if (__kmp_nth <= __kmp_avail_proc) {
6234 __kmp_zero_bt = FALSE;
6240 if (__kmp_env_consistency_check) {
6241 if (thread->th.th_cons) {
6242 __kmp_free_cons_stack(thread->th.th_cons);
6243 thread->th.th_cons = NULL;
6247 if (thread->th.th_pri_common != NULL) {
6248 __kmp_free(thread->th.th_pri_common);
6249 thread->th.th_pri_common = NULL;
6252 if (thread->th.th_task_state_memo_stack != NULL) {
6253 __kmp_free(thread->th.th_task_state_memo_stack);
6254 thread->th.th_task_state_memo_stack = NULL;
6258 if (thread->th.th_local.bget_data != NULL) {
6259 __kmp_finalize_bget(thread);
6263 #if KMP_AFFINITY_SUPPORTED
6264 if (thread->th.th_affin_mask != NULL) {
6265 KMP_CPU_FREE(thread->th.th_affin_mask);
6266 thread->th.th_affin_mask = NULL;
6270 #if KMP_USE_HIER_SCHED
6271 if (thread->th.th_hier_bar_data != NULL) {
6272 __kmp_free(thread->th.th_hier_bar_data);
6273 thread->th.th_hier_bar_data = NULL;
6277 __kmp_reap_team(thread->th.th_serial_team);
6278 thread->th.th_serial_team = NULL;
6285 static void __kmp_itthash_clean(kmp_info_t *th) {
6287 if (__kmp_itt_region_domains.count > 0) {
6288 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6289 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6291 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6292 __kmp_thread_free(th, bucket);
6297 if (__kmp_itt_barrier_domains.count > 0) {
6298 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6299 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6301 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6302 __kmp_thread_free(th, bucket);
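// Common shutdown path: unregister the library, reclaim dead roots, and if no
// root remains active, reap the monitor thread, every pooled worker and team,
// and the task teams before marking the runtime uninitialized.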
static void __kmp_internal_end(void) {
6314 __kmp_unregister_library();
6321 __kmp_reclaim_dead_roots();
6325 for (i = 0; i < __kmp_threads_capacity; i++)
6327 if (__kmp_root[i]->r.r_active)
6330 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6332 if (i < __kmp_threads_capacity) {
6344 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6345 if (TCR_4(__kmp_init_monitor)) {
6346 __kmp_reap_monitor(&__kmp_monitor);
6347 TCW_4(__kmp_init_monitor, 0);
6349 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6350 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6356 for (i = 0; i < __kmp_threads_capacity; i++) {
6357 if (__kmp_root[i]) {
6360 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6369 while (__kmp_thread_pool != NULL) {
6371 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6372 __kmp_thread_pool = thread->th.th_next_pool;
6374 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6375 thread->th.th_next_pool = NULL;
6376 thread->th.th_in_pool = FALSE;
6377 __kmp_reap_thread(thread, 0);
6379 __kmp_thread_pool_insert_pt = NULL;
6382 while (__kmp_team_pool != NULL) {
6384 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6385 __kmp_team_pool = team->t.t_next_pool;
6387 team->t.t_next_pool = NULL;
6388 __kmp_reap_team(team);
6391 __kmp_reap_task_teams();
6398 for (i = 0; i < __kmp_threads_capacity; i++) {
6399 kmp_info_t *thr = __kmp_threads[i];
6400 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6405 for (i = 0; i < __kmp_threads_capacity; ++i) {
6412 TCW_SYNC_4(__kmp_init_common, FALSE);
6414 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6422 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6423 if (TCR_4(__kmp_init_monitor)) {
6424 __kmp_reap_monitor(&__kmp_monitor);
6425 TCW_4(__kmp_init_monitor, 0);
6427 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6428 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6431 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6447 if (__kmp_global.g.g_abort) {
6448 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6452 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6453 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6458 if (TCR_4(__kmp_init_hidden_helper) &&
6459 !TCR_4(__kmp_hidden_helper_team_done)) {
6460 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6462 __kmp_hidden_helper_main_thread_release();
6464 __kmp_hidden_helper_threads_deinitz_wait();
6470 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6472 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6473 if (gtid == KMP_GTID_SHUTDOWN) {
6474 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6475 "already shutdown\n"));
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                  "already shutdown\n"));
  } else if (KMP_UBER_GTID(gtid)) {
6487 if (__kmp_root[gtid]->r.r_active) {
6488 __kmp_global.g.g_abort = -1;
6489 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6490 __kmp_unregister_library();
6492 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6496 __kmp_itthash_clean(__kmp_threads[gtid]);
6499 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6500 __kmp_unregister_root_current_thread(gtid);
6507 #ifdef DUMP_DEBUG_ON_EXIT
6508 if (__kmp_debug_buf)
6509 __kmp_dump_debug_buffer();
6514 __kmp_unregister_library();
6519 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6522 if (__kmp_global.g.g_abort) {
6523 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6525 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6528 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6529 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6538 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6541 __kmp_internal_end();
6543 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6544 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6546 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6548 #ifdef DUMP_DEBUG_ON_EXIT
6549 if (__kmp_debug_buf)
6550 __kmp_dump_debug_buffer();
6554 __kmp_close_console();
6557 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6570 if (__kmp_global.g.g_abort) {
6571 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6575 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6576 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6581 if (TCR_4(__kmp_init_hidden_helper) &&
6582 !TCR_4(__kmp_hidden_helper_team_done)) {
6583 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6585 __kmp_hidden_helper_main_thread_release();
6587 __kmp_hidden_helper_threads_deinitz_wait();
6594 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6596 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6597 if (gtid == KMP_GTID_SHUTDOWN) {
6598 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6599 "already shutdown\n"));
6601 }
else if (gtid == KMP_GTID_MONITOR) {
6602 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6603 "registered, or system shutdown\n"));
6605 }
else if (gtid == KMP_GTID_DNE) {
6606 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6610 }
else if (KMP_UBER_GTID(gtid)) {
6612 if (__kmp_root[gtid]->r.r_active) {
6613 __kmp_global.g.g_abort = -1;
6614 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6616 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6620 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6622 __kmp_unregister_root_current_thread(gtid);
6626 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6629 __kmp_threads[gtid]->th.th_task_team = NULL;
6633 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6639 if (__kmp_pause_status != kmp_hard_paused)
6643 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6648 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6651 if (__kmp_global.g.g_abort) {
6652 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6654 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6657 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6658 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6669 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6671 for (i = 0; i < __kmp_threads_capacity; ++i) {
6672 if (KMP_UBER_GTID(i)) {
6675 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6676 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6677 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6684 __kmp_internal_end();
6686 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6687 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6689 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6691 #ifdef DUMP_DEBUG_ON_EXIT
6692 if (__kmp_debug_buf)
6693 __kmp_dump_debug_buffer();
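// Library registration: a per-process marker (an environment variable or,
// with KMP_USE_SHM, a small shared-memory segment) lets a newly loaded copy
// of the runtime detect that another copy is already registered in this
// process and, unless KMP_DUPLICATE_LIB_OK is set, abort with a diagnostic.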
6700 static long __kmp_registration_flag = 0;
6702 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
6719 #if defined(KMP_USE_SHM)
6721 char *temp_reg_status_file_name =
nullptr;
void __kmp_register_library_startup(void) {
6726 char *name = __kmp_reg_status_name();
6732 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6733 __kmp_initialize_system_tick();
6735 __kmp_read_system_time(&time.dtime);
6736 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6748 #if defined(KMP_USE_SHM)
6749 char *shm_name = __kmp_str_format(
"/%s", name);
6750 int shm_preexist = 0;
6752 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6753 if ((fd1 == -1) && (errno == EEXIST)) {
6756 fd1 = shm_open(shm_name, O_RDWR, 0666);
6759 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6765 }
else if (fd1 == -1) {
6770 char *temp_file_name = __kmp_str_format(
"/tmp/%sXXXXXX", name);
6771 fd1 = mkstemp(temp_file_name);
6774 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open TEMP"), KMP_ERR(errno),
6777 temp_reg_status_file_name = temp_file_name;
6779 if (shm_preexist == 0) {
6781 if (ftruncate(fd1, SHM_SIZE) == -1) {
6783 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6784 KMP_ERR(errno), __kmp_msg_null);
6788 (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6789 if (data1 == MAP_FAILED) {
6791 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
6794 if (shm_preexist == 0) {
6795 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6798 value = __kmp_str_format(
"%s", data1);
6799 munmap(data1, SHM_SIZE);
6803 __kmp_env_set(name, __kmp_registration_str, 0);
6805 value = __kmp_env_get(name);
6808 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6815 char *flag_addr_str = NULL;
6816 char *flag_val_str = NULL;
6817 char const *file_name = NULL;
6818 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6819 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6822 unsigned long *flag_addr = 0;
6823 unsigned long flag_val = 0;
6824 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6825 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6826 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6830 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6844 file_name =
"unknown library";
6849 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6850 if (!__kmp_str_match_true(duplicate_ok)) {
6852 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6853 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6855 KMP_INTERNAL_FREE(duplicate_ok);
6856 __kmp_duplicate_library_ok = 1;
6861 #if defined(KMP_USE_SHM)
6863 shm_unlink(shm_name);
6866 __kmp_env_unset(name);
6870 KMP_DEBUG_ASSERT(0);
6874 KMP_INTERNAL_FREE((
void *)value);
6875 #if defined(KMP_USE_SHM)
6876 KMP_INTERNAL_FREE((
void *)shm_name);
6879 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();

#if defined(KMP_USE_SHM)
  bool use_shm = true;
  char *shm_name = __kmp_str_format("/%s", name);
6891 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6895 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6896 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6902 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6903 if (data1 != MAP_FAILED) {
6904 value = __kmp_str_format(
"%s", data1);
6905 munmap(data1, SHM_SIZE);
6909 value = __kmp_env_get(name);
6912 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6913 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6914 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6916 #if defined(KMP_USE_SHM)
6918 shm_unlink(shm_name);
6920 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6921 unlink(temp_reg_status_file_name);
6924 __kmp_env_unset(name);
6928 #if defined(KMP_USE_SHM)
6929 KMP_INTERNAL_FREE(shm_name);
6931 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6932 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6936 KMP_INTERNAL_FREE(__kmp_registration_str);
6937 KMP_INTERNAL_FREE(value);
6938 KMP_INTERNAL_FREE(name);
6940 __kmp_registration_flag = 0;
6941 __kmp_registration_str = NULL;
6948 #if KMP_MIC_SUPPORTED
6950 static void __kmp_check_mic_type() {
6951 kmp_cpuid_t cpuid_state = {0};
6952 kmp_cpuid_t *cs_p = &cpuid_state;
6953 __kmp_x86_cpuid(1, 0, cs_p);
6955 if ((cs_p->eax & 0xff0) == 0xB10) {
6956 __kmp_mic_type = mic2;
6957 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6958 __kmp_mic_type = mic3;
6960 __kmp_mic_type = non_mic;
6967 static void __kmp_user_level_mwait_init() {
6968 struct kmp_cpuid buf;
6969 __kmp_x86_cpuid(7, 0, &buf);
6970 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6971 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6972 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6973 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6974 __kmp_umwait_enabled));
6976 #elif KMP_HAVE_MWAIT
6977 #ifndef AT_INTELPHIUSERMWAIT
6980 #define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
6988 static void __kmp_user_level_mwait_init() {
6993 if (__kmp_mic_type == mic3) {
6994 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6995 if ((res & 0x1) || __kmp_user_level_mwait) {
6996 __kmp_mwait_enabled = TRUE;
6997 if (__kmp_user_level_mwait) {
6998 KMP_INFORM(EnvMwaitWarn);
7001 __kmp_mwait_enabled = FALSE;
7004 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7005 "__kmp_mwait_enabled = %d\n",
7006 __kmp_mic_type, __kmp_mwait_enabled));
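// One-time serial initialization of the runtime: sanity-check type sizes,
// create the global locks, read the environment, size the thread/team arrays,
// and register the initial (uber) root thread.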
static void __kmp_do_serial_initialize(void) {
  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7030 __kmp_validate_locks();
7032 #if ENABLE_LIBOMPTARGET
7034 __kmp_init_omptarget();
7038 __kmp_init_allocator();
7044 if (__kmp_need_register_serial)
7045 __kmp_register_library_startup();
7048 if (TCR_4(__kmp_global.g.g_done)) {
7049 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7052 __kmp_global.g.g_abort = 0;
7053 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7056 #if KMP_USE_ADAPTIVE_LOCKS
7057 #if KMP_DEBUG_ADAPTIVE_LOCKS
7058 __kmp_init_speculative_stats();
7061 #if KMP_STATS_ENABLED
7064 __kmp_init_lock(&__kmp_global_lock);
7065 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7066 __kmp_init_lock(&__kmp_debug_lock);
7067 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7068 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7069 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7070 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7071 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7072 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7073 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7074 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7075 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7076 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7077 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7078 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7079 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7080 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7081 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7083 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7085 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7089 __kmp_runtime_initialize();
7091 #if KMP_MIC_SUPPORTED
7092 __kmp_check_mic_type();
7099 __kmp_abort_delay = 0;
7103 __kmp_dflt_team_nth_ub = __kmp_xproc;
7104 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7105 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7107 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7108 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7110 __kmp_max_nth = __kmp_sys_max_nth;
7111 __kmp_cg_max_nth = __kmp_sys_max_nth;
7112 __kmp_teams_max_nth = __kmp_xproc;
7113 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7114 __kmp_teams_max_nth = __kmp_sys_max_nth;
7119 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7121 __kmp_monitor_wakeups =
7122 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7123 __kmp_bt_intervals =
7124 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7127 __kmp_library = library_throughput;
7129 __kmp_static = kmp_sch_static_balanced;
7136 #if KMP_FAST_REDUCTION_BARRIER
7137 #define kmp_reduction_barrier_gather_bb ((int)1)
7138 #define kmp_reduction_barrier_release_bb ((int)1)
7139 #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7140 #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7142 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7143 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7144 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7145 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7146 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7147 #if KMP_FAST_REDUCTION_BARRIER
7148 if (i == bs_reduction_barrier) {
7150 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7151 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7152 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7153 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7157 #if KMP_FAST_REDUCTION_BARRIER
7158 #undef kmp_reduction_barrier_release_pat
7159 #undef kmp_reduction_barrier_gather_pat
7160 #undef kmp_reduction_barrier_release_bb
7161 #undef kmp_reduction_barrier_gather_bb
7163 #if KMP_MIC_SUPPORTED
7164 if (__kmp_mic_type == mic2) {
7166 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7167 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7169 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7170 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7172 #if KMP_FAST_REDUCTION_BARRIER
7173 if (__kmp_mic_type == mic2) {
7174 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7175 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7182 __kmp_env_checks = TRUE;
7184 __kmp_env_checks = FALSE;
7188 __kmp_foreign_tp = TRUE;
7190 __kmp_global.g.g_dynamic = FALSE;
7191 __kmp_global.g.g_dynamic_mode = dynamic_default;
7193 __kmp_init_nesting_mode();
7195 __kmp_env_initialize(NULL);
7197 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7198 __kmp_user_level_mwait_init();
7202 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7203 if (__kmp_str_match_true(val)) {
7204 kmp_str_buf_t buffer;
7205 __kmp_str_buf_init(&buffer);
7206 __kmp_i18n_dump_catalog(&buffer);
7207 __kmp_printf(
"%s", buffer.str);
7208 __kmp_str_buf_free(&buffer);
7210 __kmp_env_free(&val);
7213 __kmp_threads_capacity =
7214 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7216 __kmp_tp_capacity = __kmp_default_tp_capacity(
7217 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7222 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7223 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7224 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7225 __kmp_thread_pool = NULL;
7226 __kmp_thread_pool_insert_pt = NULL;
7227 __kmp_team_pool = NULL;
7234 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7236 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7237 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
7238 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7241 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7243 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7248 gtid = __kmp_register_root(TRUE);
7249 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7250 KMP_ASSERT(KMP_UBER_GTID(gtid));
7251 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7255 __kmp_common_initialize();
7259 __kmp_register_atfork();
7262 #if !KMP_DYNAMIC_LIB || \
7263 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7268 int rc = atexit(__kmp_internal_end_atexit);
7270 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7276 #if KMP_HANDLE_SIGNALS
7282 __kmp_install_signals(FALSE);
7285 __kmp_install_signals(TRUE);
7290 __kmp_init_counter++;
7292 __kmp_init_serial = TRUE;
7294 if (__kmp_version) {
7295 __kmp_print_version_1();
7298 if (__kmp_settings) {
7302 if (__kmp_display_env || __kmp_display_env_verbose) {
7303 __kmp_env_print_2();
7312 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7316 if (__kmp_init_serial) {
7319 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7320 if (__kmp_init_serial) {
7321 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7324 __kmp_do_serial_initialize();
7325 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
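// Middle initialization: runs after serial init, sets up affinity, and derives
// the default team size (__kmp_dflt_team_nth) from the available processors,
// propagating the new default to already-registered threads.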
static void __kmp_do_middle_initialize(void) {
7330 int prev_dflt_team_nth;
7332 if (!__kmp_init_serial) {
7333 __kmp_do_serial_initialize();
7336 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7338 if (UNLIKELY(!__kmp_need_register_serial)) {
7341 __kmp_register_library_startup();
7346 prev_dflt_team_nth = __kmp_dflt_team_nth;
7348 #if KMP_AFFINITY_SUPPORTED
7351 __kmp_affinity_initialize(__kmp_affinity);
7355 KMP_ASSERT(__kmp_xproc > 0);
7356 if (__kmp_avail_proc == 0) {
7357 __kmp_avail_proc = __kmp_xproc;
7363 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7364 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7369 if (__kmp_dflt_team_nth == 0) {
7370 #ifdef KMP_DFLT_NTH_CORES
7372 __kmp_dflt_team_nth = __kmp_ncores;
7373 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7374 "__kmp_ncores (%d)\n",
7375 __kmp_dflt_team_nth));
7378 __kmp_dflt_team_nth = __kmp_avail_proc;
7379 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7380 "__kmp_avail_proc(%d)\n",
7381 __kmp_dflt_team_nth));
7385 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7386 __kmp_dflt_team_nth = KMP_MIN_NTH;
7388 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7389 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7392 if (__kmp_nesting_mode > 0)
7393 __kmp_set_nesting_mode_threads();
7397 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7399 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7404 for (i = 0; i < __kmp_threads_capacity; i++) {
7405 kmp_info_t *thread = __kmp_threads[i];
7408 if (thread->th.th_current_task->td_icvs.nproc != 0)
7411 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7416 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7417 __kmp_dflt_team_nth));
7419 #ifdef KMP_ADJUST_BLOCKTIME
7421 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7422 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7423 if (__kmp_nth > __kmp_avail_proc) {
7424 __kmp_zero_bt = TRUE;
7430 TCW_SYNC_4(__kmp_init_middle, TRUE);
7432 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
7436 if (__kmp_init_middle) {
7439 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7440 if (__kmp_init_middle) {
7441 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7444 __kmp_do_middle_initialize();
7445 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_parallel_initialize(void) {
7449 int gtid = __kmp_entry_gtid();
7452 if (TCR_4(__kmp_init_parallel))
7454 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7455 if (TCR_4(__kmp_init_parallel)) {
7456 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7461 if (TCR_4(__kmp_global.g.g_done)) {
7464 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7465 __kmp_infinite_loop();
7471 if (!__kmp_init_middle) {
7472 __kmp_do_middle_initialize();
7474 __kmp_assign_root_init_mask();
7475 __kmp_resume_if_hard_paused();
7478 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7479 KMP_ASSERT(KMP_UBER_GTID(gtid));
7481 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7484 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7485 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7486 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7490 #if KMP_HANDLE_SIGNALS
7492 __kmp_install_signals(TRUE);
7496 __kmp_suspend_initialize();
7498 #if defined(USE_LOAD_BALANCE)
7499 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7500 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7503 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7504 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7508 if (__kmp_version) {
7509 __kmp_print_version_2();
7513 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7516 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7518 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
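// One-time setup of the hidden helper thread team. Requires full parallel
// initialization first and reuses the same bootstrap-lock double-check
// pattern as the other *_initialize entry points.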
7521 void __kmp_hidden_helper_initialize() {
7522 if (TCR_4(__kmp_init_hidden_helper))
7526 if (!TCR_4(__kmp_init_parallel))
7527 __kmp_parallel_initialize();
7531 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7532 if (TCR_4(__kmp_init_hidden_helper)) {
7533 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7537 #if KMP_AFFINITY_SUPPORTED
7541 if (!__kmp_hh_affinity.flags.initialized)
7542 __kmp_affinity_initialize(__kmp_hh_affinity);
7546 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7550 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7553 __kmp_do_initialize_hidden_helper_threads();
7556 __kmp_hidden_helper_threads_initz_wait();
7559 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7561 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
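// The run_before/run_after hooks bracket every invocation of an implicit
// task: they reset the thread's dispatch state and, when consistency checks
// are enabled, push/pop the parallel construct on the checker stack.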
7566 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7567 kmp_team_t *team) {
7568 kmp_disp_t *dispatch;
7573 this_thr->th.th_local.this_construct = 0;
7574 #if KMP_CACHE_MANAGE
7575 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7577 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7578 KMP_DEBUG_ASSERT(dispatch);
7579 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7583 dispatch->th_disp_index = 0;
7584 dispatch->th_doacross_buf_idx = 0;
7585 if (__kmp_env_consistency_check)
7586 __kmp_push_parallel(gtid, team->t.t_ident);
7591 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
7592 kmp_team_t *team) {
7593 if (__kmp_env_consistency_check)
7594 __kmp_pop_parallel(gtid, team->t.t_ident);
7596 __kmp_finish_implicit_task(this_thr);
7599 int __kmp_invoke_task_func(int gtid) {
7601 int tid = __kmp_tid_from_gtid(gtid);
7602 kmp_info_t *this_thr = __kmp_threads[gtid];
7603 kmp_team_t *team = this_thr->th.th_team;
7605 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7607 if (__itt_stack_caller_create_ptr) {
7609 if (team->t.t_stack_id != NULL) {
7610 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7612 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7613 __kmp_itt_stack_callee_enter(
7614 (__itt_caller)team->t.t_parent->t.t_stack_id);
7618 #if INCLUDE_SSC_MARKS
7619 SSC_MARK_INVOKING();
7624 void **exit_frame_p;
7625 ompt_data_t *my_task_data;
7626 ompt_data_t *my_parallel_data;
7629 if (ompt_enabled.enabled) {
7630 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7631 .ompt_task_info.frame.exit_frame.ptr);
7633 exit_frame_p = &dummy;
7637 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7638 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7639 if (ompt_enabled.ompt_callback_implicit_task) {
7640 ompt_team_size = team->t.t_nproc;
7641 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7642 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7643 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7644 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7648 #if KMP_STATS_ENABLED
7650 if (previous_state == stats_state_e::TEAMS_REGION) {
7651 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7653 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7655 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7658 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7659 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7666 *exit_frame_p = NULL;
7667 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7670 #if KMP_STATS_ENABLED
7671 if (previous_state == stats_state_e::TEAMS_REGION) {
7672 KMP_SET_THREAD_STATE(previous_state);
7674 KMP_POP_PARTITIONED_TIMER();
7678 if (__itt_stack_caller_create_ptr) {
7680 if (team->t.t_stack_id != NULL) {
7681 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7683 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7684 __kmp_itt_stack_callee_leave(
7685 (__itt_caller)team->t.t_parent->t.t_stack_id);
7689 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7694 void __kmp_teams_master(int gtid) {
7696 kmp_info_t *thr = __kmp_threads[gtid];
7697 kmp_team_t *team = thr->th.th_team;
7698 ident_t *loc = team->t.t_ident;
7699 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7700 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7701 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7702 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7703 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7706 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7709 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7710 tmp->cg_nthreads = 1;
7711 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7712 " cg_nthreads to 1\n",
7714 tmp->up = thr->th.th_cg_roots;
7715 thr->th.th_cg_roots = tmp;
7719 #if INCLUDE_SSC_MARKS
7722 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7723 (microtask_t)thr->th.th_teams_microtask,
7724 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7725 #if INCLUDE_SSC_MARKS
7729 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7730 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7733 __kmp_join_call(loc, gtid
7742 int __kmp_invoke_teams_master(int gtid) {
7743 kmp_info_t *this_thr = __kmp_threads[gtid];
7744 kmp_team_t *team = this_thr->th.th_team;
7746 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7747 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7748 (void *)__kmp_teams_master);
7750 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7752 int tid = __kmp_tid_from_gtid(gtid);
7753 ompt_data_t *task_data =
7754 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7755 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7756 if (ompt_enabled.ompt_callback_implicit_task) {
7757 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7758 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7760 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7763 __kmp_teams_master(gtid);
7765 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7767 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
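// The __kmp_push_* routines below record clauses of the upcoming construct
// (num_threads, num_teams/thread_limit, proc_bind) on the encountering
// thread; the stored values are consumed by the next fork.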
7776 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7777 kmp_info_t *thr = __kmp_threads[gtid];
7779 if (num_threads > 0)
7780 thr->th.th_set_nproc = num_threads;
7783 static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
7784 int num_threads) {
7785 KMP_DEBUG_ASSERT(thr);
7787 if (!TCR_4(__kmp_init_middle))
7788 __kmp_middle_initialize();
7789 __kmp_assign_root_init_mask();
7790 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7791 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7793 if (num_threads == 0) {
7794 if (__kmp_teams_thread_limit > 0) {
7795 num_threads = __kmp_teams_thread_limit;
7797 num_threads = __kmp_avail_proc / num_teams;
7802 if (num_threads > __kmp_dflt_team_nth) {
7803 num_threads = __kmp_dflt_team_nth;
7805 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7806 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7808 if (num_teams * num_threads > __kmp_teams_max_nth) {
7809 num_threads = __kmp_teams_max_nth / num_teams;
7811 if (num_threads == 0) {
7815 if (num_threads < 0) {
7816 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7822 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7824 if (num_threads > __kmp_dflt_team_nth) {
7825 num_threads = __kmp_dflt_team_nth;
7827 if (num_teams * num_threads > __kmp_teams_max_nth) {
7828 int new_threads = __kmp_teams_max_nth / num_teams;
7829 if (new_threads == 0) {
7832 if (new_threads != num_threads) {
7833 if (!__kmp_reserve_warn) {
7834 __kmp_reserve_warn = 1;
7835 __kmp_msg(kmp_ms_warning,
7836 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7837 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7840 num_threads = new_threads;
7843 thr->th.th_teams_size.nth = num_threads;
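// Example of the capping above: with num_teams = 4 and __kmp_teams_max_nth =
// 64, at most 64 / 4 = 16 threads per team are kept, and a warning is issued
// if the request had to be reduced.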
7848 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
7849 int num_threads) {
7850 kmp_info_t *thr = __kmp_threads[gtid];
7851 if (num_teams < 0) {
7854 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7858 if (num_teams == 0) {
7859 if (__kmp_nteams > 0) {
7860 num_teams = __kmp_nteams;
7865 if (num_teams > __kmp_teams_max_nth) {
7866 if (!__kmp_reserve_warn) {
7867 __kmp_reserve_warn = 1;
7868 __kmp_msg(kmp_ms_warning,
7869 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7870 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7872 num_teams = __kmp_teams_max_nth;
7876 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7878 __kmp_push_thread_limit(thr, num_teams, num_threads);
7883 void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7884 int num_teams_ub, int num_threads) {
7885 kmp_info_t *thr = __kmp_threads[gtid];
7886 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7887 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7888 KMP_DEBUG_ASSERT(num_threads >= 0);
7890 if (num_teams_lb > num_teams_ub) {
7891 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7892 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7897 if (num_teams_lb == 0 && num_teams_ub > 0)
7898 num_teams_lb = num_teams_ub;
7900 if (num_teams_lb == 0 && num_teams_ub == 0) {
7901 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7902 if (num_teams > __kmp_teams_max_nth) {
7903 if (!__kmp_reserve_warn) {
7904 __kmp_reserve_warn = 1;
7905 __kmp_msg(kmp_ms_warning,
7906 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7907 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7909 num_teams = __kmp_teams_max_nth;
7911 } else if (num_teams_lb == num_teams_ub) {
7912 num_teams = num_teams_ub;
7914 if (num_threads <= 0) {
7915 if (num_teams_ub > __kmp_teams_max_nth) {
7916 num_teams = num_teams_lb;
7918 num_teams = num_teams_ub;
7921 num_teams = (num_threads > __kmp_teams_max_nth)
7923 : __kmp_teams_max_nth / num_threads;
7924 if (num_teams < num_teams_lb) {
7925 num_teams = num_teams_lb;
7926 } else if (num_teams > num_teams_ub) {
7927 num_teams = num_teams_ub;
7933 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7935 __kmp_push_thread_limit(thr, num_teams, num_threads);
7939 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7940 kmp_info_t *thr = __kmp_threads[gtid];
7941 thr->th.th_set_proc_bind = proc_bind;
7946 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7947 kmp_info_t *this_thr = __kmp_threads[gtid];
7953 KMP_DEBUG_ASSERT(team);
7954 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7955 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7958 team->t.t_construct = 0;
7959 team->t.t_ordered.dt.t_value = 0;
7963 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7964 if (team->t.t_max_nproc > 1) {
7966 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7967 team->t.t_disp_buffer[i].buffer_index = i;
7968 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7971 team->t.t_disp_buffer[0].buffer_index = 0;
7972 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7976 KMP_ASSERT(this_thr->th.th_team == team);
7979 for (f = 0; f < team->t.t_nproc; f++) {
7980 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7981 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7986 __kmp_fork_barrier(gtid, 0);
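// __kmp_internal_join: the primary thread releases the team through the join
// barrier; with OMPT enabled it also reports the end of the implicit barrier
// and of the worker's implicit task to the tool.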
7989 void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7990 kmp_info_t *this_thr = __kmp_threads[gtid];
7992 KMP_DEBUG_ASSERT(team);
7993 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7994 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8000 if (__kmp_threads[gtid] &&
8001 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8002 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8003 __kmp_threads[gtid]);
8004 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8005 "team->t.t_nproc=%d\n",
8006 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8008 __kmp_print_structure();
8010 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8011 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8014 __kmp_join_barrier(gtid);
8016 if (ompt_enabled.enabled &&
8017 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8018 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8019 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8020 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8022 void *codeptr = NULL;
8023 if (KMP_MASTER_TID(ds_tid) &&
8024 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8025 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8026 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8028 if (ompt_enabled.ompt_callback_sync_region_wait) {
8029 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8030 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8033 if (ompt_enabled.ompt_callback_sync_region) {
8034 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8035 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8039 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8040 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8041 ompt_scope_end, NULL, task_data, 0, ds_tid,
8042 ompt_task_implicit);
8048 KMP_ASSERT(this_thr->th.th_team == team);
8053 #ifdef USE_LOAD_BALANCE
8057 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8060 kmp_team_t *hot_team;
8062 if (root->r.r_active) {
8065 hot_team = root->r.r_hot_team;
8066 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8067 return hot_team->t.t_nproc - 1;
8072 for (i = 1; i < hot_team->t.t_nproc; i++) {
8073 if (hot_team->t.t_threads[i]->th.th_active) {
8082 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8085 int hot_team_active;
8086 int team_curr_active;
8089 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8091 KMP_DEBUG_ASSERT(root);
8092 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8093 ->th.th_current_task->td_icvs.dynamic == TRUE);
8094 KMP_DEBUG_ASSERT(set_nproc > 1);
8096 if (set_nproc == 1) {
8097 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8106 pool_active = __kmp_thread_pool_active_nth;
8107 hot_team_active = __kmp_active_hot_team_nproc(root);
8108 team_curr_active = pool_active + hot_team_active + 1;
8111 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8112 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8113 "hot team active = %d\n",
8114 system_active, pool_active, hot_team_active));
8116 if (system_active < 0) {
8120 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8121 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8124 retval = __kmp_avail_proc - __kmp_nth +
8125 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8126 if (retval > set_nproc) {
8129 if (retval < KMP_MIN_NTH) {
8130 retval = KMP_MIN_NTH;
8133 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8141 if (system_active < team_curr_active) {
8142 system_active = team_curr_active;
8144 retval = __kmp_avail_proc - system_active + team_curr_active;
8145 if (retval > set_nproc) {
8148 if (retval < KMP_MIN_NTH) {
8149 retval = KMP_MIN_NTH;
8152 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8161 void __kmp_cleanup(void) {
8164 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8166 if (TCR_4(__kmp_init_parallel)) {
8167 #if KMP_HANDLE_SIGNALS
8168 __kmp_remove_signals();
8170 TCW_4(__kmp_init_parallel, FALSE);
8173 if (TCR_4(__kmp_init_middle)) {
8174 #if KMP_AFFINITY_SUPPORTED
8175 __kmp_affinity_uninitialize();
8177 __kmp_cleanup_hierarchy();
8178 TCW_4(__kmp_init_middle, FALSE);
8181 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8183 if (__kmp_init_serial) {
8184 __kmp_runtime_destroy();
8185 __kmp_init_serial = FALSE;
8188 __kmp_cleanup_threadprivate_caches();
8190 for (f = 0; f < __kmp_threads_capacity; f++) {
8191 if (__kmp_root[f] != NULL) {
8192 __kmp_free(__kmp_root[f]);
8193 __kmp_root[f] = NULL;
8196 __kmp_free(__kmp_threads);
8199 __kmp_threads = NULL;
8201 __kmp_threads_capacity = 0;
8204 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8206 kmp_old_threads_list_t *next = ptr->next;
8207 __kmp_free(ptr->threads);
8212 #if KMP_USE_DYNAMIC_LOCK
8213 __kmp_cleanup_indirect_user_locks();
8215 __kmp_cleanup_user_locks();
8219 __kmp_free(ompd_env_block);
8220 ompd_env_block = NULL;
8221 ompd_env_block_size = 0;
8225 #if KMP_AFFINITY_SUPPORTED
8226 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8227 __kmp_cpuinfo_file = NULL;
8230 #if KMP_USE_ADAPTIVE_LOCKS
8231 #if KMP_DEBUG_ADAPTIVE_LOCKS
8232 __kmp_print_speculative_stats();
8235 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8236 __kmp_nested_nth.nth = NULL;
8237 __kmp_nested_nth.size = 0;
8238 __kmp_nested_nth.used = 0;
8239 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8240 __kmp_nested_proc_bind.bind_types = NULL;
8241 __kmp_nested_proc_bind.size = 0;
8242 __kmp_nested_proc_bind.used = 0;
8243 if (__kmp_affinity_format) {
8244 KMP_INTERNAL_FREE(__kmp_affinity_format);
8245 __kmp_affinity_format = NULL;
8248 __kmp_i18n_catclose();
8250 #if KMP_USE_HIER_SCHED
8251 __kmp_hier_scheds.deallocate();
8254 #if KMP_STATS_ENABLED
8258 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8263 int __kmp_ignore_mppbeg(void) {
8266 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8267 if (__kmp_str_match_false(env))
8274 int __kmp_ignore_mppend(void) {
8277 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8278 if (__kmp_str_match_false(env))
8285 void __kmp_internal_begin(void) {
8291 gtid = __kmp_entry_gtid();
8292 root = __kmp_threads[gtid]->th.th_root;
8293 KMP_ASSERT(KMP_UBER_GTID(gtid));
8295 if (root->r.r_begin)
8297 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8298 if (root->r.r_begin) {
8299 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8303 root->r.r_begin = TRUE;
8305 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8310 void __kmp_user_set_library(enum library_type arg) {
8317 gtid = __kmp_entry_gtid();
8318 thread = __kmp_threads[gtid];
8320 root = thread->th.th_root;
8322 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8324 if (root->r.r_in_parallel) {
8326 KMP_WARNING(SetLibraryIncorrectCall);
8331 case library_serial:
8332 thread->th.th_set_nproc = 0;
8333 set__nproc(thread, 1);
8335 case library_turnaround:
8336 thread->th.th_set_nproc = 0;
8337 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8338 : __kmp_dflt_team_nth_ub);
8340 case library_throughput:
8341 thread->th.th_set_nproc = 0;
8342 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8343 : __kmp_dflt_team_nth_ub);
8346 KMP_FATAL(UnknownLibraryType, arg);
8349 __kmp_aux_set_library(arg);
8352 void __kmp_aux_set_stacksize(size_t arg) {
8353 if (!__kmp_init_serial)
8354 __kmp_serial_initialize();
8357 if (arg & (0x1000 - 1)) {
8358 arg &= ~(0x1000 - 1);
8363 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8366 if (!TCR_4(__kmp_init_parallel)) {
8369 if (value < __kmp_sys_min_stksize)
8370 value = __kmp_sys_min_stksize;
8371 else if (value > KMP_MAX_STKSIZE)
8372 value = KMP_MAX_STKSIZE;
8374 __kmp_stksize = value;
8376 __kmp_env_stksize = TRUE;
8379 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8384 void __kmp_aux_set_library(enum library_type arg) {
8385 __kmp_library = arg;
8387 switch (__kmp_library) {
8388 case library_serial: {
8389 KMP_INFORM(LibraryIsSerial);
8391 case library_turnaround:
8392 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8393 __kmp_use_yield = 2;
8395 case library_throughput:
8396 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8397 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8400 KMP_FATAL(UnknownLibraryType, arg);
8406 static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8407 kmp_info_t *thr = __kmp_entry_thread();
8408 teams_serialized = 0;
8409 if (thr->th.th_teams_microtask) {
8410 kmp_team_t *team = thr->th.th_team;
8411 int tlevel = thr->th.th_teams_level;
8412 int ii = team->t.t_level;
8413 teams_serialized = team->t.t_serialized;
8414 int level = tlevel + 1;
8415 KMP_DEBUG_ASSERT(ii >= tlevel);
8416 while (ii > level) {
8417 for (teams_serialized = team->t.t_serialized;
8418 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8420 if (team->t.t_serialized && (!teams_serialized)) {
8421 team = team->t.t_parent;
8425 team = team->t.t_parent;
8434 int __kmp_aux_get_team_num() {
8436 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8438 if (serialized > 1) {
8441 return team->t.t_master_tid;
8447 int __kmp_aux_get_num_teams() {
8449 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8451 if (serialized > 1) {
8454 return team->t.t_parent->t.t_nproc;
8493 typedef struct kmp_affinity_format_field_t {
char short_name; // single-character field specifier, e.g. 'n'
8495 const char *long_name; // long field name, e.g. "thread_num"
char field_format; // printf conversion used for the field, e.g. 'd' or 's'
8498 } kmp_affinity_format_field_t;
8500 static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8501 #if KMP_AFFINITY_SUPPORTED
8502 {'A', "thread_affinity", 's'},
#endif
8504 {'t', "team_num", 'd'},
8505 {'T', "num_teams", 'd'},
8506 {'L', "nesting_level", 'd'},
8507 {'n', "thread_num", 'd'},
8508 {'N', "num_threads", 'd'},
8509 {'a', "ancestor_tnum", 'd'},
8511 {'P', "process_id", 'd'},
8512 {'i', "native_thread_id", 'd'}};
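// Illustrative example: an affinity format string such as "%i %n %A" would
// expand, per thread, to the native thread id, the thread_num within the
// team, and the affinity mask, using the descriptors in the table above.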
8515 static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
8516 const char **ptr,
8517 kmp_str_buf_t *field_buffer) {
8518 int rc, format_index, field_value;
8519 const char *width_left, *width_right;
8520 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8521 static const int FORMAT_SIZE = 20;
8522 char format[FORMAT_SIZE] = {0};
8523 char absolute_short_name = 0;
8525 KMP_DEBUG_ASSERT(gtid >= 0);
8526 KMP_DEBUG_ASSERT(th);
8527 KMP_DEBUG_ASSERT(**ptr == '%');
8528 KMP_DEBUG_ASSERT(field_buffer);
8530 __kmp_str_buf_clear(field_buffer);
8537 __kmp_str_buf_cat(field_buffer, "%", 1);
8548 right_justify = false;
8550 right_justify = true;
8554 width_left = width_right = NULL;
8555 if (**ptr >= '0' && **ptr <= '9') {
8563 format[format_index++] = '%';
8565 format[format_index++] = '-';
8567 format[format_index++] = '0';
8568 if (width_left && width_right) {
8572 while (i < 8 && width_left < width_right) {
8573 format[format_index++] = *width_left;
8581 found_valid_name = false;
8582 parse_long_name = (**ptr == '{');
8583 if (parse_long_name)
8585 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8586 sizeof(__kmp_affinity_format_table[0]);
8588 char short_name = __kmp_affinity_format_table[i].short_name;
8589 const char *long_name = __kmp_affinity_format_table[i].long_name;
8590 char field_format = __kmp_affinity_format_table[i].field_format;
8591 if (parse_long_name) {
8592 size_t length = KMP_STRLEN(long_name);
8593 if (strncmp(*ptr, long_name, length) == 0) {
8594 found_valid_name = true;
8597 } else if (**ptr == short_name) {
8598 found_valid_name = true;
8601 if (found_valid_name) {
8602 format[format_index++] = field_format;
8603 format[format_index++] = '\0';
8604 absolute_short_name = short_name;
8608 if (parse_long_name) {
8610 absolute_short_name = 0;
8618 switch (absolute_short_name) {
8620 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8623 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8626 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8629 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8632 static const int BUFFER_SIZE = 256;
8633 char buf[BUFFER_SIZE];
8634 __kmp_expand_host_name(buf, BUFFER_SIZE);
8635 rc = __kmp_str_buf_print(field_buffer, format, buf);
8638 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8641 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8644 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8648 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8649 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8651 #if KMP_AFFINITY_SUPPORTED
8654 __kmp_str_buf_init(&buf);
8655 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8656 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8657 __kmp_str_buf_free(&buf);
8663 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8665 if (parse_long_name) {
8674 KMP_ASSERT(format_index <= FORMAT_SIZE);
8684 size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8685 kmp_str_buf_t *buffer) {
8686 const char *parse_ptr;
8688 const kmp_info_t *th;
8689 kmp_str_buf_t field;
8691 KMP_DEBUG_ASSERT(buffer);
8692 KMP_DEBUG_ASSERT(gtid >= 0);
8694 __kmp_str_buf_init(&field);
8695 __kmp_str_buf_clear(buffer);
8697 th = __kmp_threads[gtid];
8703 if (parse_ptr == NULL || *parse_ptr == '\0') {
8704 parse_ptr = __kmp_affinity_format;
8706 KMP_DEBUG_ASSERT(parse_ptr);
8708 while (*parse_ptr != '\0') {
8710 if (*parse_ptr == '%') {
8712 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8713 __kmp_str_buf_catbuf(buffer, &field);
8717 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8722 __kmp_str_buf_free(&field);
8727 void __kmp_aux_display_affinity(int gtid, const char *format) {
8729 __kmp_str_buf_init(&buf);
8730 __kmp_aux_capture_affinity(gtid, format, &buf);
8731 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8732 __kmp_str_buf_free(&buf);
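// __kmp_aux_set_blocktime clamps the requested value to
// [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and stores it in the ICVs of both
// the thread's current team and its serial team.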
8736 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8737 int blocktime = arg;
8743 __kmp_save_internal_controls(thread);
8746 if (blocktime < KMP_MIN_BLOCKTIME)
8747 blocktime = KMP_MIN_BLOCKTIME;
8748 else if (blocktime > KMP_MAX_BLOCKTIME)
8749 blocktime = KMP_MAX_BLOCKTIME;
8751 set__blocktime_team(thread->th.th_team, tid, blocktime);
8752 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8756 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8758 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8759 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8765 set__bt_set_team(thread->th.th_team, tid, bt_set);
8766 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8768 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8769 "bt_intervals=%d, monitor_updates=%d\n",
8770 __kmp_gtid_from_tid(tid, thread->th.th_team),
8771 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8772 __kmp_monitor_wakeups));
8774 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8775 __kmp_gtid_from_tid(tid, thread->th.th_team),
8776 thread->th.th_team->t.t_id, tid, blocktime));
8780 void __kmp_aux_set_defaults(char const *str, size_t len) {
8781 if (!__kmp_init_serial) {
8782 __kmp_serial_initialize();
8784 __kmp_env_initialize(str);
8786 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8794 PACKED_REDUCTION_METHOD_T
8795 __kmp_determine_reduction_method(
8796 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8797 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8798 kmp_critical_name *lck) {
8809 PACKED_REDUCTION_METHOD_T retval;
8813 KMP_DEBUG_ASSERT(lck);
8815 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8817 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8818 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8820 retval = critical_reduce_block;
8823 team_size = __kmp_get_team_num_threads(global_tid);
8824 if (team_size == 1) {
8826 retval = empty_reduce_block;
8830 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8832 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8833 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE
8835 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8836 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8838 int teamsize_cutoff = 4;
8840 #if KMP_MIC_SUPPORTED
8841 if (__kmp_mic_type != non_mic) {
8842 teamsize_cutoff = 8;
8845 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8846 if (tree_available) {
8847 if (team_size <= teamsize_cutoff) {
8848 if (atomic_available) {
8849 retval = atomic_reduce_block;
8852 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8854 } else if (atomic_available) {
8855 retval = atomic_reduce_block;
8858 #error "Unknown or unsupported OS"
8862 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8864 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
8868 if (atomic_available) {
8869 if (num_vars <= 2) {
8870 retval = atomic_reduce_block;
8876 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8877 if (atomic_available && (num_vars <= 3)) {
8878 retval = atomic_reduce_block;
8879 } else if (tree_available) {
8880 if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8881 (reduce_size < (2000 * sizeof(kmp_real64)))) {
8882 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8887 #error "Unknown or unsupported OS"
8891 #error "Unknown or unsupported architecture"
8899 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8902 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8904 int atomic_available, tree_available;
8906 switch ((forced_retval = __kmp_force_reduction_method)) {
8907 case critical_reduce_block:
8911 case atomic_reduce_block:
8912 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8913 if (!atomic_available) {
8914 KMP_WARNING(RedMethodNotSupported, "atomic");
8915 forced_retval = critical_reduce_block;
8919 case tree_reduce_block:
8920 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8921 if (!tree_available) {
8922 KMP_WARNING(RedMethodNotSupported, "tree");
8923 forced_retval = critical_reduce_block;
8925 #if KMP_FAST_REDUCTION_BARRIER
8926 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8935 retval = forced_retval;
8938 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
8940 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
8941 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
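// The chosen method is packed into th_local.packed_reduction_method; the
// accessor below returns the method bits (value shifted right by 8) for use
// by compiler-generated reduction code.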
8946 kmp_int32 __kmp_get_reduce_method(void) {
8947 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8952 void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8956 void __kmp_hard_pause() {
8957 __kmp_pause_status = kmp_hard_paused;
8958 __kmp_internal_end_thread(-1);
8962 void __kmp_resume_if_soft_paused() {
8963 if (__kmp_pause_status == kmp_soft_paused) {
8964 __kmp_pause_status = kmp_not_paused;
8966 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8967 kmp_info_t *thread = __kmp_threads[gtid];
8969 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8971 if (fl.is_sleeping())
8973 else if (__kmp_try_suspend_mx(thread)) {
8974 __kmp_unlock_suspend_mx(thread);
8977 if (fl.is_sleeping()) {
8980 } else if (__kmp_try_suspend_mx(thread)) {
8981 __kmp_unlock_suspend_mx(thread);
8993 int __kmp_pause_resource(kmp_pause_status_t level) {
8994 if (level == kmp_not_paused) {
8995 if (__kmp_pause_status == kmp_not_paused) {
8999 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9000 __kmp_pause_status == kmp_hard_paused);
9001 __kmp_pause_status = kmp_not_paused;
9004 } else if (level == kmp_soft_paused) {
9005 if (__kmp_pause_status != kmp_not_paused) {
9012 } else if (level == kmp_hard_paused) {
9013 if (__kmp_pause_status != kmp_not_paused) {
9026 void __kmp_omp_display_env(int verbose) {
9027 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9028 if (__kmp_init_serial == 0)
9029 __kmp_do_serial_initialize();
9030 __kmp_display_env_impl(!verbose, verbose);
9031 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
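// Distributed-barrier resize: threads leaving the team are moved from
// th_used_in_team state 1 to 2 and eventually to 0; the barrier is then
// resized with update_num_threads() and go_reset(). __kmp_add_threads_to_team
// flips joining threads from 0 to 3 and waits until each reports state 1.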
9035 void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
9036 int new_nthreads) {
9037 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9039 kmp_info_t **other_threads = team->t.t_threads;
9043 for (int f = 1; f < old_nthreads; ++f) {
9044 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9046 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9052 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9053 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9057 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9059 team->t.t_threads[f]->th.th_used_in_team.store(2);
9060 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9063 team->t.b->go_release();
9069 int count = old_nthreads - 1;
9071 count = old_nthreads - 1;
9072 for (int f = 1; f < old_nthreads; ++f) {
9073 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9074 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9075 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9076 void *, other_threads[f]->th.th_sleep_loc);
9077 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9080 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9086 team->t.b->update_num_threads(new_nthreads);
9087 team->t.b->go_reset();
9090 void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9092 KMP_DEBUG_ASSERT(team);
9098 for (int f = 1; f < new_nthreads; ++f) {
9099 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9100 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9102 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9103 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9104 (kmp_flag_32<false, false> *)NULL);
9110 int count = new_nthreads - 1;
9112 count = new_nthreads - 1;
9113 for (int f = 1; f < new_nthreads; ++f) {
9114 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9122 kmp_info_t **__kmp_hidden_helper_threads;
9123 kmp_info_t *__kmp_hidden_helper_main_thread;
9124 std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9126 kmp_int32 __kmp_hidden_helper_threads_num = 8;
9127 kmp_int32 __kmp_enable_hidden_helper = TRUE;
9129 kmp_int32 __kmp_hidden_helper_threads_num = 0;
9130 kmp_int32 __kmp_enable_hidden_helper = FALSE;
9134 std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9136 void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9141 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9142 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9143 __kmp_hidden_helper_threads_num)
9149 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9150 __kmp_hidden_helper_initz_release();
9151 __kmp_hidden_helper_main_thread_wait();
9153 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9154 __kmp_hidden_helper_worker_thread_signal();
9160 void __kmp_hidden_helper_threads_initz_routine() {
9162 const int gtid = __kmp_register_root(TRUE);
9163 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9164 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9165 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9166 __kmp_hidden_helper_threads_num;
9168 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9173 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9175 __kmp_hidden_helper_threads_deinitz_release();
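// Nesting-mode support: build a per-topology-level thread-count table
// (__kmp_nesting_nth_level) so nested parallel regions can mirror the machine
// hierarchy when __kmp_nesting_mode is enabled.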
9195 void __kmp_init_nesting_mode() {
9196 int levels = KMP_HW_LAST;
9197 __kmp_nesting_mode_nlevels = levels;
9198 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9199 for (int i = 0; i < levels; ++i)
9200 __kmp_nesting_nth_level[i] = 0;
9201 if (__kmp_nested_nth.size < levels) {
9202 __kmp_nested_nth.nth =
9203 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9204 __kmp_nested_nth.size = levels;
9209 void __kmp_set_nesting_mode_threads() {
9210 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9212 if (__kmp_nesting_mode == 1)
9213 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9214 else if (__kmp_nesting_mode > 1)
9215 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9217 if (__kmp_topology) {
9219 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9220 loc < __kmp_nesting_mode_nlevels;
9221 loc++, hw_level++) {
9222 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9223 if (__kmp_nesting_nth_level[loc] == 1)
9227 if (__kmp_nesting_mode > 1 && loc > 1) {
9228 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9229 int num_cores = __kmp_topology->get_count(core_level);
9230 int upper_levels = 1;
9231 for (int level = 0; level < loc - 1; ++level)
9232 upper_levels *= __kmp_nesting_nth_level[level];
9233 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9234 __kmp_nesting_nth_level[loc - 1] =
9235 num_cores / __kmp_nesting_nth_level[loc - 2];
9237 __kmp_nesting_mode_nlevels = loc;
9238 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9240 if (__kmp_avail_proc >= 4) {
9241 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9242 __kmp_nesting_nth_level[1] = 2;
9243 __kmp_nesting_mode_nlevels = 2;
9245 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9246 __kmp_nesting_mode_nlevels = 1;
9248 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9250 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9251 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9253 set__nproc(thread, __kmp_nesting_nth_level[0]);
9254 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9255 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9256 if (get__max_active_levels(thread) > 1) {
9258 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9260 if (__kmp_nesting_mode == 1)
9261 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9266 #if !KMP_STATS_ENABLED
9267 void __kmp_reset_stats() {}
9270 int __kmp_omp_debug_struct_info = FALSE;
9271 int __kmp_debugging = FALSE;
9273 #if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9274 void __kmp_itt_fini_ittlib() {}
9275 void __kmp_itt_init_ittlib() {}