14 #include "kmp_affinity.h"
15 #include "kmp_atomic.h"
16 #include "kmp_environment.h"
17 #include "kmp_error.h"
21 #include "kmp_settings.h"
22 #include "kmp_stats.h"
24 #include "kmp_wait_release.h"
25 #include "kmp_wrapper_getpid.h"
26 #include "kmp_dispatch.h"
27 #if KMP_USE_HIER_SCHED
28 #include "kmp_dispatch_hier.h"
32 #include "ompt-specific.h"
35 #include "ompd-specific.h"
38 #if OMP_PROFILING_SUPPORT
39 #include "llvm/Support/TimeProfiler.h"
40 static char *ProfileTraceFile =
nullptr;
44 #define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
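/* Forward declarations of internal helpers defined later in this file. */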
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
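/* __kmp_get_global_thread_id: determine the gtid of the calling thread without
   registering it. Prefer the TDATA / keyed-TLS value when the gtid mode allows
   it; otherwise walk __kmp_threads and match the address of a local variable
   against each registered thread's recorded stack base and size, refining the
   stored stack extent as a side effect. */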
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
   __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  ("*** __kmp_get_global_thread_id: internal alg. failed to find "
   "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

    if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
      KMP_FATAL(StackOverflow, i);

    stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    if (stack_addr > stack_base) {
      TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
      TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
              other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
      TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
              stack_base - stack_addr);

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
int __kmp_get_global_thread_id_reg() {
  if (!__kmp_init_serial) {
#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
    ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
     "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
      gtid = __kmp_register_root(FALSE);
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KMP_DEBUG_ASSERT(gtid >= 0);
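/* __kmp_check_stack_overlap: when storage mapping is on, print this thread's
   stack extent; when extensive environment checks are enabled, verify that the
   stack does not overlap the recorded stack of any other registered thread and
   report an overlap as a fatal error. */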
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));

void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
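/* __kmp_print_storage_map_gtid: emit one "OMP storage map" line describing the
   address range [p1, p2] of the given size, serialized with the stdio
   bootstrap lock; when data-placement printing is enabled, also report the
   memory node backing each page of the range. */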
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
        __kmp_storage_map_verbose = FALSE;

      int localProc = __kmp_get_cpu_from_gtid(gtid);

      const int page_size = KMP_GET_PAGE_SIZE();

      p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
      p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
      __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
        __kmp_printf_no_lock(" GTID %d\n", gtid);

          (char *)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,

      __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                             (char *)p2 + (page_size - 1),
                             __kmp_get_host_node(p2));

    __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
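/* __kmp_warn: printf-style runtime warning written to kmp_err, suppressed when
   __kmp_generate_warnings == kmp_warnings_off; output is serialized with the
   stdio bootstrap lock. */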
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
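/* __kmp_abort_process: terminate the process, after dumping the debug buffer
   (if enabled) and unregistering the library; guarded by the exit bootstrap
   lock. __kmp_abort_thread aborts only the calling thread. */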
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  if (KMP_OS_WINDOWS) {
    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
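/* __kmp_print_thread_storage_map: describe the layout of one kmp_info_t in the
   storage map: the descriptor itself plus its th_info, th_local and per-type
   barrier sub-structures. */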
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
#endif // KMP_FAST_REDUCTION_BARRIER

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
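/* __kmp_parallel_deo / __kmp_parallel_dxo: enter and exit an "ordered"
   construct. With BUILD_PARALLEL_ORDERED, entry waits until the team's ordered
   ticket (t_ordered.dt.t_value) equals this thread's tid, and exit passes the
   ticket to the next tid; consistency-check frames are pushed/popped when
   enabled. */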
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
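/* __kmp_enter_single / __kmp_exit_single: arbitrate a "single" construct. The
   winner is selected by an atomic compare-and-store on the team's t_construct
   counter; the return value of __kmp_enter_single tells the caller whether it
   executes the single region. */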
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
      __kmp_check_workshare(gtid, ct_psingle, id_ref);

    __kmp_itt_single_start(gtid);

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
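/* __kmp_reserve_threads: decide how many threads a new parallel region may
   actually get. Starting from the requested set_nthreads, apply the dynamic
   adjustment mode (load balance, thread limit, or random), then clamp against
   KMP_DEVICE_THREAD_LIMIT, the contention-group limit (OMP_THREAD_LIMIT) and
   the capacity of the __kmp_threads array, warning when a non-dynamic request
   has to be reduced. A result of 1 causes the caller to serialize the
   region. */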
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
      new_nthreads = set_nthreads;
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);

  if (new_nthreads == 1) {
    ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
     "dead roots and rechecking; requested %d threads\n",
     __kmp_get_gtid(), set_nthreads));

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                __kmp_get_gtid(), new_nthreads, set_nthreads));
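/* __kmp_fork_team_threads: attach the primary thread to the new team, then
   either reuse the hot team's workers or allocate workers from the thread
   pool. Teams-construct state is propagated to each worker, worker barrier
   counters are seeded from the team, and (with affinity support) places are
   partitioned for a non-teams fork. */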
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  int level = team->t.t_active_level - 1;
  if (master_th->th.th_teams_microtask) {
    if (master_th->th.th_teams_size.nteams > 1) {
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        master_th->th.th_teams_level == team->t.t_level) {
  if (level < __kmp_hot_teams_max_level) {
    if (hot_teams[level].hot_team) {
      KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
      hot_teams[level].hot_team = team;
      hot_teams[level].hot_team_nth = team->t.t_nproc;
  use_hot_team = team == root->r.r_hot_team;

    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b) {
        balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
        balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED
  if (!fork_teams_workers) {
    __kmp_partition_places(team);

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
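/* On x86/x86_64, propagateFPControl captures the primary thread's x87 FPU
   control word and MXCSR into the team when __kmp_inherit_fp_control is set,
   and updateHWFPControl reloads the team's saved values on a thread whose
   current registers differ; on other architectures both collapse to no-ops. */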
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
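/* __kmp_serialized_parallel: execute a parallel region with a team of one.
   Reuses or allocates the thread's serial team, bumps the serialization and
   nesting levels, pushes a fresh dispatch buffer, applies nested nth and
   proc-bind ICVs, and reports the region to OMPT as a team of size 1. */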
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    dispatch_private_info_t *disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(
            sizeof(dispatch_private_info_t));
    disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
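/* __kmp_fork_call: the central implementation behind the compiler-visible fork
   entry points. It decides between serialized and truly parallel execution,
   handles the teams-construct and nested-parallelism special cases, reserves
   threads, allocates or reuses a team, wires up ICVs, task teams and OMPT/ITT
   bookkeeping, and finally releases the workers and invokes the microtask on
   the primary thread. */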
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

    __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {
        parent_team->t.t_serialized--;

      parent_team->t.t_pkfn = microtask;

      void **exit_frame_p;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

      parent_team->t.t_serialized--;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);

    if (master_set_numthreads) {
      if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
        kmp_info_t **other_threads = parent_team->t.t_threads;
        int old_proc = master_th->th.th_teams_size.nth;
        if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
          __kmp_resize_dist_barrier(parent_team, old_proc,
                                    master_set_numthreads);
          __kmp_add_threads_to_team(parent_team, master_set_numthreads);
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
      master_th->th.th_set_nproc = 0;

    if (__kmp_debugging) {
      int nth = __kmp_omp_num_threads(loc);
        master_set_numthreads = nth;

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
      proc_bind = proc_bind_false;
      if (proc_bind == proc_bind_default) {
        proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
      if ((level + 1 < __kmp_nested_proc_bind.used) &&
          (__kmp_nested_proc_bind.bind_types[level + 1] !=
           master_th->th.th_current_task->td_icvs.proc_bind)) {
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
    if (proc_bind_icv != proc_bind_default &&
        master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
      kmp_info_t **other_threads = parent_team->t.t_threads;
      for (i = 0; i < master_th->th.th_team_nproc; ++i) {
        other_threads[i]->th.th_current_task->td_icvs.proc_bind =

    master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 &&
        master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(parent_team);

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)

    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

  int enter_teams = 0;
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    enter_teams = ((ap == NULL && active_level == 0) ||
                   (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    if ((get__max_active_levels(master_th) == 1 &&
         (root->r.r_in_parallel && !enter_teams)) ||
        (__kmp_library == library_serial)) {
      KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;
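  /* Serialized path: only one thread was reserved, so the region runs on the
     primary thread with a team of one instead of forking workers. */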
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

    ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    master_th->th.th_serial_team->t.t_pkfn = microtask;

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
      master_th->th.th_serial_team->t.t_level--;

      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc,
                               parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team,
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team ==
                       master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      for (i = 0; i < argc; ++i)
        argv[i] = parent_team->t.t_argv[i];

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team,
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;
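  /* Build the ICV set for the new team (applying any nested nproc / proc-bind
     overrides) and allocate or reuse a team via __kmp_allocate_team; with the
     distributed barrier, the team's own ICV copy is refreshed as well. */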
  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);

      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;

        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);

          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        master_th->th.th_task_state =
            .th_task_state_memo_stack[master_th->th.th_task_state_top];
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));

      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
    for (i = 0; i < argc; ++i) {
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;
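  /* Attach workers to the team, propagate ICVs, set up ITT frame and
     stack-stitching state, then release the team through __kmp_internal_fork
     and (when required by the entry point) invoke the microtask on the primary
     thread. */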
  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1 && !master_th->th.th_teams_microtask) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);

  ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
   root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(previous_state);

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;

static inline void __kmp_join_restore_state(kmp_info_t *thread,
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
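/* __kmp_join_call: tear down a parallel region on the primary thread. Waits
   for the team in __kmp_internal_join, finishes ITT frame and stack-stitching
   reporting, restores the primary thread's task state and affinity places,
   handles the teams-construct early-exit case, and decrements the root's
   active-parallel count. */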
2332 void __kmp_join_call(
ident_t *loc,
int gtid
2335 enum fork_context_e fork_context
2339 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2341 kmp_team_t *parent_team;
2342 kmp_info_t *master_th;
2346 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2349 master_th = __kmp_threads[gtid];
2350 root = master_th->th.th_root;
2351 team = master_th->th.th_team;
2352 parent_team = team->t.t_parent;
2354 master_th->th.th_ident = loc;
2357 void *team_microtask = (
void *)team->t.t_pkfn;
2361 if (ompt_enabled.enabled &&
2362 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2363 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2368 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
2374 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2375 team->t.t_task_team[master_th->th.th_task_state]);
2379 if (team->t.t_serialized) {
2380 if (master_th->th.th_teams_microtask) {
2382 int level = team->t.t_level;
2383 int tlevel = master_th->th.th_teams_level;
2384 if (level == tlevel) {
2388 }
else if (level == tlevel + 1) {
2392 team->t.t_serialized++;
2398 if (ompt_enabled.enabled) {
2399 __kmp_join_restore_state(master_th, parent_team);
2406 master_active = team->t.t_master_active;
2411 __kmp_internal_join(loc, gtid, team);
2413 if (__itt_stack_caller_create_ptr) {
2414 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2416 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2417 team->t.t_stack_id = NULL;
2421 master_th->th.th_task_state =
2424 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2425 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2429 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2430 parent_team->t.t_stack_id = NULL;
2434 if (team->t.t_nproc > 1 &&
2435 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2436 team->t.b->update_num_threads(team->t.t_nproc);
2437 __kmp_add_threads_to_team(team, team->t.t_nproc);
2444 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2445 void *codeptr = team->t.ompt_team_info.master_return_address;
2450 if (team->t.t_active_level == 1 &&
2451 (!master_th->th.th_teams_microtask ||
2452 master_th->th.th_teams_size.nteams == 1)) {
2453 master_th->th.th_ident = loc;
2456 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2457 __kmp_forkjoin_frames_mode == 3)
2458 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2459 master_th->th.th_frame_time, 0, loc,
2460 master_th->th.th_team_nproc, 1);
2461 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2462 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2463 __kmp_itt_region_joined(gtid);
2467 #if KMP_AFFINITY_SUPPORTED
2470 master_th->th.th_first_place = team->t.t_first_place;
2471 master_th->th.th_last_place = team->t.t_last_place;
2473 #endif // KMP_AFFINITY_SUPPORTED
2475 if (master_th->th.th_teams_microtask && !exit_teams &&
2476 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2477 team->t.t_level == master_th->th.th_teams_level + 1) {
2482 ompt_data_t ompt_parallel_data = ompt_data_none;
2483 if (ompt_enabled.enabled) {
2484 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2485 if (ompt_enabled.ompt_callback_implicit_task) {
2486 int ompt_team_size = team->t.t_nproc;
2487 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2488 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2489 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2491 task_info->frame.exit_frame = ompt_data_none;
2492 task_info->task_data = ompt_data_none;
2493 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2494 __ompt_lw_taskteam_unlink(master_th);
2499 team->t.t_active_level--;
2500 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2506 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2507 int old_num = master_th->th.th_team_nproc;
2508 int new_num = master_th->th.th_teams_size.nth;
2509 kmp_info_t **other_threads = team->t.t_threads;
2510 team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      for (int i = old_num; i < new_num; ++i) {
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
2520 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2521 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2523 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2526 if (__kmp_tasking_mode != tskm_immediate_exec) {
2528 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2534 if (ompt_enabled.enabled) {
2535 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2536 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2544 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2545 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2547 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2552 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2554 if (!master_th->th.th_teams_microtask ||
2555 team->t.t_level > master_th->th.th_teams_level) {
2557 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2559 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2562 if (ompt_enabled.enabled) {
2563 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2564 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
2568 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2569 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2570 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2571 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2573 task_info->frame.exit_frame = ompt_data_none;
2574 task_info->task_data = ompt_data_none;
2578 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2580 __kmp_pop_current_task_from_thread(master_th);
2582 master_th->th.th_def_allocator = team->t.t_def_allocator;
2585 if (ompd_state & OMPD_ENABLE_BP)
2586 ompd_bp_parallel_end();
2588 updateHWFPControl(team);
2590 if (root->r.r_active != master_active)
2591 root->r.r_active = master_active;
  __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th));
2601 master_th->th.th_team = parent_team;
2602 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2603 master_th->th.th_team_master = parent_team->t.t_threads[0];
2604 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2607 if (parent_team->t.t_serialized &&
2608 parent_team != master_th->th.th_serial_team &&
2609 parent_team != root->r.r_root_team) {
2610 __kmp_free_team(root,
2611 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2612 master_th->th.th_serial_team = parent_team;
2615 if (__kmp_tasking_mode != tskm_immediate_exec) {
2616 if (master_th->th.th_task_state_top >
2618 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2620 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2621 master_th->th.th_task_state;
2622 --master_th->th.th_task_state_top;
2624 master_th->th.th_task_state =
2626 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2629 master_th->th.th_task_team =
2630 parent_team->t.t_task_team[master_th->th.th_task_state];
2632 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2633 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2640 master_th->th.th_current_task->td_flags.executing = 1;
2642 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
2649 if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2661 void __kmp_save_internal_controls(kmp_info_t *thread) {
2663 if (thread->th.th_team != thread->th.th_serial_team) {
2666 if (thread->th.th_team->t.t_serialized > 1) {
2669 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2672 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2673 thread->th.th_team->t.t_serialized) {
2678 kmp_internal_control_t *control =
2679 (kmp_internal_control_t *)__kmp_allocate(
2680 sizeof(kmp_internal_control_t));
2682 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2684 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2686 control->next = thread->th.th_team->t.t_control_stack_top;
2687 thread->th.th_team->t.t_control_stack_top = control;
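// Change the nthreads-var ICV for the calling thread (reached, e.g., from
// omp_set_num_threads()): clamp the request to __kmp_max_nth, record it in the
// current task's nproc ICV, and, if the root's hot team is now larger than
// needed and nested hot teams are not in use, release the surplus hot-team
// threads back to the thread pool.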
void __kmp_set_num_threads(int new_nth, int gtid) {
  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2698 KMP_DEBUG_ASSERT(__kmp_init_serial);
2702 else if (new_nth > __kmp_max_nth)
2703 new_nth = __kmp_max_nth;
2706 thread = __kmp_threads[gtid];
2707 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2710 __kmp_save_internal_controls(thread);
2712 set__nproc(thread, new_nth);
2717 root = thread->th.th_root;
2718 if (__kmp_init_parallel && (!root->r.r_active) &&
2719 (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2724 kmp_team_t *hot_team = root->r.r_hot_team;
2727 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2729 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2730 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2733 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2734 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2735 if (__kmp_tasking_mode != tskm_immediate_exec) {
2738 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2740 __kmp_free_thread(hot_team->t.t_threads[f]);
2741 hot_team->t.t_threads[f] = NULL;
2743 hot_team->t.t_nproc = new_nth;
2744 #if KMP_NESTED_HOT_TEAMS
2745 if (thread->th.th_hot_teams) {
2746 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2747 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2751 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2752 hot_team->t.b->update_num_threads(new_nth);
2753 __kmp_add_threads_to_team(hot_team, new_nth);
2756 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2759 for (f = 0; f < new_nth; f++) {
2760 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2761 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2764 hot_team->t.t_size_changed = -1;
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
2775 KMP_DEBUG_ASSERT(__kmp_init_serial);
2778 if (max_active_levels < 0) {
2779 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2784 KF_TRACE(10, (
"__kmp_set_max_active_levels: the call is ignored: new "
2785 "max_active_levels for thread %d = (%d)\n",
2786 gtid, max_active_levels));
2789 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2794 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2795 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2796 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2802 KF_TRACE(10, (
"__kmp_set_max_active_levels: after validation: new "
2803 "max_active_levels for thread %d = (%d)\n",
2804 gtid, max_active_levels));
2806 thread = __kmp_threads[gtid];
2808 __kmp_save_internal_controls(thread);
2810 set__max_active_levels(thread, max_active_levels);
int __kmp_get_max_active_levels(int gtid) {
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2818 KMP_DEBUG_ASSERT(__kmp_init_serial);
2820 thread = __kmp_threads[gtid];
2821 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2822 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, "
2823 "curtask_maxaclevel=%d\n",
2824 gtid, thread->th.th_current_task,
2825 thread->th.th_current_task->td_icvs.max_active_levels));
2826 return thread->th.th_current_task->td_icvs.max_active_levels;
void __kmp_set_num_teams(int num_teams) {
  __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
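// Set the run-sched-var ICV for the calling thread (e.g. via omp_set_schedule):
// the public kmp_sched_t kind is validated, mapped through __kmp_sch_map onto
// the internal enum sched_type, any schedule modifiers are re-applied, and a
// non-positive chunk falls back to KMP_DEFAULT_CHUNK.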
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
2853 KMP_DEBUG_ASSERT(__kmp_init_serial);
2860 kind = __kmp_sched_without_mods(kind);
2862 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2863 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
2872 thread = __kmp_threads[gtid];
2874 __kmp_save_internal_controls(thread);
2876 if (kind < kmp_sched_upper_std) {
2877 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2880 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2882 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2883 __kmp_sch_map[kind - kmp_sched_lower - 1];
2888 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2889 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2890 kmp_sched_lower - 2];
2892 __kmp_sched_apply_mods_intkind(
2893 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2894 if (kind == kmp_sched_auto || chunk < 1) {
2896 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2898 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2908 KMP_DEBUG_ASSERT(__kmp_init_serial);
2910 thread = __kmp_threads[gtid];
2912 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2913 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2915 case kmp_sch_static_greedy:
2916 case kmp_sch_static_balanced:
2917 *kind = kmp_sched_static;
2918 __kmp_sched_apply_mods_stdkind(kind, th_type);
2921 case kmp_sch_static_chunked:
2922 *kind = kmp_sched_static;
2924 case kmp_sch_dynamic_chunked:
2925 *kind = kmp_sched_dynamic;
2928 case kmp_sch_guided_iterative_chunked:
2929 case kmp_sch_guided_analytical_chunked:
2930 *kind = kmp_sched_guided;
2933 *kind = kmp_sched_auto;
2935 case kmp_sch_trapezoidal:
2936 *kind = kmp_sched_trapezoidal;
2938 #if KMP_STATIC_STEAL_ENABLED
2939 case kmp_sch_static_steal:
2940 *kind = kmp_sched_static_steal;
2944 KMP_FATAL(UnknownSchedulingType, th_type);
2947 __kmp_sched_apply_mods_stdkind(kind, th_type);
2948 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2958 KMP_DEBUG_ASSERT(__kmp_init_serial);
2965 thr = __kmp_threads[gtid];
2966 team = thr->th.th_team;
2967 ii = team->t.t_level;
2971 if (thr->th.th_teams_microtask) {
2973 int tlevel = thr->th.th_teams_level;
2976 KMP_DEBUG_ASSERT(ii >= tlevel);
2988 return __kmp_tid_from_gtid(gtid);
2990 dd = team->t.t_serialized;
2992 while (ii > level) {
2993 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2995 if ((team->t.t_serialized) && (!dd)) {
2996 team = team->t.t_parent;
3000 team = team->t.t_parent;
3001 dd = team->t.t_serialized;
3006 return (dd > 1) ? (0) : (team->t.t_master_tid);
int __kmp_get_team_size(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3016 KMP_DEBUG_ASSERT(__kmp_init_serial);
3023 thr = __kmp_threads[gtid];
3024 team = thr->th.th_team;
3025 ii = team->t.t_level;
3029 if (thr->th.th_teams_microtask) {
3031 int tlevel = thr->th.th_teams_level;
3034 KMP_DEBUG_ASSERT(ii >= tlevel);
3045 while (ii > level) {
3046 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3048 if (team->t.t_serialized && (!dd)) {
3049 team = team->t.t_parent;
3053 team = team->t.t_parent;
3058 return team->t.t_nproc;
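// Build the default kmp_r_sched_t from the global settings: __kmp_sched
// (replaced by __kmp_static or __kmp_guided for the plain static/guided
// kinds), with schedule modifiers preserved, and __kmp_chunk used unless it is
// below KMP_DEFAULT_CHUNK.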
3061 kmp_r_sched_t __kmp_get_schedule_global() {
3066 kmp_r_sched_t r_sched;
3072 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3073 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3076 r_sched.r_sched_type = __kmp_static;
3079 r_sched.r_sched_type = __kmp_guided;
3081 r_sched.r_sched_type = __kmp_sched;
3083 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3085 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3087 r_sched.chunk = KMP_DEFAULT_CHUNK;
3089 r_sched.chunk = __kmp_chunk;
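// Make sure team->t.t_argv has room for argc entries. Small argument lists are
// kept in the team's inline array (KMP_INLINE_ARGV_ENTRIES); larger ones are
// page-allocated, and any previous heap allocation is freed first when
// reallocating.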
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3099 KMP_DEBUG_ASSERT(team);
3100 if (!realloc || argc > team->t.t_max_argc) {
    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);
3109 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3111 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
3115 team->t.t_argv = &team->t.t_inline_argv[0];
3116 if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
3125 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3126 ? KMP_MIN_MALLOC_ARGV_ENTRIES
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3133 if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3154 team->t.t_max_nproc = max_nth;
3157 for (i = 0; i < num_disp_buff; ++i) {
3158 team->t.t_disp_buffer[i].buffer_index = i;
3159 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3163 static void __kmp_free_team_arrays(kmp_team_t *team) {
3166 for (i = 0; i < team->t.t_max_nproc; ++i) {
3167 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3168 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3169 team->t.t_dispatch[i].th_disp_buffer = NULL;
3172 #if KMP_USE_HIER_SCHED
3173 __kmp_dispatch_free_hierarchies(team);
3175 __kmp_free(team->t.t_threads);
3176 __kmp_free(team->t.t_disp_buffer);
3177 __kmp_free(team->t.t_dispatch);
3178 __kmp_free(team->t.t_implicit_task_taskdata);
3179 team->t.t_threads = NULL;
3180 team->t.t_disp_buffer = NULL;
3181 team->t.t_dispatch = NULL;
3182 team->t.t_implicit_task_taskdata = 0;
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3186 kmp_info_t **oldThreads = team->t.t_threads;
3188 __kmp_free(team->t.t_disp_buffer);
3189 __kmp_free(team->t.t_dispatch);
3190 __kmp_free(team->t.t_implicit_task_taskdata);
3191 __kmp_allocate_team_arrays(team, max_nth);
  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));
3196 __kmp_free(oldThreads);
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3204 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3206 kmp_internal_control_t g_icvs = {
3208 (kmp_int8)__kmp_global.g.g_dynamic,
3210 (kmp_int8)__kmp_env_blocktime,
3212 __kmp_dflt_blocktime,
3217 __kmp_dflt_team_nth,
3221 __kmp_dflt_max_active_levels,
3225 __kmp_nested_proc_bind.bind_types[0],
3226 __kmp_default_device,
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3235 kmp_internal_control_t gx_icvs;
3236 gx_icvs.serial_nesting_level =
3238 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3239 gx_icvs.next = NULL;
3244 static void __kmp_initialize_root(kmp_root_t *root) {
3246 kmp_team_t *root_team;
3247 kmp_team_t *hot_team;
3248 int hot_team_max_nth;
3249 kmp_r_sched_t r_sched =
3250 __kmp_get_schedule_global();
3251 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3252 KMP_DEBUG_ASSERT(root);
3253 KMP_ASSERT(!root->r.r_begin);
3256 __kmp_init_lock(&root->r.r_begin_lock);
3257 root->r.r_begin = FALSE;
3258 root->r.r_active = FALSE;
3259 root->r.r_in_parallel = 0;
3260 root->r.r_blocktime = __kmp_dflt_blocktime;
3261 #if KMP_AFFINITY_SUPPORTED
3262 root->r.r_affinity_assigned = FALSE;
3267 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3270 __kmp_allocate_team(root,
3276 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3278 USE_NESTED_HOT_ARG(NULL)
3283 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3286 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3288 root->r.r_root_team = root_team;
3289 root_team->t.t_control_stack_top = NULL;
3292 root_team->t.t_threads[0] = NULL;
3293 root_team->t.t_nproc = 1;
3294 root_team->t.t_serialized = 1;
3296 root_team->t.t_sched.sched = r_sched.sched;
3299 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3300 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3304 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3307 __kmp_allocate_team(root,
3309 __kmp_dflt_team_nth_ub * 2,
3313 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3315 USE_NESTED_HOT_ARG(NULL)
3317 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3319 root->r.r_hot_team = hot_team;
3320 root_team->t.t_control_stack_top = NULL;
3323 hot_team->t.t_parent = root_team;
3326 hot_team_max_nth = hot_team->t.t_max_nproc;
3327 for (f = 0; f < hot_team_max_nth; ++f) {
3328 hot_team->t.t_threads[f] = NULL;
3330 hot_team->t.t_nproc = 1;
3332 hot_team->t.t_sched.sched = r_sched.sched;
3333 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum(
    kmp_team_list_t list,
    kmp_team_p const *team
3356 KMP_DEBUG_ASSERT(list != NULL);
3361 __kmp_print_structure_team_accum(list, team->t.t_parent);
3362 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3366 while (l->next != NULL && l->entry != team) {
3369 if (l->next != NULL) {
3375 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3381 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3382 sizeof(kmp_team_list_item_t));
3389 static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3392 __kmp_printf(
"%s", title);
3394 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3396 __kmp_printf(
" - (nil)\n");
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {
  kmp_team_list_t list;

  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
3424 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3425 __kmp_printf(
"%2d", gtid);
3426 if (__kmp_threads != NULL) {
3427 __kmp_printf(
" %p", __kmp_threads[gtid]);
3429 if (__kmp_root != NULL) {
3430 __kmp_printf(
" %p", __kmp_root[gtid]);
3437 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3439 if (__kmp_threads != NULL) {
3441 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3442 kmp_info_t
const *thread = __kmp_threads[gtid];
3443 if (thread != NULL) {
3444 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3445 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3446 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3447 __kmp_print_structure_team(
" Serial Team: ",
3448 thread->th.th_serial_team);
3449 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3450 __kmp_print_structure_thread(
" Primary: ",
3451 thread->th.th_team_master);
3452 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3453 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3454 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3455 __kmp_print_structure_thread(
" Next in pool: ",
3456 thread->th.th_next_pool);
3458 __kmp_print_structure_team_accum(list, thread->th.th_team);
3459 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3463 __kmp_printf(
"Threads array is not allocated.\n");
3467 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3469 if (__kmp_root != NULL) {
3471 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3472 kmp_root_t
const *root = __kmp_root[gtid];
3474 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3475 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3476 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3477 __kmp_print_structure_thread(
" Uber Thread: ",
3478 root->r.r_uber_thread);
3479 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3480 __kmp_printf(
" In Parallel: %2d\n",
3481 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3483 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3484 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3488 __kmp_printf(
"Ubers array is not allocated.\n");
3491 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3493 while (list->next != NULL) {
3494 kmp_team_p
const *team = list->entry;
3496 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3497 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3498 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3499 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3500 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3501 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3502 for (i = 0; i < team->t.t_nproc; ++i) {
3503 __kmp_printf(
" Thread %2d: ", i);
3504 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3506 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3512 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3514 __kmp_print_structure_thread(
"Thread pool: ",
3515 CCAST(kmp_info_t *, __kmp_thread_pool));
3516 __kmp_print_structure_team(
"Team pool: ",
3517 CCAST(kmp_team_t *, __kmp_team_pool));
3521 while (list != NULL) {
3522 kmp_team_list_item_t *item = list;
3524 KMP_INTERNAL_FREE(item);
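// Table of odd multipliers used to seed the per-thread linear congruential
// random number generators (see __kmp_get_random / __kmp_init_random below);
// each thread picks its multiplier th_a from this table based on its tid.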
3533 static const unsigned __kmp_primes[] = {
3534 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3535 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3536 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3537 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3538 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3539 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3540 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3541 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3542 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3543 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3544 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
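// Per-thread pseudo-random number generator: a 32-bit linear congruential
// generator x_{n+1} = a * x_n + 1 (mod 2^32) whose multiplier th_a comes from
// __kmp_primes. The high 16 bits of the current state are returned, i.e.
//   unsigned short r = (unsigned short)(thread->th.th_x >> 16);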
3548 unsigned short __kmp_get_random(kmp_info_t *thread) {
3549 unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);
3552 thread->th.th_x = x * thread->th.th_a + 1;
3554 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3555 thread->th.th_info.ds.ds_tid, r));
3561 void __kmp_init_random(kmp_info_t *thread) {
3562 unsigned seed = thread->th.th_info.ds.ds_tid;
  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3566 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3568 (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
static int __kmp_reclaim_dead_roots(void) {
3577 for (i = 0; i < __kmp_threads_capacity; ++i) {
3578 if (KMP_UBER_GTID(i) &&
3579 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3582 r += __kmp_unregister_root_other_thread(i);
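// Grow __kmp_threads and __kmp_root so that at least nNeed more entries are
// available (bounded by __kmp_sys_max_nth). Capacity is doubled until it
// reaches the required minimum, both arrays are carved out of one allocation,
// the old contents are copied over, and the new pointers and capacity are then
// published.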
static int __kmp_expand_threads(int nNeed) {
3609 int minimumRequiredCapacity;
3611 kmp_info_t **newThreads;
3612 kmp_root_t **newRoot;
3618 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3621 added = __kmp_reclaim_dead_roots();
3650 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3653 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3657 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
3673 kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
3676 __kmp_free(temp_threads);
3677 added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
3680 if (newCapacity > __kmp_tp_capacity) {
3681 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3682 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3683 __kmp_threadprivate_resize_cache(newCapacity);
    *(volatile int *)&__kmp_tp_capacity = newCapacity;
3687 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
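// Register the calling (or initial) thread as an OpenMP root: find a free slot
// in __kmp_threads, allocate and initialize the kmp_root_t with its root team,
// hot team and serial team, set up gtid TLS, barrier state, affinity place
// fields and the contention-group root node, and emit the OMPT/OMPD
// thread-begin events. Returns the gtid chosen for the new root.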
int __kmp_register_root(int initial_thread) {
3697 kmp_info_t *root_thread;
3701 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3702 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3719 capacity = __kmp_threads_capacity;
3720 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3727 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3728 capacity -= __kmp_hidden_helper_threads_num;
3732 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3733 if (__kmp_tp_cached) {
3734 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3735 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3736 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3738 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3748 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3751 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3752 gtid <= __kmp_hidden_helper_threads_num;
3755 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3756 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3757 "hidden helper thread: T#%d\n",
3763 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3766 for (gtid = __kmp_hidden_helper_threads_num + 1;
3767 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3771 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3772 KMP_ASSERT(gtid < __kmp_threads_capacity);
3777 TCW_4(__kmp_nth, __kmp_nth + 1);
3781 if (__kmp_adjust_gtid_mode) {
3782 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3783 if (TCR_4(__kmp_gtid_mode) != 2) {
3784 TCW_4(__kmp_gtid_mode, 2);
3787 if (TCR_4(__kmp_gtid_mode) != 1) {
3788 TCW_4(__kmp_gtid_mode, 1);
3793 #ifdef KMP_ADJUST_BLOCKTIME
3796 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3797 if (__kmp_nth > __kmp_avail_proc) {
3798 __kmp_zero_bt = TRUE;
3804 if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3806 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3809 #if KMP_STATS_ENABLED
3811 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3812 __kmp_stats_thread_ptr->startLife();
3813 KMP_SET_THREAD_STATE(SERIAL_REGION);
3816 __kmp_initialize_root(root);
3819 if (root->r.r_uber_thread) {
3820 root_thread = root->r.r_uber_thread;
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3823 if (__kmp_storage_map) {
3824 __kmp_print_thread_storage_map(root_thread, gtid);
3826 root_thread->th.th_info.ds.ds_gtid = gtid;
3828 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3830 root_thread->th.th_root = root;
3831 if (__kmp_env_consistency_check) {
3832 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3835 __kmp_initialize_fast_memory(root_thread);
3839 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3840 __kmp_initialize_bget(root_thread);
3842 __kmp_init_random(root_thread);
3846 if (!root_thread->th.th_serial_team) {
3847 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3848 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3849 root_thread->th.th_serial_team = __kmp_allocate_team(
3854 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3856 KMP_ASSERT(root_thread->th.th_serial_team);
3857 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3858 root_thread->th.th_serial_team));
3861 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3863 root->r.r_root_team->t.t_threads[0] = root_thread;
3864 root->r.r_hot_team->t.t_threads[0] = root_thread;
3865 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3867 root_thread->th.th_serial_team->t.t_serialized = 0;
3868 root->r.r_uber_thread = root_thread;
3871 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3872 TCW_4(__kmp_init_gtid, TRUE);
3875 __kmp_gtid_set_specific(gtid);
3878 __kmp_itt_thread_name(gtid);
3881 #ifdef KMP_TDATA_GTID
3884 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3885 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
3894 for (b = 0; b < bs_last_barrier; ++b) {
3895 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3897 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3901 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3902 KMP_INIT_BARRIER_STATE);
3904 #if KMP_AFFINITY_SUPPORTED
3905 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3906 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3907 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3908 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3910 root_thread->th.th_def_allocator = __kmp_def_allocator;
3911 root_thread->th.th_prev_level = 0;
3912 root_thread->th.th_prev_num_threads = 1;
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3915 tmp->cg_root = root_thread;
3916 tmp->cg_thread_limit = __kmp_cg_max_nth;
3917 tmp->cg_nthreads = 1;
3918 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3919 " cg_nthreads init to 1\n",
3922 root_thread->th.th_cg_roots = tmp;
3924 __kmp_root_counter++;
3927 if (!initial_thread && ompt_enabled.enabled) {
3929 kmp_info_t *root_thread = ompt_get_thread();
3931 ompt_set_thread_state(root_thread, ompt_state_overhead);
3933 if (ompt_enabled.ompt_callback_thread_begin) {
3934 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3935 ompt_thread_initial, __ompt_get_thread_data_internal());
3937 ompt_data_t *task_data;
3938 ompt_data_t *parallel_data;
3939 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3941 if (ompt_enabled.ompt_callback_implicit_task) {
3942 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3943 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3946 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3950 if (ompd_state & OMPD_ENABLE_BP)
3951 ompd_bp_thread_begin();
3955 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3960 #if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
3964 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3965 if (!hot_teams || !hot_teams[level].hot_team) {
3968 KMP_DEBUG_ASSERT(level < max_level);
3969 kmp_team_t *team = hot_teams[level].hot_team;
3970 nth = hot_teams[level].hot_team_nth;
3972 if (level < max_level - 1) {
3973 for (i = 0; i < nth; ++i) {
3974 kmp_info_t *th = team->t.t_threads[i];
3975 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3976 if (i > 0 && th->th.th_hot_teams) {
3977 __kmp_free(th->th.th_hot_teams);
3978 th->th.th_hot_teams = NULL;
3982 __kmp_free_team(root, team, NULL);
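// Tear down a root that is no longer active: detach and free its root team and
// hot team (including nested hot teams), wait for task teams to be
// unreferenced, emit the OMPT/OMPD end callbacks, release the CG root node,
// and reap the uber thread, returning a count of the threads that were in its
// hot team(s).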
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
3990 kmp_team_t *root_team = root->r.r_root_team;
3991 kmp_team_t *hot_team = root->r.r_hot_team;
3992 int n = hot_team->t.t_nproc;
3995 KMP_DEBUG_ASSERT(!root->r.r_active);
3997 root->r.r_root_team = NULL;
3998 root->r.r_hot_team = NULL;
4001 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4002 #if KMP_NESTED_HOT_TEAMS
4003 if (__kmp_hot_teams_max_level >
4005 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4006 kmp_info_t *th = hot_team->t.t_threads[i];
4007 if (__kmp_hot_teams_max_level > 1) {
4008 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4010 if (th->th.th_hot_teams) {
4011 __kmp_free(th->th.th_hot_teams);
4012 th->th.th_hot_teams = NULL;
4017 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4022 if (__kmp_tasking_mode != tskm_immediate_exec) {
4023 __kmp_wait_to_unref_task_teams();
4029 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4031 (LPVOID) & (root->r.r_uber_thread->th),
4032 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4033 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4037 if (ompd_state & OMPD_ENABLE_BP)
4038 ompd_bp_thread_end();
4042 ompt_data_t *task_data;
4043 ompt_data_t *parallel_data;
4044 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4046 if (ompt_enabled.ompt_callback_implicit_task) {
4047 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4048 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4050 if (ompt_enabled.ompt_callback_thread_end) {
4051 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4052 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4058 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4059 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4061 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4062 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4065 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4066 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4067 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4068 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4069 root->r.r_uber_thread->th.th_cg_roots = NULL;
4071 __kmp_reap_thread(root->r.r_uber_thread, 1);
4075 root->r.r_uber_thread = NULL;
4077 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4087 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4088 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4089 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4092 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4095 kmp_root_t *root = __kmp_root[gtid];
4097 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4098 KMP_ASSERT(KMP_UBER_GTID(gtid));
4099 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4100 KMP_ASSERT(root->r.r_active == FALSE);
4104 kmp_info_t *thread = __kmp_threads[gtid];
4105 kmp_team_t *team = thread->th.th_team;
4106 kmp_task_team_t *task_team = thread->th.th_task_team;
4109 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
4112 thread->th.ompt_thread_info.state = ompt_state_undefined;
4114 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4117 __kmp_reset_root(gtid, root);
4121 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4123 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4135 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4136 KMP_ASSERT(KMP_UBER_GTID(gtid));
4137 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4138 KMP_ASSERT(root->r.r_active == FALSE);
4140 r = __kmp_reset_root(gtid, root);
4142 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4148 void __kmp_task_info() {
4150 kmp_int32 gtid = __kmp_entry_gtid();
4151 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4152 kmp_info_t *this_thr = __kmp_threads[gtid];
4153 kmp_team_t *steam = this_thr->th.th_serial_team;
4154 kmp_team_t *team = this_thr->th.th_team;
4157 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4159 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4160 team->t.t_implicit_task_taskdata[tid].td_parent);
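// (Re)bind a thread to a team slot: set its tid, team pointers, root, and
// dispatch buffer, initialize its implicit task and threadprivate common
// table, hook it into the primary thread's contention-group (CG) root, and
// reset its task-state memo stack.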
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
4172 KMP_DEBUG_ASSERT(this_thr != NULL);
4173 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4174 KMP_DEBUG_ASSERT(team);
4175 KMP_DEBUG_ASSERT(team->t.t_threads);
4176 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4177 kmp_info_t *master = team->t.t_threads[0];
4178 KMP_DEBUG_ASSERT(master);
4179 KMP_DEBUG_ASSERT(master->th.th_root);
4183 TCW_SYNC_PTR(this_thr->th.th_team, team);
4185 this_thr->th.th_info.ds.ds_tid = tid;
4186 this_thr->th.th_set_nproc = 0;
4187 if (__kmp_tasking_mode != tskm_immediate_exec)
4190 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4192 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4193 this_thr->th.th_set_proc_bind = proc_bind_default;
4194 #if KMP_AFFINITY_SUPPORTED
4195 this_thr->th.th_new_place = this_thr->th.th_current_place;
4197 this_thr->th.th_root = master->th.th_root;
4200 this_thr->th.th_team_nproc = team->t.t_nproc;
4201 this_thr->th.th_team_master = master;
4202 this_thr->th.th_team_serialized = team->t.t_serialized;
4204 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4218 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4220 this_thr->th.th_local.this_construct = 0;
4222 if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4225 if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4230 this_thr->th.th_pri_head = NULL;
4233 if (this_thr != master &&
4234 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4236 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4237 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4240 int i = tmp->cg_nthreads--;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                   " on node %p of thread %p to %d\n",
                   this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4248 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4250 this_thr->th.th_cg_roots->cg_nthreads++;
4251 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4252 " node %p of thread %p to %d\n",
4253 this_thr, this_thr->th.th_cg_roots,
4254 this_thr->th.th_cg_roots->cg_root,
4255 this_thr->th.th_cg_roots->cg_nthreads));
4256 this_thr->th.th_current_task->td_icvs.thread_limit =
4257 this_thr->th.th_cg_roots->cg_thread_limit;
4262 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4267 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4268 team->t.t_max_nproc));
4269 KMP_ASSERT(dispatch);
4270 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4271 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4273 dispatch->th_disp_index = 0;
4274 dispatch->th_doacross_buf_idx = 0;
4275 if (!dispatch->th_disp_buffer) {
4276 dispatch->th_disp_buffer =
4277 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4279 if (__kmp_storage_map) {
4280 __kmp_print_storage_map_gtid(
4281 gtid, &dispatch->th_disp_buffer[0],
4282 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4284 : __kmp_dispatch_num_buffers],
4286 "th_%d.th_dispatch.th_disp_buffer "
4287 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4288 gtid, team->t.t_id, gtid);
    memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4294 dispatch->th_dispatch_pr_current = 0;
4295 dispatch->th_dispatch_sh_current = 0;
4297 dispatch->th_deo_fcn = 0;
4298 dispatch->th_dxo_fcn = 0;
4301 this_thr->th.th_next_pool = NULL;
4303 if (!this_thr->th.th_task_state_memo_stack) {
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4307 this_thr->th.th_task_state_top = 0;
4308 this_thr->th.th_task_state_stack_sz = 4;
4309 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4311 this_thr->th.th_task_state_memo_stack[i] = 0;
4314 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4315 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
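// Obtain a worker for the given team slot: reuse a thread from
// __kmp_thread_pool if one is available, otherwise pick a fresh gtid, allocate
// and initialize a new kmp_info_t (serial team, barrier data, random seed,
// affinity fields), adjust the gtid mode and blocktime heuristics, and start
// the underlying OS worker via __kmp_create_worker.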
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
4327 kmp_team_t *serial_team;
4328 kmp_info_t *new_thr;
4331 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4332 KMP_DEBUG_ASSERT(root && team);
4333 #if !KMP_NESTED_HOT_TEAMS
4334 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4339 if (__kmp_thread_pool) {
4340 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4341 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4342 if (new_thr == __kmp_thread_pool_insert_pt) {
4343 __kmp_thread_pool_insert_pt = NULL;
4345 TCW_4(new_thr->th.th_in_pool, FALSE);
4346 __kmp_suspend_initialize_thread(new_thr);
4347 __kmp_lock_suspend_mx(new_thr);
4348 if (new_thr->th.th_active_in_pool == TRUE) {
4349 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4350 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4351 new_thr->th.th_active_in_pool = FALSE;
4353 __kmp_unlock_suspend_mx(new_thr);
4355 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4356 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4357 KMP_ASSERT(!new_thr->th.th_team);
4358 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4361 __kmp_initialize_info(new_thr, team, new_tid,
4362 new_thr->th.th_info.ds.ds_gtid);
4363 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4365 TCW_4(__kmp_nth, __kmp_nth + 1);
4367 new_thr->th.th_task_state = 0;
4368 new_thr->th.th_task_state_top = 0;
4369 new_thr->th.th_task_state_stack_sz = 4;
4371 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4373 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4377 #ifdef KMP_ADJUST_BLOCKTIME
4380 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4381 if (__kmp_nth > __kmp_avail_proc) {
4382 __kmp_zero_bt = TRUE;
4391 kmp_balign_t *balign = new_thr->th.th_bar;
4392 for (b = 0; b < bs_last_barrier; ++b)
4393 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4396 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4397 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4404 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4405 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4410 if (!TCR_4(__kmp_init_monitor)) {
4411 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4412 if (!TCR_4(__kmp_init_monitor)) {
4413 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4414 TCW_4(__kmp_init_monitor, 1);
4415 __kmp_create_monitor(&__kmp_monitor);
4416 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4427 while (TCR_4(__kmp_init_monitor) < 2) {
4430 KF_TRACE(10, (
"after monitor thread has started\n"));
4433 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4440 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4442 : __kmp_hidden_helper_threads_num + 1;
4444 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4446 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4449 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4450 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4457 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4459 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4473 __itt_suppress_mark_range(__itt_suppress_range,
4474 __itt_suppress_threading_errors,
4475 &new_thr->th.th_suspend_init_count,
4476 sizeof(new_thr->th.th_suspend_init_count));
4479 __itt_suppress_mark_range(__itt_suppress_range,
4480 __itt_suppress_threading_errors,
4481 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4482 sizeof(new_thr->th.th_bar[0].bb.b_go));
4483 __itt_suppress_mark_range(__itt_suppress_range,
4484 __itt_suppress_threading_errors,
4485 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4486 sizeof(new_thr->th.th_bar[1].bb.b_go));
4487 __itt_suppress_mark_range(__itt_suppress_range,
4488 __itt_suppress_threading_errors,
4489 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4490 sizeof(new_thr->th.th_bar[2].bb.b_go));
4492 if (__kmp_storage_map) {
4493 __kmp_print_thread_storage_map(new_thr, new_gtid);
4498 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4499 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4500 new_thr->th.th_serial_team = serial_team =
4501 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4505 proc_bind_default, &r_icvs,
4506 0 USE_NESTED_HOT_ARG(NULL));
4508 KMP_ASSERT(serial_team);
4509 serial_team->t.t_serialized = 0;
4511 serial_team->t.t_threads[0] = new_thr;
4513 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4517 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4520 __kmp_initialize_fast_memory(new_thr);
4524 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4525 __kmp_initialize_bget(new_thr);
4528 __kmp_init_random(new_thr);
4532 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4533 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4536 kmp_balign_t *balign = new_thr->th.th_bar;
4537 for (b = 0; b < bs_last_barrier; ++b) {
4538 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4539 balign[b].bb.team = NULL;
4540 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4541 balign[b].bb.use_oncore_barrier = 0;
4544 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4545 new_thr->th.th_sleep_loc_type = flag_unset;
4547 new_thr->th.th_spin_here = FALSE;
4548 new_thr->th.th_next_waiting = 0;
4550 new_thr->th.th_blocking =
false;
4553 #if KMP_AFFINITY_SUPPORTED
4554 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4555 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4556 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4557 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4559 new_thr->th.th_def_allocator = __kmp_def_allocator;
4560 new_thr->th.th_prev_level = 0;
4561 new_thr->th.th_prev_num_threads = 1;
4563 TCW_4(new_thr->th.th_in_pool, FALSE);
4564 new_thr->th.th_active_in_pool = FALSE;
4565 TCW_4(new_thr->th.th_active, TRUE);
4573 if (__kmp_adjust_gtid_mode) {
4574 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4575 if (TCR_4(__kmp_gtid_mode) != 2) {
4576 TCW_4(__kmp_gtid_mode, 2);
4579 if (TCR_4(__kmp_gtid_mode) != 1) {
4580 TCW_4(__kmp_gtid_mode, 1);
4585 #ifdef KMP_ADJUST_BLOCKTIME
4588 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4589 if (__kmp_nth > __kmp_avail_proc) {
4590 __kmp_zero_bt = TRUE;
4597 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4598 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4600 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4602 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
4618 KMP_DEBUG_ASSERT(team && new_icvs);
4619 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4620 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4622 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4624 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4625 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4627 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4628 team->t.t_threads[0], team));
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4640 KMP_DEBUG_ASSERT(team);
4641 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4642 KMP_DEBUG_ASSERT(team->t.t_threads);
4645 team->t.t_master_tid = 0;
4647 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4648 team->t.t_nproc = new_nproc;
4651 team->t.t_next_pool = NULL;
4655 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4656 team->t.t_invoke = NULL;
4659 team->t.t_sched.sched = new_icvs->sched.sched;
4661 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4662 team->t.t_fp_control_saved = FALSE;
4663 team->t.t_x87_fpu_control_word = 0;
4664 team->t.t_mxcsr = 0;
4667 team->t.t_construct = 0;
4669 team->t.t_ordered.dt.t_value = 0;
4670 team->t.t_master_active = FALSE;
4673 team->t.t_copypriv_data = NULL;
4676 team->t.t_copyin_counter = 0;
4679 team->t.t_control_stack_top = NULL;
4681 __kmp_reinitialize_team(team, new_icvs, loc);
4684 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4687 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4690 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4691 if (KMP_AFFINITY_CAPABLE()) {
4693 if (old_mask != NULL) {
4694 status = __kmp_get_system_affinity(old_mask, TRUE);
4697 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4701 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4706 #if KMP_AFFINITY_SUPPORTED
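// Distribute the affinity places of the primary thread's partition among the
// team's threads according to the team's proc_bind policy: primary keeps all
// threads on the primary's place, close packs threads onto consecutive places,
// and spread subdivides the partition so threads are spaced as evenly as
// possible (optionally updating only the primary thread).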
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4714 if (KMP_HIDDEN_HELPER_TEAM(team))
4717 kmp_info_t *master_th = team->t.t_threads[0];
4718 KMP_DEBUG_ASSERT(master_th != NULL);
4719 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4720 int first_place = master_th->th.th_first_place;
4721 int last_place = master_th->th.th_last_place;
4722 int masters_place = master_th->th.th_current_place;
4723 team->t.t_first_place = first_place;
4724 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
4731 switch (proc_bind) {
4733 case proc_bind_default:
4736 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4739 case proc_bind_primary: {
4741 int n_th = team->t.t_nproc;
4742 for (f = 1; f < n_th; f++) {
4743 kmp_info_t *th = team->t.t_threads[f];
4744 KMP_DEBUG_ASSERT(th != NULL);
4745 th->th.th_first_place = first_place;
4746 th->th.th_last_place = last_place;
4747 th->th.th_new_place = masters_place;
4748 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4749 team->t.t_display_affinity != 1) {
4750 team->t.t_display_affinity = 1;
      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
4760 case proc_bind_close: {
4762 int n_th = team->t.t_nproc;
4764 if (first_place <= last_place) {
4765 n_places = last_place - first_place + 1;
4767 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4769 if (n_th <= n_places) {
4770 int place = masters_place;
4771 for (f = 1; f < n_th; f++) {
4772 kmp_info_t *th = team->t.t_threads[f];
4773 KMP_DEBUG_ASSERT(th != NULL);
4775 if (place == last_place) {
4776 place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4782 th->th.th_first_place = first_place;
4783 th->th.th_last_place = last_place;
4784 th->th.th_new_place = place;
4785 if (__kmp_display_affinity && place != th->th.th_current_place &&
4786 team->t.t_display_affinity != 1) {
4787 team->t.t_display_affinity = 1;
4790 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4791 "partition = [%d,%d]\n",
4792 __kmp_gtid_from_thread(team->t.t_threads[f]),
4793 team->t.t_id, f, place, first_place, last_place));
4796 int S, rem, gap, s_count;
4797 S = n_th / n_places;
4799 rem = n_th - (S * n_places);
4800 gap = rem > 0 ? n_places / rem : n_places;
4801 int place = masters_place;
4803 for (f = 0; f < n_th; f++) {
4804 kmp_info_t *th = team->t.t_threads[f];
4805 KMP_DEBUG_ASSERT(th != NULL);
4807 th->th.th_first_place = first_place;
4808 th->th.th_last_place = last_place;
4809 th->th.th_new_place = place;
4810 if (__kmp_display_affinity && place != th->th.th_current_place &&
4811 team->t.t_display_affinity != 1) {
4812 team->t.t_display_affinity = 1;
4816 if ((s_count == S) && rem && (gap_ct == gap)) {
4818 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4820 if (place == last_place) {
4821 place = first_place;
4822 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4830 }
else if (s_count == S) {
4831 if (place == last_place) {
4832 place = first_place;
4833 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4843 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4844 "partition = [%d,%d]\n",
4845 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4846 th->th.th_new_place, first_place, last_place));
4848 KMP_DEBUG_ASSERT(place == masters_place);
4852 case proc_bind_spread: {
4854 int n_th = team->t.t_nproc;
4857 if (first_place <= last_place) {
4858 n_places = last_place - first_place + 1;
4860 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4862 if (n_th <= n_places) {
      if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4866 int S = n_places / n_th;
4867 int s_count, rem, gap, gap_ct;
4869 place = masters_place;
4870 rem = n_places - n_th * S;
4871 gap = rem ? n_th / rem : 1;
4874 if (update_master_only == 1)
4876 for (f = 0; f < thidx; f++) {
4877 kmp_info_t *th = team->t.t_threads[f];
4878 KMP_DEBUG_ASSERT(th != NULL);
4880 th->th.th_first_place = place;
4881 th->th.th_new_place = place;
4882 if (__kmp_display_affinity && place != th->th.th_current_place &&
4883 team->t.t_display_affinity != 1) {
4884 team->t.t_display_affinity = 1;
4887 while (s_count < S) {
4888 if (place == last_place) {
4889 place = first_place;
4890 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4897 if (rem && (gap_ct == gap)) {
4898 if (place == last_place) {
4899 place = first_place;
4900 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4908 th->th.th_last_place = place;
4911 if (place == last_place) {
4912 place = first_place;
4913 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4920 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4921 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4922 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4923 f, th->th.th_new_place, th->th.th_first_place,
4924 th->th.th_last_place, __kmp_affinity_num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
4942 KMP_DEBUG_ASSERT(last >= first);
4943 if (first >= n_places) {
4944 if (masters_place) {
4947 if (first == (masters_place + 1)) {
4948 KMP_DEBUG_ASSERT(f == n_th);
4951 if (last == masters_place) {
4952 KMP_DEBUG_ASSERT(f == (n_th - 1));
4956 KMP_DEBUG_ASSERT(f == n_th);
4961 if (last >= n_places) {
4962 last = (n_places - 1);
4967 KMP_DEBUG_ASSERT(0 <= first);
4968 KMP_DEBUG_ASSERT(n_places > first);
4969 KMP_DEBUG_ASSERT(0 <= last);
4970 KMP_DEBUG_ASSERT(n_places > last);
4971 KMP_DEBUG_ASSERT(last_place >= first_place);
4972 th = team->t.t_threads[f];
4973 KMP_DEBUG_ASSERT(th);
4974 th->th.th_first_place = first;
4975 th->th.th_new_place = place;
4976 th->th.th_last_place = last;
4977 if (__kmp_display_affinity && place != th->th.th_current_place &&
4978 team->t.t_display_affinity != 1) {
4979 team->t.t_display_affinity = 1;
4982 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4983 "partition = [%d,%d], spacing = %.4f\n",
4984 __kmp_gtid_from_thread(team->t.t_threads[f]),
4985 team->t.t_id, f, th->th.th_new_place,
4986 th->th.th_first_place, th->th.th_last_place, spacing));
4990 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4992 int S, rem, gap, s_count;
4993 S = n_th / n_places;
4995 rem = n_th - (S * n_places);
4996 gap = rem > 0 ? n_places / rem : n_places;
4997 int place = masters_place;
5000 if (update_master_only == 1)
5002 for (f = 0; f < thidx; f++) {
5003 kmp_info_t *th = team->t.t_threads[f];
5004 KMP_DEBUG_ASSERT(th != NULL);
5006 th->th.th_first_place = place;
5007 th->th.th_last_place = place;
5008 th->th.th_new_place = place;
5009 if (__kmp_display_affinity && place != th->th.th_current_place &&
5010 team->t.t_display_affinity != 1) {
5011 team->t.t_display_affinity = 1;
5015 if ((s_count == S) && rem && (gap_ct == gap)) {
5017 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5019 if (place == last_place) {
5020 place = first_place;
5021 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
5029 }
else if (s_count == S) {
5030 if (place == last_place) {
5031 place = first_place;
5032 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
5041 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5042 "partition = [%d,%d]\n",
5043 __kmp_gtid_from_thread(team->t.t_threads[f]),
5044 team->t.t_id, f, th->th.th_new_place,
5045 th->th.th_first_place, th->th.th_last_place));
5047 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5055 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
5058 #endif // KMP_AFFINITY_SUPPORTED
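// __kmp_allocate_team: hand out a team object for a new parallel region.
// The root's hot team is reused when possible (growing or shrinking its
// thread set as needed); otherwise a team of sufficient size is taken from
// the team pool or freshly allocated.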
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5082 #if KMP_NESTED_HOT_TEAMS
5083 kmp_hot_team_ptr_t *hot_teams;
5085 team = master->th.th_team;
5086 level = team->t.t_active_level;
5087 if (master->th.th_teams_microtask) {
5088 if (master->th.th_teams_size.nteams > 1 &&
5091 (microtask_t)__kmp_teams_master ||
5092 master->th.th_teams_level <
5099 if ((master->th.th_teams_size.nteams == 1 &&
5100 master->th.th_teams_level >= team->t.t_level) ||
5101 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5102 do_place_partition = 0;
5104 hot_teams = master->th.th_hot_teams;
5105 if (level < __kmp_hot_teams_max_level && hot_teams &&
5106 hot_teams[level].hot_team) {
5114 KMP_DEBUG_ASSERT(new_nproc == 1);
5118 if (use_hot_team && new_nproc > 1) {
5119 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5120 #if KMP_NESTED_HOT_TEAMS
5121 team = hot_teams[level].hot_team;
5123 team = root->r.r_hot_team;
5126 if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5133 if (team->t.t_nproc != new_nproc &&
5134 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5136 int old_nthr = team->t.t_nproc;
5137 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5142 if (do_place_partition == 0)
5143 team->t.t_proc_bind = proc_bind_default;
5147 if (team->t.t_nproc == new_nproc) {
5148 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5151 if (team->t.t_size_changed == -1) {
5152 team->t.t_size_changed = 1;
5154 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5158 kmp_r_sched_t new_sched = new_icvs->sched;
5160 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5162 __kmp_reinitialize_team(team, new_icvs,
5163 root->r.r_uber_thread->th.th_ident);
5165 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5166 team->t.t_threads[0], team));
5167 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5169 #if KMP_AFFINITY_SUPPORTED
5170 if ((team->t.t_size_changed == 0) &&
5171 (team->t.t_proc_bind == new_proc_bind)) {
5172 if (new_proc_bind == proc_bind_spread) {
5173 if (do_place_partition) {
5175 __kmp_partition_places(team, 1);
5178 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5179 "proc_bind = %d, partition = [%d,%d]\n",
5180 team->t.t_id, new_proc_bind, team->t.t_first_place,
5181 team->t.t_last_place));
5183 if (do_place_partition) {
5184 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5185 __kmp_partition_places(team);
5189 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
5197 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5200 __kmp_add_threads_to_team(team, new_nproc);
5202 #if KMP_NESTED_HOT_TEAMS
5203 if (__kmp_hot_teams_mode == 0) {
5206 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5207 hot_teams[level].hot_team_nth = new_nproc;
5208 #endif // KMP_NESTED_HOT_TEAMS
5210 for (f = new_nproc; f < team->t.t_nproc; f++) {
5211 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5212 if (__kmp_tasking_mode != tskm_immediate_exec) {
5215 team->t.t_threads[f]->th.th_task_team = NULL;
5217 __kmp_free_thread(team->t.t_threads[f]);
5218 team->t.t_threads[f] = NULL;
5220 #if KMP_NESTED_HOT_TEAMS
5225 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5226 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5227 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5229 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5230 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5232 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5236 #endif // KMP_NESTED_HOT_TEAMS
5237 team->t.t_nproc = new_nproc;
5239 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5240 __kmp_reinitialize_team(team, new_icvs,
5241 root->r.r_uber_thread->th.th_ident);
5244 for (f = 0; f < new_nproc; ++f) {
5245 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5250 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5251 team->t.t_threads[0], team));
5253 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5256 for (f = 0; f < team->t.t_nproc; f++) {
5257 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5258 team->t.t_threads[f]->th.th_team_nproc ==
5263 if (do_place_partition) {
5264 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5265 #if KMP_AFFINITY_SUPPORTED
5266 __kmp_partition_places(team);
5270 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5271 kmp_affin_mask_t *old_mask;
5272 if (KMP_AFFINITY_CAPABLE()) {
5273 KMP_CPU_ALLOC(old_mask);
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc;
5281 team->t.t_size_changed = 1;
5283 #if KMP_NESTED_HOT_TEAMS
5284 int avail_threads = hot_teams[level].hot_team_nth;
5285 if (new_nproc < avail_threads)
5286 avail_threads = new_nproc;
5287 kmp_info_t **other_threads = team->t.t_threads;
5288 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5292 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5293 for (b = 0; b < bs_last_barrier; ++b) {
5294 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5295 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5297 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5301 if (hot_teams[level].hot_team_nth >= new_nproc) {
5304 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5305 team->t.t_nproc = new_nproc;
5309 team->t.t_nproc = hot_teams[level].hot_team_nth;
5310 hot_teams[level].hot_team_nth = new_nproc;
5311 #endif // KMP_NESTED_HOT_TEAMS
5312 if (team->t.t_max_nproc < new_nproc) {
5314 __kmp_reallocate_team_arrays(team, new_nproc);
5315 __kmp_reinitialize_team(team, new_icvs, NULL);
5318 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5324 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5328 for (f = team->t.t_nproc; f < new_nproc; f++) {
5329 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5330 KMP_DEBUG_ASSERT(new_worker);
5331 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5342 kmp_balign_t *balign = new_worker->th.th_bar;
5343 for (b = 0; b < bs_last_barrier; ++b) {
5344 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5345 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5346 KMP_BARRIER_PARENT_FLAG);
5348 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5354 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5355 if (KMP_AFFINITY_CAPABLE()) {
5357 __kmp_set_system_affinity(old_mask, TRUE);
5358 KMP_CPU_FREE(old_mask);
5361 #if KMP_NESTED_HOT_TEAMS
5363 #endif // KMP_NESTED_HOT_TEAMS
5364 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5367 __kmp_add_threads_to_team(team, new_nproc);
5371 __kmp_initialize_team(team, new_nproc, new_icvs,
5372 root->r.r_uber_thread->th.th_ident);
5375 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5376 for (f = 0; f < team->t.t_nproc; ++f)
5377 __kmp_initialize_info(team->t.t_threads[f], team, f,
5378 __kmp_gtid_from_tid(f, team));
5386 for (f = old_nproc; f < team->t.t_nproc; ++f)
5387 team->t.t_threads[f]->th.th_task_state =
5388 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5391 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5392 for (f = old_nproc; f < team->t.t_nproc; ++f)
5393 team->t.t_threads[f]->th.th_task_state = old_state;
5397 for (f = 0; f < team->t.t_nproc; ++f) {
5398 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5399 team->t.t_threads[f]->th.th_team_nproc ==
5404 if (do_place_partition) {
5405 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5406 #if KMP_AFFINITY_SUPPORTED
5407 __kmp_partition_places(team);
5412 kmp_info_t *master = team->t.t_threads[0];
5413 if (master->th.th_teams_microtask) {
5414 for (f = 1; f < new_nproc; ++f) {
5416 kmp_info_t *thr = team->t.t_threads[f];
5417 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5418 thr->th.th_teams_level = master->th.th_teams_level;
5419 thr->th.th_teams_size = master->th.th_teams_size;
5422 #if KMP_NESTED_HOT_TEAMS
5426 for (f = 1; f < new_nproc; ++f) {
5427 kmp_info_t *thr = team->t.t_threads[f];
5429 kmp_balign_t *balign = thr->th.th_bar;
5430 for (b = 0; b < bs_last_barrier; ++b) {
5431 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5432 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5434 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5439 #endif // KMP_NESTED_HOT_TEAMS
5442 __kmp_alloc_argv_entries(argc, team, TRUE);
5443 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5447 KF_TRACE(10, (
" hot_team = %p\n", team));
5450 if (__kmp_tasking_mode != tskm_immediate_exec) {
5451 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5452 "task_team[1] = %p after reinit\n",
5453 team->t.t_task_team[0], team->t.t_task_team[1]));
5458 __ompt_team_assign_id(team, ompt_parallel_data);
5468 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5471 if (team->t.t_max_nproc >= max_nproc) {
5473 __kmp_team_pool = team->t.t_next_pool;
5475 if (max_nproc > 1 &&
5476 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5478 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5483 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5485 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5486 "task_team[1] %p to NULL\n",
5487 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5488 team->t.t_task_team[0] = NULL;
5489 team->t.t_task_team[1] = NULL;
5492 __kmp_alloc_argv_entries(argc, team, TRUE);
5493 KMP_CHECK_UPDATE(team->t.t_argc, argc);
      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5500 for (b = 0; b < bs_last_barrier; ++b) {
5501 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5503 team->t.t_bar[b].b_master_arrived = 0;
5504 team->t.t_bar[b].b_team_arrived = 0;
5509 team->t.t_proc_bind = new_proc_bind;
5511 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5515 __ompt_team_assign_id(team, ompt_parallel_data);
5527 team = __kmp_reap_team(team);
5528 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5536 team->t.t_max_nproc = max_nproc;
5537 if (max_nproc > 1 &&
5538 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5540 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5545 __kmp_allocate_team_arrays(team, max_nproc);
5547 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5548 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5550 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5552 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5553 team->t.t_task_team[0] = NULL;
5555 team->t.t_task_team[1] = NULL;
5558 if (__kmp_storage_map) {
5559 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5563 __kmp_alloc_argv_entries(argc, team, FALSE);
5564 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5571 for (b = 0; b < bs_last_barrier; ++b) {
5572 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5574 team->t.t_bar[b].b_master_arrived = 0;
5575 team->t.t_bar[b].b_team_arrived = 0;
5580 team->t.t_proc_bind = new_proc_bind;
5583 __ompt_team_assign_id(team, ompt_parallel_data);
5584 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5607 KMP_DEBUG_ASSERT(root);
5608 KMP_DEBUG_ASSERT(team);
5609 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5610 KMP_DEBUG_ASSERT(team->t.t_threads);
5612 int use_hot_team = team == root->r.r_hot_team;
5613 #if KMP_NESTED_HOT_TEAMS
5616 level = team->t.t_active_level - 1;
5617 if (master->th.th_teams_microtask) {
5618 if (master->th.th_teams_size.nteams > 1) {
5622 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5623 master->th.th_teams_level == team->t.t_level) {
5629 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5631 if (level < __kmp_hot_teams_max_level) {
5632 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5636 #endif // KMP_NESTED_HOT_TEAMS
5639 TCW_SYNC_PTR(team->t.t_pkfn,
5642 team->t.t_copyin_counter = 0;
5647 if (!use_hot_team) {
5648 if (__kmp_tasking_mode != tskm_immediate_exec) {
5650 for (f = 1; f < team->t.t_nproc; ++f) {
5651 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5652 kmp_info_t *th = team->t.t_threads[f];
5653 volatile kmp_uint32 *state = &th->th.th_reap_state;
5654 while (*state != KMP_SAFE_TO_REAP) {
5658 if (!__kmp_is_thread_alive(th, &ecode)) {
5659 *state = KMP_SAFE_TO_REAP;
5664 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5665 if (fl.is_sleeping())
5666 fl.resume(__kmp_gtid_from_thread(th));
5673 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5674 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5675 if (task_team != NULL) {
5676 for (f = 0; f < team->t.t_nproc; ++f) {
5677 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5678 team->t.t_threads[f]->th.th_task_team = NULL;
5682 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5683 __kmp_get_gtid(), task_team, team->t.t_id));
5684 #if KMP_NESTED_HOT_TEAMS
5685 __kmp_free_task_team(master, task_team);
5687 team->t.t_task_team[tt_idx] = NULL;
5693 team->t.t_parent = NULL;
5694 team->t.t_level = 0;
5695 team->t.t_active_level = 0;
5698 for (f = 1; f < team->t.t_nproc; ++f) {
5699 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5700 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5701 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5704 __kmp_free_thread(team->t.t_threads[f]);
5707 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5710 team->t.b->go_release();
5711 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5712 for (f = 1; f < team->t.t_nproc; ++f) {
5713 if (team->t.b->sleep[f].sleep) {
5714 __kmp_atomic_resume_64(
5715 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5716 (kmp_atomic_flag_64<> *)NULL);
      for (int f = 1; f < team->t.t_nproc; ++f) {
5722 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5728 for (f = 1; f < team->t.t_nproc; ++f) {
5729 team->t.t_threads[f] = NULL;
5732 if (team->t.t_max_nproc > 1 &&
5733 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5734 distributedBarrier::deallocate(team->t.b);
5739 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
5743 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5744 team->t.t_threads[1]->th.th_cg_roots);
5745 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5747 for (f = 1; f < team->t.t_nproc; ++f) {
5748 kmp_info_t *thr = team->t.t_threads[f];
5749 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5750 thr->th.th_cg_roots->cg_root == thr);
5752 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5753 thr->th.th_cg_roots = tmp->up;
5754 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5755 " up to node %p. cg_nthreads was %d\n",
5756 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5757 int i = tmp->cg_nthreads--;
5762 if (thr->th.th_cg_roots)
5763 thr->th.th_current_task->td_icvs.thread_limit =
5764 thr->th.th_cg_roots->cg_thread_limit;
5773 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5774 kmp_team_t *next_pool = team->t.t_next_pool;
5776 KMP_DEBUG_ASSERT(team);
5777 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5778 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5779 KMP_DEBUG_ASSERT(team->t.t_threads);
5780 KMP_DEBUG_ASSERT(team->t.t_argv);
5785 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
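// __kmp_free_thread: place a worker back on the free pool, keeping the pool
// sorted by gtid so that low gtids are reused first.  Also unwinds the
// thread's contention-group root list and updates the pool and active-thread
// counters.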
5819 void __kmp_free_thread(kmp_info_t *this_th) {
5823 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5824 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5826 KMP_DEBUG_ASSERT(this_th);
5831 kmp_balign_t *balign = this_th->th.th_bar;
5832 for (b = 0; b < bs_last_barrier; ++b) {
5833 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5834 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5835 balign[b].bb.team = NULL;
5836 balign[b].bb.leaf_kids = 0;
5838 this_th->th.th_task_state = 0;
5839 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5842 TCW_PTR(this_th->th.th_team, NULL);
5843 TCW_PTR(this_th->th.th_root, NULL);
5844 TCW_PTR(this_th->th.th_dispatch, NULL);
5846 while (this_th->th.th_cg_roots) {
5847 this_th->th.th_cg_roots->cg_nthreads--;
5848 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5849 " %p of thread %p to %d\n",
5850 this_th, this_th->th.th_cg_roots,
5851 this_th->th.th_cg_roots->cg_root,
5852 this_th->th.th_cg_roots->cg_nthreads));
5853 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5854 if (tmp->cg_root == this_th) {
5855 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5858 this_th->th.th_cg_roots = tmp->up;
5861 if (tmp->cg_nthreads == 0) {
5864 this_th->th.th_cg_roots = NULL;
5874 __kmp_free_implicit_task(this_th);
5875 this_th->th.th_current_task = NULL;
5879 gtid = this_th->th.th_info.ds.ds_gtid;
5880 if (__kmp_thread_pool_insert_pt != NULL) {
5881 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5882 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5883 __kmp_thread_pool_insert_pt = NULL;
5892 if (__kmp_thread_pool_insert_pt != NULL) {
5893 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5895 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5897 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5898 scan = &((*scan)->th.th_next_pool))
5903 TCW_PTR(this_th->th.th_next_pool, *scan);
5904 __kmp_thread_pool_insert_pt = *scan = this_th;
5905 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5906 (this_th->th.th_info.ds.ds_gtid <
5907 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5908 TCW_4(this_th->th.th_in_pool, TRUE);
5909 __kmp_suspend_initialize_thread(this_th);
5910 __kmp_lock_suspend_mx(this_th);
5911 if (this_th->th.th_active == TRUE) {
5912 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5913 this_th->th.th_active_in_pool = TRUE;
5917 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5920 __kmp_unlock_suspend_mx(this_th);
5922 TCW_4(__kmp_nth, __kmp_nth - 1);
5924 #ifdef KMP_ADJUST_BLOCKTIME
5927 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5928 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5929 if (__kmp_nth <= __kmp_avail_proc) {
5930 __kmp_zero_bt = FALSE;
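// __kmp_launch_thread: main loop of a worker thread.  The worker waits at
// the fork barrier for work, invokes the team's microtask via t_invoke,
// reports through OMPT/OMPD when enabled, and joins at the join barrier,
// repeating until library shutdown (__kmp_global.g.g_done).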
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;
5953 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5955 if (__kmp_env_consistency_check) {
5956 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5960 if (ompd_state & OMPD_ENABLE_BP)
5961 ompd_bp_thread_begin();
5965 ompt_data_t *thread_data =
nullptr;
5966 if (ompt_enabled.enabled) {
5967 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5968 *thread_data = ompt_data_none;
5970 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5971 this_thr->th.ompt_thread_info.wait_id = 0;
5972 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5973 this_thr->th.ompt_thread_info.parallel_flags = 0;
5974 if (ompt_enabled.ompt_callback_thread_begin) {
5975 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5976 ompt_thread_worker, thread_data);
5978 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5983 while (!TCR_4(__kmp_global.g.g_done)) {
5984 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5988 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5991 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5994 if (ompt_enabled.enabled) {
5995 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5999 pteam = &this_thr->th.th_team;
6002 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6004 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6007 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6008 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6009 (*pteam)->t.t_pkfn));
6011 updateHWFPControl(*pteam);
6014 if (ompt_enabled.enabled) {
6015 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6019 rc = (*pteam)->t.t_invoke(gtid);
6023 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6024 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6025 (*pteam)->t.t_pkfn));
6028 if (ompt_enabled.enabled) {
6030 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6032 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6036 __kmp_join_barrier(gtid);
6039 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6042 if (ompd_state & OMPD_ENABLE_BP)
6043 ompd_bp_thread_end();
6047 if (ompt_enabled.ompt_callback_thread_end) {
6048 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6052 this_thr->th.th_task_team = NULL;
6054 __kmp_common_destroy_gtid(gtid);
6056 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6059 #if OMP_PROFILING_SUPPORT
6060 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6076 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}
#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
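// __kmp_reap_thread: release a worker from the fork barrier if necessary,
// join the underlying OS thread, and free its per-thread resources
// (implicit task, fast memory, affinity mask, consistency-check stack,
// serial team) before clearing its __kmp_threads[] slot.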
6120 static void __kmp_reap_thread(kmp_info_t *thread,
int is_root) {
6125 KMP_DEBUG_ASSERT(thread != NULL);
6127 gtid = thread->th.th_info.ds.ds_gtid;
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }
6150 __kmp_reap_worker(thread);
6162 if (thread->th.th_active_in_pool) {
6163 thread->th.th_active_in_pool = FALSE;
6164 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6165 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6169 __kmp_free_implicit_task(thread);
6173 __kmp_free_fast_memory(thread);
6176 __kmp_suspend_uninitialize_thread(thread);
6178 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6179 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6184 #ifdef KMP_ADJUST_BLOCKTIME
6187 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6188 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6189 if (__kmp_nth <= __kmp_avail_proc) {
6190 __kmp_zero_bt = FALSE;
6196 if (__kmp_env_consistency_check) {
6197 if (thread->th.th_cons) {
6198 __kmp_free_cons_stack(thread->th.th_cons);
6199 thread->th.th_cons = NULL;
6203 if (thread->th.th_pri_common != NULL) {
6204 __kmp_free(thread->th.th_pri_common);
6205 thread->th.th_pri_common = NULL;
6208 if (thread->th.th_task_state_memo_stack != NULL) {
6209 __kmp_free(thread->th.th_task_state_memo_stack);
6210 thread->th.th_task_state_memo_stack = NULL;
6214 if (thread->th.th_local.bget_data != NULL) {
6215 __kmp_finalize_bget(thread);
6219 #if KMP_AFFINITY_SUPPORTED
6220 if (thread->th.th_affin_mask != NULL) {
6221 KMP_CPU_FREE(thread->th.th_affin_mask);
6222 thread->th.th_affin_mask = NULL;
6226 #if KMP_USE_HIER_SCHED
6227 if (thread->th.th_hier_bar_data != NULL) {
6228 __kmp_free(thread->th.th_hier_bar_data);
6229 thread->th.th_hier_bar_data = NULL;
6233 __kmp_reap_team(thread->th.th_serial_team);
6234 thread->th.th_serial_team = NULL;
6241 static void __kmp_itthash_clean(kmp_info_t *th) {
6243 if (__kmp_itt_region_domains.count > 0) {
6244 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6245 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6247 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6248 __kmp_thread_free(th, bucket);
6253 if (__kmp_itt_barrier_domains.count > 0) {
6254 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6255 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6257 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6258 __kmp_thread_free(th, bucket);
static void __kmp_internal_end(void) {
6270 __kmp_unregister_library();
6277 __kmp_reclaim_dead_roots();
6281 for (i = 0; i < __kmp_threads_capacity; i++)
6283 if (__kmp_root[i]->r.r_active)
6286 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6288 if (i < __kmp_threads_capacity) {
6300 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6301 if (TCR_4(__kmp_init_monitor)) {
6302 __kmp_reap_monitor(&__kmp_monitor);
6303 TCW_4(__kmp_init_monitor, 0);
6305 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6306 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6307 #endif // KMP_USE_MONITOR
6312 for (i = 0; i < __kmp_threads_capacity; i++) {
6313 if (__kmp_root[i]) {
6316 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6325 while (__kmp_thread_pool != NULL) {
6327 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6328 __kmp_thread_pool = thread->th.th_next_pool;
6330 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6331 thread->th.th_next_pool = NULL;
6332 thread->th.th_in_pool = FALSE;
6333 __kmp_reap_thread(thread, 0);
6335 __kmp_thread_pool_insert_pt = NULL;
6338 while (__kmp_team_pool != NULL) {
6340 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6341 __kmp_team_pool = team->t.t_next_pool;
6343 team->t.t_next_pool = NULL;
6344 __kmp_reap_team(team);
6347 __kmp_reap_task_teams();
6354 for (i = 0; i < __kmp_threads_capacity; i++) {
6355 kmp_info_t *thr = __kmp_threads[i];
6356 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6361 for (i = 0; i < __kmp_threads_capacity; ++i) {
6368 TCW_SYNC_4(__kmp_init_common, FALSE);
6370 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6378 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6379 if (TCR_4(__kmp_init_monitor)) {
6380 __kmp_reap_monitor(&__kmp_monitor);
6381 TCW_4(__kmp_init_monitor, 0);
6383 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6384 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6387 TCW_4(__kmp_init_gtid, FALSE);
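// __kmp_internal_end_library: shutdown entry point used by the library
// destructor / atexit path.  It unregisters the library, handles the
// still-active-root and hidden-helper cases, and then performs the common
// teardown in __kmp_internal_end() under the initialization locks.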
void __kmp_internal_end_library(int gtid_req) {
6403 if (__kmp_global.g.g_abort) {
6404 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6408 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6409 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6414 if (TCR_4(__kmp_init_hidden_helper) &&
6415 !TCR_4(__kmp_hidden_helper_team_done)) {
6416 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6418 __kmp_hidden_helper_main_thread_release();
6420 __kmp_hidden_helper_threads_deinitz_wait();
6426 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6428 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6429 if (gtid == KMP_GTID_SHUTDOWN) {
6430 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6431 "already shutdown\n"));
6433 }
else if (gtid == KMP_GTID_MONITOR) {
6434 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6435 "registered, or system shutdown\n"));
6437 }
else if (gtid == KMP_GTID_DNE) {
6438 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6441 }
else if (KMP_UBER_GTID(gtid)) {
6443 if (__kmp_root[gtid]->r.r_active) {
6444 __kmp_global.g.g_abort = -1;
6445 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6446 __kmp_unregister_library();
6448 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6452 __kmp_itthash_clean(__kmp_threads[gtid]);
6455 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6456 __kmp_unregister_root_current_thread(gtid);
6463 #ifdef DUMP_DEBUG_ON_EXIT
6464 if (__kmp_debug_buf)
6465 __kmp_dump_debug_buffer();
6470 __kmp_unregister_library();
6475 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6478 if (__kmp_global.g.g_abort) {
6479 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6481 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6484 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6485 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6494 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6497 __kmp_internal_end();
6499 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6500 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6502 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6504 #ifdef DUMP_DEBUG_ON_EXIT
6505 if (__kmp_debug_buf)
6506 __kmp_dump_debug_buffer();
6510 __kmp_close_console();
6513 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6526 if (__kmp_global.g.g_abort) {
6527 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6531 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6532 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6537 if (TCR_4(__kmp_init_hidden_helper) &&
6538 !TCR_4(__kmp_hidden_helper_team_done)) {
6539 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6541 __kmp_hidden_helper_main_thread_release();
6543 __kmp_hidden_helper_threads_deinitz_wait();
6550 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6552 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6553 if (gtid == KMP_GTID_SHUTDOWN) {
6554 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6555 "already shutdown\n"));
6557 }
else if (gtid == KMP_GTID_MONITOR) {
6558 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6559 "registered, or system shutdown\n"));
6561 }
else if (gtid == KMP_GTID_DNE) {
6562 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6566 }
else if (KMP_UBER_GTID(gtid)) {
6568 if (__kmp_root[gtid]->r.r_active) {
6569 __kmp_global.g.g_abort = -1;
6570 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6572 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6576 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6578 __kmp_unregister_root_current_thread(gtid);
6582 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6585 __kmp_threads[gtid]->th.th_task_team = NULL;
6589 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6595 if (__kmp_pause_status != kmp_hard_paused)
6599 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6604 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6607 if (__kmp_global.g.g_abort) {
6608 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6610 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6613 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6614 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6625 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6627 for (i = 0; i < __kmp_threads_capacity; ++i) {
6628 if (KMP_UBER_GTID(i)) {
6631 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6632 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6633 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6640 __kmp_internal_end();
6642 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6643 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6645 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6647 #ifdef DUMP_DEBUG_ON_EXIT
6648 if (__kmp_debug_buf)
6649 __kmp_dump_debug_buffer();
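// Library registration: to detect two copies of the OpenMP runtime linked
// into one process, the first runtime publishes a value of the form
// "<flag address>-<flag value>-<library file>" (the "%p-%lx-%s" format
// below), either in an environment variable or, with KMP_USE_SHM, in a
// named shared-memory segment.  A second runtime that finds a live value
// which is not its own aborts unless KMP_DUPLICATE_LIB_OK is set.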
6656 static long __kmp_registration_flag = 0;
6658 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
void __kmp_register_library_startup(void) {
6677 char *name = __kmp_reg_status_name();
6683 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6684 __kmp_initialize_system_tick();
6686 __kmp_read_system_time(&time.dtime);
6687 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6699 #if defined(KMP_USE_SHM)
6700 char *shm_name = __kmp_str_format(
"/%s", name);
6701 int shm_preexist = 0;
6703 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6704 if ((fd1 == -1) && (errno == EEXIST)) {
6707 fd1 = shm_open(shm_name, O_RDWR, 0666);
6710 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6716 }
else if (fd1 == -1) {
6719 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM2"), KMP_ERR(errno),
6722 if (shm_preexist == 0) {
6724 if (ftruncate(fd1, SHM_SIZE) == -1) {
6726 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6727 KMP_ERR(errno), __kmp_msg_null);
6731 (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6732 if (data1 == MAP_FAILED) {
6734 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
6737 if (shm_preexist == 0) {
6738 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6741 value = __kmp_str_format(
"%s", data1);
6742 munmap(data1, SHM_SIZE);
6744 #else // Windows and unix with static library
6746 __kmp_env_set(name, __kmp_registration_str, 0);
6748 value = __kmp_env_get(name);
6751 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6758 char *flag_addr_str = NULL;
6759 char *flag_val_str = NULL;
6760 char const *file_name = NULL;
6761 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6762 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6765 unsigned long *flag_addr = 0;
6766 unsigned long flag_val = 0;
6767 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6768 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6769 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6773 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6787 file_name =
"unknown library";
6792 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6793 if (!__kmp_str_match_true(duplicate_ok)) {
6795 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6796 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6798 KMP_INTERNAL_FREE(duplicate_ok);
6799 __kmp_duplicate_library_ok = 1;
6804 #if defined(KMP_USE_SHM)
6806 shm_unlink(shm_name);
6809 __kmp_env_unset(name);
6813 KMP_DEBUG_ASSERT(0);
6817 KMP_INTERNAL_FREE((
void *)value);
6818 #if defined(KMP_USE_SHM)
6819 KMP_INTERNAL_FREE((
void *)shm_name);
6822 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
6828 char *name = __kmp_reg_status_name();
6831 #if defined(KMP_USE_SHM)
6832 char *shm_name = __kmp_str_format(
"/%s", name);
6833 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6838 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6839 if (data1 != MAP_FAILED) {
6840 value = __kmp_str_format(
"%s", data1);
6841 munmap(data1, SHM_SIZE);
6845 value = __kmp_env_get(name);
6848 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6849 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6850 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6852 #if defined(KMP_USE_SHM)
6853 shm_unlink(shm_name);
6855 __kmp_env_unset(name);
6859 #if defined(KMP_USE_SHM)
6860 KMP_INTERNAL_FREE(shm_name);
6863 KMP_INTERNAL_FREE(__kmp_registration_str);
6864 KMP_INTERNAL_FREE(value);
6865 KMP_INTERNAL_FREE(name);
6867 __kmp_registration_flag = 0;
6868 __kmp_registration_str = NULL;
6875 #if KMP_MIC_SUPPORTED
6877 static void __kmp_check_mic_type() {
6878 kmp_cpuid_t cpuid_state = {0};
6879 kmp_cpuid_t *cs_p = &cpuid_state;
6880 __kmp_x86_cpuid(1, 0, cs_p);
6882 if ((cs_p->eax & 0xff0) == 0xB10) {
6883 __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}
#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
6894 static void __kmp_user_level_mwait_init() {
6895 struct kmp_cpuid buf;
6896 __kmp_x86_cpuid(7, 0, &buf);
6897 __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
6901 #elif KMP_HAVE_MWAIT
6902 #ifndef AT_INTELPHIUSERMWAIT
6905 #define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
6913 static void __kmp_user_level_mwait_init() {
6918 if (__kmp_mic_type == mic3) {
6919 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6920 if ((res & 0x1) || __kmp_user_level_mwait) {
6921 __kmp_mwait_enabled = TRUE;
6922 if (__kmp_user_level_mwait) {
6923 KMP_INFORM(EnvMwaitWarn);
6926 __kmp_mwait_enabled = FALSE;
6929 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
6930 "__kmp_mwait_enabled = %d\n",
6931 __kmp_mic_type, __kmp_mwait_enabled));
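// __kmp_do_serial_initialize: one-time (per process) setup -- register the
// library, initialize the global locks, default blocktime and barrier
// patterns, size the __kmp_threads/__kmp_root tables, read the environment
// via __kmp_env_initialize, and register the initial thread as the first
// root.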
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6955 __kmp_validate_locks();
6958 __kmp_init_allocator();
6963 __kmp_register_library_startup();
6966 if (TCR_4(__kmp_global.g.g_done)) {
6967 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6970 __kmp_global.g.g_abort = 0;
6971 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6974 #if KMP_USE_ADAPTIVE_LOCKS
6975 #if KMP_DEBUG_ADAPTIVE_LOCKS
6976 __kmp_init_speculative_stats();
6979 #if KMP_STATS_ENABLED
6982 __kmp_init_lock(&__kmp_global_lock);
6983 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6984 __kmp_init_lock(&__kmp_debug_lock);
6985 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6986 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6987 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6988 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6989 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6990 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6991 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6992 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6993 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6994 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6995 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6996 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6997 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6998 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6999 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7001 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7003 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7007 __kmp_runtime_initialize();
7009 #if KMP_MIC_SUPPORTED
7010 __kmp_check_mic_type();
7017 __kmp_abort_delay = 0;
7021 __kmp_dflt_team_nth_ub = __kmp_xproc;
7022 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7023 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7025 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7026 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7028 __kmp_max_nth = __kmp_sys_max_nth;
7029 __kmp_cg_max_nth = __kmp_sys_max_nth;
7030 __kmp_teams_max_nth = __kmp_xproc;
7031 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7032 __kmp_teams_max_nth = __kmp_sys_max_nth;
7037 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7039 __kmp_monitor_wakeups =
7040 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7041 __kmp_bt_intervals =
7042 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7045 __kmp_library = library_throughput;
7047 __kmp_static = kmp_sch_static_balanced;
7054 #if KMP_FAST_REDUCTION_BARRIER
7055 #define kmp_reduction_barrier_gather_bb ((int)1)
7056 #define kmp_reduction_barrier_release_bb ((int)1)
7057 #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7058 #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7059 #endif // KMP_FAST_REDUCTION_BARRIER
7060 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7061 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7062 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7063 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7064 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7065 #if KMP_FAST_REDUCTION_BARRIER
7066 if (i == bs_reduction_barrier) {
7068 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7069 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7070 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7071 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7073 #endif // KMP_FAST_REDUCTION_BARRIER
7075 #if KMP_FAST_REDUCTION_BARRIER
7076 #undef kmp_reduction_barrier_release_pat
7077 #undef kmp_reduction_barrier_gather_pat
7078 #undef kmp_reduction_barrier_release_bb
7079 #undef kmp_reduction_barrier_gather_bb
7080 #endif // KMP_FAST_REDUCTION_BARRIER
7081 #if KMP_MIC_SUPPORTED
7082 if (__kmp_mic_type == mic2) {
7084 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7085 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7087 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7088 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7090 #if KMP_FAST_REDUCTION_BARRIER
7091 if (__kmp_mic_type == mic2) {
7092 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7093 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7095 #endif // KMP_FAST_REDUCTION_BARRIER
7096 #endif // KMP_MIC_SUPPORTED
7100 __kmp_env_checks = TRUE;
7102 __kmp_env_checks = FALSE;
7106 __kmp_foreign_tp = TRUE;
7108 __kmp_global.g.g_dynamic = FALSE;
7109 __kmp_global.g.g_dynamic_mode = dynamic_default;
7111 __kmp_init_nesting_mode();
7113 __kmp_env_initialize(NULL);
7115 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7116 __kmp_user_level_mwait_init();
7120 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7121 if (__kmp_str_match_true(val)) {
7122 kmp_str_buf_t buffer;
7123 __kmp_str_buf_init(&buffer);
7124 __kmp_i18n_dump_catalog(&buffer);
7125 __kmp_printf(
"%s", buffer.str);
7126 __kmp_str_buf_free(&buffer);
7128 __kmp_env_free(&val);
7131 __kmp_threads_capacity =
7132 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7134 __kmp_tp_capacity = __kmp_default_tp_capacity(
7135 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7140 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7141 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7142 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7143 __kmp_thread_pool = NULL;
7144 __kmp_thread_pool_insert_pt = NULL;
7145 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7159 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7161 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7166 gtid = __kmp_register_root(TRUE);
7167 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7168 KMP_ASSERT(KMP_UBER_GTID(gtid));
7169 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7173 __kmp_common_initialize();
7177 __kmp_register_atfork();
7180 #if !KMP_DYNAMIC_LIB
7184 int rc = atexit(__kmp_internal_end_atexit);
7186 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7192 #if KMP_HANDLE_SIGNALS
7198 __kmp_install_signals(FALSE);
7201 __kmp_install_signals(TRUE);
7206 __kmp_init_counter++;
7208 __kmp_init_serial = TRUE;
7210 if (__kmp_settings) {
7214 if (__kmp_display_env || __kmp_display_env_verbose) {
7215 __kmp_env_print_2();
7224 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7228 if (__kmp_init_serial) {
7231 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7232 if (__kmp_init_serial) {
7233 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7236 __kmp_do_serial_initialize();
7237 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
static void __kmp_do_middle_initialize(void) {
7242 int prev_dflt_team_nth;
7244 if (!__kmp_init_serial) {
7245 __kmp_do_serial_initialize();
7248 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7252 prev_dflt_team_nth = __kmp_dflt_team_nth;
7254 #if KMP_AFFINITY_SUPPORTED
7257 __kmp_affinity_initialize();
7261 KMP_ASSERT(__kmp_xproc > 0);
7262 if (__kmp_avail_proc == 0) {
7263 __kmp_avail_proc = __kmp_xproc;
7269 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7270 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7275 if (__kmp_dflt_team_nth == 0) {
7276 #ifdef KMP_DFLT_NTH_CORES
7278 __kmp_dflt_team_nth = __kmp_ncores;
7279 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7280 "__kmp_ncores (%d)\n",
7281 __kmp_dflt_team_nth));
7284 __kmp_dflt_team_nth = __kmp_avail_proc;
7285 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7286 "__kmp_avail_proc(%d)\n",
7287 __kmp_dflt_team_nth));
7291 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7292 __kmp_dflt_team_nth = KMP_MIN_NTH;
7294 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7295 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7298 if (__kmp_nesting_mode > 0)
7299 __kmp_set_nesting_mode_threads();
7303 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7305 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7310 for (i = 0; i < __kmp_threads_capacity; i++) {
7311 kmp_info_t *thread = __kmp_threads[i];
7314 if (thread->th.th_current_task->td_icvs.nproc != 0)
7317 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7322 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7323 __kmp_dflt_team_nth));
7325 #ifdef KMP_ADJUST_BLOCKTIME
7327 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7328 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7329 if (__kmp_nth > __kmp_avail_proc) {
7330 __kmp_zero_bt = TRUE;
7336 TCW_SYNC_4(__kmp_init_middle, TRUE);
7338 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
7342 if (__kmp_init_middle) {
7345 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7346 if (__kmp_init_middle) {
7347 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7350 __kmp_do_middle_initialize();
7351 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_parallel_initialize(void) {
7355 int gtid = __kmp_entry_gtid();
7358 if (TCR_4(__kmp_init_parallel))
7360 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7361 if (TCR_4(__kmp_init_parallel)) {
7362 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7367 if (TCR_4(__kmp_global.g.g_done)) {
7370 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7371 __kmp_infinite_loop();
7377 if (!__kmp_init_middle) {
7378 __kmp_do_middle_initialize();
7380 __kmp_assign_root_init_mask();
7381 __kmp_resume_if_hard_paused();
7384 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7385 KMP_ASSERT(KMP_UBER_GTID(gtid));
7387 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7390 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7391 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7392 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7396 #if KMP_HANDLE_SIGNALS
7398 __kmp_install_signals(TRUE);
7402 __kmp_suspend_initialize();
7404 #if defined(USE_LOAD_BALANCE)
7405 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7406 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7409 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7410 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7414 if (__kmp_version) {
7415 __kmp_print_version_2();
7419 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7422 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7424 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // Parallel initialization must happen first; it also takes __kmp_initz_lock,
  // so it cannot be called while we hold the lock below.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  // No hidden helper tasks have been executed yet.
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Initialize the hidden helper team/threads and wait for them to come up.
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
  __kmp_do_initialize_hidden_helper_threads();
  __kmp_hidden_helper_threads_initz_wait();
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
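
// Illustrative note (not from the original sources): the hidden helper team
// services tasks the runtime generates internally, for example the tasks used
// to track asynchronous `#pragma omp target nowait` regions. A hypothetical
// user program that exercises this path could look like:
//
//   #pragma omp target nowait          // deferred target task; a hidden
//   { heavy_kernel(); }                // helper thread may drive completion
//   #pragma omp taskwait               // synchronize with the deferred task
//
// The exact lowering is compiler- and version-dependent; this is only a
// sketch.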
/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  /* The thread has not encountered any constructs yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset the doacross buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // Inform ittnotify about entering user's code.
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // Inform ittnotify about leaving user's code.
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct.
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root.  Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when teams masters were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

  // Launch the league of teams now, but do not let workers execute (they hang
  // on the fork barrier until the next parallel region).
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size.
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last argument eliminates the join barrier, which would not work here
  // because the worker threads are waiting in the fork barrier.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* Set the requested number of threads for the next parallel region
   encountered by this thread (num_threads clause). */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
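
// Illustrative sketch (not from the original sources): the compiler lowers a
// num_threads clause into a call to the __kmpc_push_num_threads entry point,
// which forwards to __kmp_push_num_threads before the fork. For
//   #pragma omp parallel num_threads(4)
// the generated code conceptually does:
//
//   __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
//   __kmpc_fork_call(&loc, /*argc=*/0, outlined_parallel_fn);
//
// `loc` and `outlined_parallel_fn` stand for compiler-generated data.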
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions.
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without a warning since it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var).
    // No thread_limit clause specified - do not change thread-limit-var ICV.
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in the th_cg_roots
    // list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Set the number of teams and the thread limit for the upcoming teams
   construct (num_teams / thread_limit clauses). */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires a positive value, but users can pass
    // anything, so check and fall back to one team.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set the number of teams (the number of threads in the outer "parallel"
  // of the teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
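
// Illustrative sketch (not from the original sources): the compiler lowers
//   #pragma omp teams num_teams(4) thread_limit(8)
// into a call to the __kmpc_push_num_teams entry point before forking the
// league, conceptually:
//
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/4, /*num_threads=*/8);
//   __kmpc_fork_teams(&loc, /*argc=*/0, outlined_teams_fn);
//
// `loc`, `gtid`, and `outlined_teams_fn` stand for compiler-generated data.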
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // exact number of teams required
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set the number of teams (the number of threads in the outer "parallel"
  // of the teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
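
// Illustrative sketch (not from the original sources): OpenMP 5.1 allows a
// lower and an upper bound on the number of teams, e.g.
//   #pragma omp teams num_teams(2 : 8) thread_limit(4)
// which the compiler lowers to the 5.1 entry point, conceptually:
//
//   __kmpc_push_num_teams_51(&loc, gtid, /*num_teams_lb=*/2,
//                            /*num_teams_ub=*/8, /*num_threads=*/4);
//   __kmpc_fork_teams(&loc, /*argc=*/0, outlined_teams_fn);
//
// `loc`, `gtid`, and `outlined_teams_fn` are placeholders.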
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
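
// Illustrative sketch (not from the original sources): a proc_bind clause such
// as
//   #pragma omp parallel proc_bind(spread)
// is lowered to the __kmpc_push_proc_bind entry point before the fork,
// conceptually:
//
//   __kmpc_push_proc_bind(&loc, gtid, proc_bind_spread);
//   __kmpc_fork_call(&loc, /*argc=*/0, outlined_parallel_fn);
//
// `loc`, `gtid`, and `outlined_parallel_fn` are placeholders.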
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread zero enters the ordered section first */

  /* Reset the identifiers on the dispatch buffers. */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (int f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* Release the worker threads so they may begin working. */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  /* Join barrier after the fork. */
#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team if we
// are at the outermost level of parallelism; otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region, based on the current system load.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root, and the currently executing thread (to become the primary
  // thread) are available to add to the new team, but currently contribute to
  // the system load and must be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // Reading the load information failed, so fall back to the thread-limit
    // algorithm for this and all subsequent requests.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread-limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The load-balance algorithm is slightly delayed in detecting newly running
  // procs; the real system load should be at least as large as the number of
  // active OpenMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
/* ------------------------------------------------------------------------ */

void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so freeing
  // __kmp_threads frees both arrays.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is a no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is a no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* This is an important step: it registers new sibling threads and assigns
     new uber threads a gtid. */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* First, make sure we are initialized so we can get our gtid. */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* must be called in a serial region */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
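
// Illustrative sketch (not from the original sources): user code reaches this
// path through the kmp_* extension API declared in the shipped kmp.h/omp.h,
// e.g.:
//
//   #include <omp.h>
//   kmp_set_library_throughput();
//
// Throughput mode caps the default blocktime (see __kmp_aux_set_library
// below), while turnaround mode favors dedicated machines.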
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  /* Round the argument up to a 4 KiB boundary. */
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* Only change the default stacksize before the first parallel region. */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* as if KMP_STACKSIZE had been specified */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
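
// Illustrative sketch (not from the original sources): the user-visible way to
// reach this routine is the kmp_set_stacksize_s() extension (or the
// KMP_STACKSIZE / OMP_STACKSIZE environment variables), and it only has an
// effect before the first parallel region, e.g.:
//
//   #include <omp.h>
//   int main() {
//     kmp_set_stacksize_s(8 * 1024 * 1024); // 8 MiB worker stacks
//   #pragma omp parallel
//     { /* ... */ }
//   }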
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team APIs.
   Returns NULL if the thread is not in a teams construct. */
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
// One row of the affinity-format keyword table: the single-character field
// specifier, its long name (used inside %{...}), and the printf conversion
// used to print it ('d' for integers, 's' for strings).
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g. L -> nesting level
  const char *long_name; // from spec, e.g. nesting_level
  char field_format; // data type for snprintf ('d' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
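
// Illustrative example (not from the original sources): a format string using
// the table above can be supplied via the OMP_AFFINITY_FORMAT environment
// variable or the omp_set_affinity_format() API, e.g.:
//
//   OMP_DISPLAY_AFFINITY=TRUE \
//   OMP_AFFINITY_FORMAT="host=%H pid=%P tid=%i thread=%0.3n affinity={%A}" \
//   ./a.out
//
// would print one line per thread such as
//   host=node01 pid=12345 tid=67890 thread=002 affinity={0,4}
// (the values are, of course, system-dependent).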
// Print the affinity-format field pointed to by *ptr (which must start at a
// '%') into field_buffer, advance *ptr past the field, and return the number
// of characters needed to hold the formatted string (not counting the
// terminating null byte).
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit field widths; this also keeps format from overflowing
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Fill the buffer with the requested value via __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // Per the spec, if the implementation has no information for a field type,
    // then "undefined" is printed.
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Print the affinity format string for thread gtid into buffer and return the
// number of characters needed to hold the formatted string (not counting the
// terminating null byte).
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Display the affinity string for thread gtid on stdout.
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
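
// Illustrative sketch (not from the original sources): the OpenMP 5.0 API
// entry points that end up here are omp_display_affinity() and
// omp_capture_affinity(), e.g.:
//
//   #include <omp.h>
//   #pragma omp parallel
//   {
//     omp_display_affinity(NULL); // NULL means use affinity-format-var
//   }
//
// Setting OMP_DISPLAY_AFFINITY=TRUE makes the runtime emit a similar line
// automatically, typically on each thread's first parallel region.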
/* ------------------------------------------------------------------------ */

void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams. */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set the blocktime intervals for the teams. */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set explicitly. */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
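
// Illustrative sketch (not from the original sources): users normally reach
// this routine through the kmp_set_blocktime() extension or the KMP_BLOCKTIME
// environment variable, e.g.:
//
//   #include <omp.h>
//   kmp_set_blocktime(0); // park worker threads right after a parallel region
//
// A blocktime of 0 trades re-activation latency for lower idle CPU use, while
// KMP_BLOCKTIME=infinite keeps workers spinning between regions.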
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
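
// Illustrative sketch (not from the original sources): this backs the
// kmp_set_defaults() extension, which accepts the same "NAME=value" strings as
// the environment, e.g.:
//
//   #include <omp.h>
//   kmp_set_defaults("KMP_BLOCKTIME=0");
//
// It should be called before the first parallel region to have full effect.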
/* ------------------------------------------------------------------------ */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct (lck != NULL).
  // If (reduce_data != NULL && reduce_func != NULL): the tree-reduction method
  // can be selected by the RTL.
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE: the atomic reduce method
  // can be selected by the RTL.
  // Finally, it is up to the OpenMP RTL to decide which of the generated
  // methods to use.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another way of getting the team size (with one dynamic deref) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
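
// Illustrative note (not from the original sources): this selection runs under
// the __kmpc_reduce / __kmpc_reduce_nowait entry points that the compiler
// emits for a reduction clause, e.g. for
//
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i) sum += a[i];
//
// the generated code passes the outlined reduce_func, the packed reduce_data
// and its size here, and the runtime then finishes the reduction with a
// critical section, atomics, or a tree barrier depending on the method chosen
// above. A method can be forced via the KMP_FORCE_REDUCTION environment
// variable.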
// This function is used for testing: it reports the reduction method chosen
// for the most recent reduction executed by the calling thread.
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
// Soft pause: threads ignore blocktime and go to sleep; spin-wait code checks
// __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely; it is re-initialized when
// OpenMP is used again.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume clears __kmp_pause_status and wakes up all sleeping threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// level selects resume (kmp_not_paused), soft pause, or hard pause.
// Returns 0 on success, 1 if the request is invalid in the current state.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // runtime is not paused, so it cannot be resumed
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
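
// Illustrative sketch (not from the original sources): the OpenMP 5.0 API that
// maps onto this routine is omp_pause_resource() / omp_pause_resource_all(),
// e.g.:
//
//   #include <omp.h>
//   omp_pause_resource_all(omp_pause_soft); // release idle worker resources
//   /* ... phase with no OpenMP work ... */
//   #pragma omp parallel                    // resumes the runtime on demand
//   { /* ... */ }
//
// omp_pause_hard additionally tears the runtime down; it is re-initialized the
// next time an OpenMP construct is encountered.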
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// The team size is changing, so the distributed barrier must be resized.
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // All the workers must stop waiting on the barrier while the team size is
  // adjusted.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team.
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If the thread is still transitioning to the in-use state, wait for it.
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should now be in use; transition it to the unused state.
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers.
  kmp_uint64 new_value; // new value for go
  new_value = team->t.b->go_release();

  // Workers should see transition status 2 and move to 0; they may need to be
  // woken up first.
  int my_go_index;
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      my_go_index = f / team->t.b->threads_per_go;
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size.
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Add the threads back to the team.
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads were paused during a resize of the team. Setting th_used_in_team
  // to 3 tells each thread to transition itself back into the team; if
  // blocktime is not infinite the thread may be sleeping, so resume it.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // Wait until every thread has moved into the team (th_used_in_team == 1).
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
// Globals and functions for hidden helper tasks.
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

static std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

static void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // Explicitly synchronize all hidden helper threads: a regular thread may
  // push a hidden helper task before the helpers have ever been woken up after
  // the main thread created the team.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // The main hidden helper thread releases the initial thread and then waits
  // for a signal before waking up the remaining workers.
  if (__kmpc_master(nullptr, *gtid)) {
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads.
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads.
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Reset the initialization flag.
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
// Nesting mode is used to simplify nested parallelism on machines with a
// hierarchy of resources.
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of nesting based on the
// machine topology.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // skip levels that contribute only a single thread
    }
    // Make sure all cores are used.
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't exceed 2 levels per core
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // If max-active-levels was set, use it for the nesting mode levels too.
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}