#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
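
// __kmp_get_global_thread_id: look up the gtid of the calling thread without
// registering it. Depending on __kmp_gtid_mode this uses the thread-local
// TDATA copy, keyed TLS, or the internal algorithm that matches the address of
// a local stack variable against the recorded stack base/size of every known
// thread. Returns KMP_GTID_DNE if gtid bookkeeping is not yet initialized.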
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
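
// __kmp_check_stack_overlap: optionally print this thread's stack extent and,
// when __kmp_env_checks is set and the thread is not an uber (root) thread,
// compare its stack against every other registered thread's stack; any
// overlap is reported as a fatal StackOverlap error.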
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}
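
// __kmp_print_storage_map_gtid: print one "OMP storage map" line for the
// object [p1, p2) under __kmp_stdio_lock; when KMP_PRINT_DATA_PLACEMENT is
// enabled it additionally reports the host/NUMA node of each page in the
// range.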
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0)
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
        do {
          last = (char *)p1;
          lastNode = node;
          do {
            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
        } while (p1 <= p2);
#else
        __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p2 > p1)
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
#endif
      }
    }
  } else
    __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
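
// __kmp_warn: printf-style "OMP warning:" message to kmp_err, suppressed when
// __kmp_generate_warnings == kmp_warnings_off.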
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
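
// __kmp_abort_process / __kmp_abort_thread: abort the process (dumping the
// debug buffer first if it is enabled) or park the calling thread in
// __kmp_infinite_loop().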
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    __kmp_global.g.g_abort = SIGABRT;
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
}
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}

static void __kmp_init_allocator() { __kmp_init_memkind(); }
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
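
// On Windows DLL unload the locks below may be left in an inconsistent state
// by threads the OS has already terminated; __kmp_reset_locks_on_process_detach
// re-initializes them once no other registered thread is still alive.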
static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck);
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break;
  }

  __kmp_reset_lock(&__kmp_forkjoin_lock);
  __kmp_reset_lock(&__kmp_stdio_lock);
}
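
// DllMain: process and thread attach/detach hooks used when the runtime is
// built as a Windows DLL; the detach paths tear down the library or the
// calling thread's runtime state.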
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
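
// __kmp_parallel_deo / __kmp_parallel_dxo: enter/exit an "ordered" region.
// With BUILD_PARALLEL_ORDERED the entering thread waits until
// t_ordered.dt.t_value equals its tid, and on exit passes the token to
// (tid + 1) modulo t_nproc.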
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
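
// __kmp_enter_single / __kmp_exit_single: claim a "single" construct via an
// atomic compare-and-store on team->t.t_construct; the return value is
// nonzero on the thread that wins the region.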
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  kmp_info_t *th;
  kmp_team_t *team;
  int status;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }

  if (status)
    __kmp_itt_single_start(gtid);

  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
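
// __kmp_reserve_threads: decide how many threads a new parallel region may
// use, applying dynamic adjustment (load balance, thread limit, random), the
// device thread limit, the contention-group thread limit, and the capacity of
// the __kmp_threads array; emits one-time warnings when the request must be
// trimmed and dyn-var is false.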
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) {
    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level;
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1;
      } else {
        use_hot_team = 0;
        hot_teams[level].hot_team = team;
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {

      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);

      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      {
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
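
// propagateFPControl / updateHWFPControl: when __kmp_inherit_fp_control is
// set, capture the master's x87 control word and MXCSR into the team and
// restore them on threads whose current registers differ.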
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc);
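
// __kmp_serialized_parallel: execute a parallel region on a single thread by
// (re)using this thread's serial team: bump t_serialized, push a fresh
// dispatch buffer, update ICVs for the nested level, and emit the OMPT
// parallel-begin / implicit-task-begin events.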
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  } else {
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)
          ->thread_num = __kmp_tid_from_gtid(global_tid);
    }

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
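
// __kmp_fork_call: the main entry point for starting a parallel region. It
// decides between serializing the region, running a parallel nested inside a
// teams construct on the parent team, or reserving threads and allocating a
// new team; it then forks the team, invokes the microtask on the master, and
// reports the region to OMPT/ITT as requested.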
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      if (call_context == fork_context_gnu) {
        parent_team->t.t_serialized--;
        return TRUE;
      }

      void *dummy;
      void **exit_frame_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)
              ->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      parent_team->t.t_serialized--;

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                               exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
    }

    if (master_set_numthreads) {
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
      }
      master_th->th.th_set_nproc = 0;
    }

    if (__kmp_debugging) {
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) {
        master_set_numthreads = nth;
      }
    }

    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
         KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 &&
        master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    }
    if (__itt_stack_caller_create_ptr) {
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)
      return TRUE;

    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  }
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    if ((get__max_active_levels(master_th) == 1 &&
         (root->r.r_in_parallel && !enter_teams)) ||
        (__kmp_library == library_serial)) {
      KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                    " threads\n",
                    gtid, nthreads));
      nthreads = 1;
    }
    if (nthreads > 1) {
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        master_th->th.th_serial_team->t.t_level--;

        void *dummy;
        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv, exit_frame_p);
        }

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }
          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
            *argv++ = va_arg(kmp_va_deref(ap), void *);
        } else {
          for (i = 0; i < argc; ++i)
            argv[i] = parent_team->t.t_argv[i];
        }

        if (ompt_enabled.enabled) {
          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 0,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
          }
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_league,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);

        void *dummy;
        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_frame_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args, exit_frame_p);
        }

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
      }
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  }

  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0;
  }

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  } else {
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size; ++i) {
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KF_TRACE(
      10,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1 &&
      !master_th->th.th_teams_microtask) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
  }

#if KMP_STATS_ENABLED
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
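
// __kmp_join_call: the counterpart of __kmp_fork_call. After the join barrier
// it emits OMPT/ITT end events, handles the teams-construct case where the
// (hot) team is kept and only nesting levels and thread counts are adjusted,
// frees the team, and restores the master's parent-team state, FP control and
// task state.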
2289 void __kmp_join_call(
ident_t *loc,
int gtid
2292 enum fork_context_e fork_context
2296 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2298 kmp_team_t *parent_team;
2299 kmp_info_t *master_th;
2303 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2306 master_th = __kmp_threads[gtid];
2307 root = master_th->th.th_root;
2308 team = master_th->th.th_team;
2309 parent_team = team->t.t_parent;
2311 master_th->th.th_ident = loc;
2314 void *team_microtask = (
void *)team->t.t_pkfn;
2318 if (ompt_enabled.enabled &&
2319 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2320 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2325 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2326 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, " 2327 "th_task_team = %p\n",
2328 __kmp_gtid_from_thread(master_th), team,
2329 team->t.t_task_team[master_th->th.th_task_state],
2330 master_th->th.th_task_team));
2331 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2332 team->t.t_task_team[master_th->th.th_task_state]);
2336 if (team->t.t_serialized) {
2337 if (master_th->th.th_teams_microtask) {
2339 int level = team->t.t_level;
2340 int tlevel = master_th->th.th_teams_level;
2341 if (level == tlevel) {
2345 }
else if (level == tlevel + 1) {
2349 team->t.t_serialized++;
2355 if (ompt_enabled.enabled) {
2356 __kmp_join_restore_state(master_th, parent_team);
2363 master_active = team->t.t_master_active;
2368 __kmp_internal_join(loc, gtid, team);
2370 master_th->th.th_task_state =
2377 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2378 void *codeptr = team->t.ompt_team_info.master_return_address;
2382 if (__itt_stack_caller_create_ptr) {
2384 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2387 if (team->t.t_active_level == 1 &&
2388 (!master_th->th.th_teams_microtask ||
2389 master_th->th.th_teams_size.nteams == 1)) {
2390 master_th->th.th_ident = loc;
2393 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2394 __kmp_forkjoin_frames_mode == 3)
2395 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2396 master_th->th.th_frame_time, 0, loc,
2397 master_th->th.th_team_nproc, 1);
2398 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2399 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2400 __kmp_itt_region_joined(gtid);
2404 if (master_th->th.th_teams_microtask && !exit_teams &&
2405 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2406 team->t.t_level == master_th->th.th_teams_level + 1) {
2411 ompt_data_t ompt_parallel_data = ompt_data_none;
2412 if (ompt_enabled.enabled) {
2413 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2414 if (ompt_enabled.ompt_callback_implicit_task) {
2415 int ompt_team_size = team->t.t_nproc;
2416 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2417 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2418 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2420 task_info->frame.exit_frame = ompt_data_none;
2421 task_info->task_data = ompt_data_none;
2422 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2423 __ompt_lw_taskteam_unlink(master_th);
2428 team->t.t_active_level--;
2429 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2435 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2436 int old_num = master_th->th.th_team_nproc;
2437 int new_num = master_th->th.th_teams_size.nth;
2438 kmp_info_t **other_threads = team->t.t_threads;
2439 team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
2455 if (__kmp_tasking_mode != tskm_immediate_exec) {
2457 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2463 if (ompt_enabled.enabled) {
2464 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2465 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2473 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2474 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2476 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2481 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2483 if (!master_th->th.th_teams_microtask ||
2484 team->t.t_level > master_th->th.th_teams_level) {
2486 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2488 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);
#if KMP_AFFINITY_SUPPORTED
  // Restore master thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif // KMP_AFFINITY_SUPPORTED
  master_th->th.th_def_allocator = team->t.t_def_allocator;
2518 updateHWFPControl(team);
2520 if (root->r.r_active != master_active)
2521 root->r.r_active = master_active;
  __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th));
2531 master_th->th.th_team = parent_team;
2532 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2533 master_th->th.th_team_master = parent_team->t.t_threads[0];
2534 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2537 if (parent_team->t.t_serialized &&
2538 parent_team != master_th->th.th_serial_team &&
2539 parent_team != root->r.r_root_team) {
2540 __kmp_free_team(root,
2541 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2542 master_th->th.th_serial_team = parent_team;
2545 if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top > 0) {
      // Restore task state from the memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }
2570 master_th->th.th_current_task->td_flags.executing = 1;
2572 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
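/* Sketch of the control-stack idea in isolation (hypothetical names, not the
   runtime's own types): each serialized nesting level that changes an ICV
   pushes exactly one snapshot, tagged with its nesting depth, onto a LIFO list
   so the previous values can be restored when that level ends.

     struct icv_snapshot {
       int nproc;                  // snapshot of the ICVs of interest
       int max_active_levels;
       int serial_nesting_level;   // which serialized level owns this record
       icv_snapshot *next;
     };
     static void push_snapshot(icv_snapshot **top, const icv_snapshot &current,
                               int nesting_level) {
       icv_snapshot *node = new icv_snapshot(current);
       node->serial_nesting_level = nesting_level;
       node->next = *top;          // newest level sits on top of the stack
       *top = node;
     }
*/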
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call is made
    hot_team->t.t_size_changed = -1;
  }
}
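/* A minimal usage sketch, assuming the standard omp_set_num_threads() entry
   point forwards to __kmp_set_num_threads() above: the value is clamped,
   stored in the current task's nproc ICV, and an oversized idle hot team is
   trimmed immediately so spare workers return to the pool.

     #include <omp.h>
     #include <stdio.h>
     int main() {
       omp_set_num_threads(3); // updates the nproc ICV via the routine above
     #pragma omp parallel
       printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
       return 0;
     }
*/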
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // The call is ignored: the current setting is not changed and the last
    // valid setting is used. A warning is issued (if warnings are allowed).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // the value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT];
    // zero is allowed (implementation defined behavior)
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
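/* Usage sketch for the pair above, assuming the standard OpenMP entry points
   map onto them: values below 0 are ignored with a warning, values above
   KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped, and the result lives in the current
   task's ICVs.

     #include <omp.h>
     #include <stdio.h>
     int main() {
       omp_set_max_active_levels(2);                // allow two active levels
       printf("%d\n", omp_get_max_active_levels()); // reads the ICV back
       return 0;
     }
*/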
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, show this via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
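/* Usage sketch, assuming omp_set_schedule()/omp_get_schedule() land in the two
   routines above: the standard kinds are translated through __kmp_sch_map into
   the internal sched_type values, and read back in the reverse direction.

     #include <omp.h>
     int main() {
       omp_set_schedule(omp_sched_dynamic, 8); // runtime schedule: dynamic,8
       omp_sched_t kind;
       int chunk;
       omp_get_schedule(&kind, &chunk); // kind == omp_sched_dynamic, chunk == 8
       return 0;
     }
*/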
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // in a teams region multiple nested teams share the same level
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1; // pass by the teams league
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);
  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
2932 thr = __kmp_threads[gtid];
2933 team = thr->th.th_team;
2934 ii = team->t.t_level;
2938 if (thr->th.th_teams_microtask) {
2940 int tlevel = thr->th.th_teams_level;
2943 KMP_DEBUG_ASSERT(ii >= tlevel);
2954 while (ii > level) {
2955 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2957 if (team->t.t_serialized && (!dd)) {
2958 team = team->t.t_parent;
2962 team = team->t.t_parent;
  return team->t.t_nproc;
}
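/* Usage sketch, assuming the standard nesting queries forward to the two
   routines above: level 0 is the implicit outer task, higher levels walk the
   chain of parent teams built by nested parallel regions.

     #include <omp.h>
     #include <stdio.h>
     int main() {
       omp_set_max_active_levels(2); // enable one level of nesting
     #pragma omp parallel num_threads(2)
     #pragma omp parallel num_threads(2)
       printf("outer tid %d, outer team size %d\n",
              omp_get_ancestor_thread_num(1), omp_get_team_size(1));
       return 0;
     }
*/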
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced/greedy)
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided (iterative/analytical)
  } else { // static_chunked, dynamic_chunked, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK; // __kmp_chunk may not have been set
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
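/* Worked example of the mapping above, assuming OMP_SCHEDULE was already
   parsed into __kmp_sched/__kmp_chunk by the settings code: "guided,4" yields
   r_sched_type = __kmp_guided with chunk 4, while a plain "static" keeps the
   chunk at KMP_DEFAULT_CHUNK. A schedule(runtime) loop then consumes it
   (work() below is a placeholder):

     // OMP_SCHEDULE="guided,4" ./a.out
     #pragma omp parallel for schedule(runtime)
     for (int i = 0; i < 1000; ++i)
       work(i); // dispatched with the guided algorithm, minimum chunk 4
*/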
/* Allocate the argv array for the team, if needed. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    // the inline argv inside the team structure is never freed
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      // use the inline, statically sized argv
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      // allocate a dynamic array, rounding small requests up
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
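/* The grow pattern above in miniature (hypothetical helper, raw arrays on
   purpose to mirror the runtime's allocator use): only t_threads carries live
   data across the resize, so it is the only array that gets copied; the
   dispatch and task arrays are simply re-created at the new capacity.

     #include <cstring>
     static int *grow_int_array(int *old_data, int live_count, int new_cap) {
       int *fresh = new int[new_cap];                          // new block
       std::memcpy(fresh, old_data, live_count * sizeof(int)); // keep live part
       delete[] old_data;                                      // drop old block
       return fresh;
     }
*/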
3108 static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3110 kmp_r_sched_t r_sched =
3111 __kmp_get_schedule_global();
3113 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3115 kmp_internal_control_t g_icvs = {
3117 (kmp_int8)__kmp_global.g.g_dynamic,
3119 (kmp_int8)__kmp_env_blocktime,
3121 __kmp_dflt_blocktime,
3126 __kmp_dflt_team_nth,
3130 __kmp_dflt_max_active_levels,
3134 __kmp_nested_proc_bind.bind_types[0],
3135 __kmp_default_device,
3142 static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3144 kmp_internal_control_t gx_icvs;
3145 gx_icvs.serial_nesting_level =
3147 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3148 gx_icvs.next = NULL;
3153 static void __kmp_initialize_root(kmp_root_t *root) {
3155 kmp_team_t *root_team;
3156 kmp_team_t *hot_team;
3157 int hot_team_max_nth;
3158 kmp_r_sched_t r_sched =
3159 __kmp_get_schedule_global();
3160 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3161 KMP_DEBUG_ASSERT(root);
3162 KMP_ASSERT(!root->r.r_begin);
3165 __kmp_init_lock(&root->r.r_begin_lock);
3166 root->r.r_begin = FALSE;
3167 root->r.r_active = FALSE;
3168 root->r.r_in_parallel = 0;
3169 root->r.r_blocktime = __kmp_dflt_blocktime;
3173 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3176 __kmp_allocate_team(root,
3182 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3184 USE_NESTED_HOT_ARG(NULL)
3189 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3192 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3194 root->r.r_root_team = root_team;
3195 root_team->t.t_control_stack_top = NULL;
3198 root_team->t.t_threads[0] = NULL;
3199 root_team->t.t_nproc = 1;
3200 root_team->t.t_serialized = 1;
3202 root_team->t.t_sched.sched = r_sched.sched;
3205 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3206 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3210 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3213 __kmp_allocate_team(root,
3215 __kmp_dflt_team_nth_ub * 2,
3219 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3221 USE_NESTED_HOT_ARG(NULL)
3223 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3225 root->r.r_hot_team = hot_team;
3226 root_team->t.t_control_stack_top = NULL;
3229 hot_team->t.t_parent = root_team;
3232 hot_team_max_nth = hot_team->t.t_max_nproc;
3233 for (f = 0; f < hot_team_max_nth; ++f) {
3234 hot_team->t.t_threads[f] = NULL;
3236 hot_team->t.t_nproc = 1;
3238 hot_team->t.t_sched.sched = r_sched.sched;
3239 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
3250 static void __kmp_print_structure_team_accum(
3251 kmp_team_list_t list,
3252 kmp_team_p
const *team
3262 KMP_DEBUG_ASSERT(list != NULL);
3267 __kmp_print_structure_team_accum(list, team->t.t_parent);
3268 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3272 while (l->next != NULL && l->entry != team) {
3275 if (l->next != NULL) {
3281 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3287 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3288 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3316 void __kmp_print_structure(
void) {
3318 kmp_team_list_t list;
3322 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3326 __kmp_printf(
"\n------------------------------\nGlobal Thread " 3327 "Table\n------------------------------\n");
3330 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3331 __kmp_printf(
"%2d", gtid);
3332 if (__kmp_threads != NULL) {
3333 __kmp_printf(
" %p", __kmp_threads[gtid]);
3335 if (__kmp_root != NULL) {
3336 __kmp_printf(
" %p", __kmp_root[gtid]);
3343 __kmp_printf(
"\n------------------------------\nThreads\n--------------------" 3345 if (__kmp_threads != NULL) {
3347 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3348 kmp_info_t
const *thread = __kmp_threads[gtid];
3349 if (thread != NULL) {
3350 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3351 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3352 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3353 __kmp_print_structure_team(
" Serial Team: ",
3354 thread->th.th_serial_team);
3355 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3356 __kmp_print_structure_thread(
" Master: ",
3357 thread->th.th_team_master);
3358 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3359 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3360 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3361 __kmp_print_structure_thread(
" Next in pool: ",
3362 thread->th.th_next_pool);
3364 __kmp_print_structure_team_accum(list, thread->th.th_team);
3365 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3369 __kmp_printf(
"Threads array is not allocated.\n");
3373 __kmp_printf(
"\n------------------------------\nUbers\n----------------------" 3375 if (__kmp_root != NULL) {
3377 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3378 kmp_root_t
const *root = __kmp_root[gtid];
3380 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3381 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3382 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3383 __kmp_print_structure_thread(
" Uber Thread: ",
3384 root->r.r_uber_thread);
3385 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3386 __kmp_printf(
" In Parallel: %2d\n",
3387 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3389 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3390 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3394 __kmp_printf(
"Ubers array is not allocated.\n");
3397 __kmp_printf(
"\n------------------------------\nTeams\n----------------------" 3399 while (list->next != NULL) {
3400 kmp_team_p
const *team = list->entry;
3402 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3403 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3404 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3405 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3406 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3407 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3408 for (i = 0; i < team->t.t_nproc; ++i) {
3409 __kmp_printf(
" Thread %2d: ", i);
3410 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3412 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3418 __kmp_printf(
"\n------------------------------\nPools\n----------------------" 3420 __kmp_print_structure_thread(
"Thread pool: ",
3421 CCAST(kmp_info_t *, __kmp_thread_pool));
3422 __kmp_print_structure_team(
"Team pool: ",
3423 CCAST(kmp_team_t *, __kmp_team_pool));
3427 while (list != NULL) {
3428 kmp_team_list_item_t *item = list;
3430 KMP_INTERNAL_FREE(item);
3439 static const unsigned __kmp_primes[] = {
3440 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3441 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3442 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3443 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3444 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3445 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3446 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3447 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3448 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3449 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3450 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died non-active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present.

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim array entries for root threads
     that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to the new value
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
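/* The capacity policy above in isolation (hypothetical helper): repeatedly
   double, but never beyond the system-wide maximum, until the requested head
   room fits. Because __kmp_threads and __kmp_root live in one allocation, a
   single allocate-copy-swap grows both tables together.

     static int next_capacity(int current, int needed, int sys_max) {
       int required = current + needed;
       int cap = current;
       do {
         cap = (cap <= (sys_max >> 1)) ? (cap << 1) : sys_max; // double, clamp
       } while (cap < required);
       return cap;
     }
*/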
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3625 capacity = __kmp_threads_capacity;
3626 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3631 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3632 if (__kmp_tp_cached) {
3633 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3634 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3635 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3637 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
  /* find an available thread slot; don't reassign slot zero, which is reserved
     for the initial thread */
  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
       gtid++)
    ;
  KA_TRACE(1,
           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
  KMP_ASSERT(gtid < __kmp_threads_capacity);
3654 TCW_4(__kmp_nth, __kmp_nth + 1);
3658 if (__kmp_adjust_gtid_mode) {
3659 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3660 if (TCR_4(__kmp_gtid_mode) != 2) {
3661 TCW_4(__kmp_gtid_mode, 2);
3664 if (TCR_4(__kmp_gtid_mode) != 1) {
3665 TCW_4(__kmp_gtid_mode, 1);
3670 #ifdef KMP_ADJUST_BLOCKTIME 3673 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3674 if (__kmp_nth > __kmp_avail_proc) {
3675 __kmp_zero_bt = TRUE;
3681 if (!(root = __kmp_root[gtid])) {
3682 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3683 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3686 #if KMP_STATS_ENABLED 3688 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3689 __kmp_stats_thread_ptr->startLife();
3690 KMP_SET_THREAD_STATE(SERIAL_REGION);
3693 __kmp_initialize_root(root);
3696 if (root->r.r_uber_thread) {
3697 root_thread = root->r.r_uber_thread;
3699 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3700 if (__kmp_storage_map) {
3701 __kmp_print_thread_storage_map(root_thread, gtid);
3703 root_thread->th.th_info.ds.ds_gtid = gtid;
3705 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3707 root_thread->th.th_root = root;
3708 if (__kmp_env_consistency_check) {
3709 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3712 __kmp_initialize_fast_memory(root_thread);
3716 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3717 __kmp_initialize_bget(root_thread);
3719 __kmp_init_random(root_thread);
3723 if (!root_thread->th.th_serial_team) {
3724 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3725 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3726 root_thread->th.th_serial_team = __kmp_allocate_team(
3731 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3733 KMP_ASSERT(root_thread->th.th_serial_team);
3734 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3735 root_thread->th.th_serial_team));
3738 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3740 root->r.r_root_team->t.t_threads[0] = root_thread;
3741 root->r.r_hot_team->t.t_threads[0] = root_thread;
3742 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3744 root_thread->th.th_serial_team->t.t_serialized = 0;
3745 root->r.r_uber_thread = root_thread;
3748 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3749 TCW_4(__kmp_init_gtid, TRUE);
3752 __kmp_gtid_set_specific(gtid);
3755 __kmp_itt_thread_name(gtid);
3758 #ifdef KMP_TDATA_GTID 3761 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3762 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
3771 for (b = 0; b < bs_last_barrier; ++b) {
3772 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3774 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3778 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3779 KMP_INIT_BARRIER_STATE);
3781 #if KMP_AFFINITY_SUPPORTED 3782 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3783 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3784 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3785 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3786 if (TCR_4(__kmp_init_middle)) {
3787 __kmp_affinity_set_init_mask(gtid, TRUE);
3790 root_thread->th.th_def_allocator = __kmp_def_allocator;
3791 root_thread->th.th_prev_level = 0;
3792 root_thread->th.th_prev_num_threads = 1;
3794 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3795 tmp->cg_root = root_thread;
3796 tmp->cg_thread_limit = __kmp_cg_max_nth;
3797 tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  root_thread->th.th_cg_roots = tmp;
3804 __kmp_root_counter++;
3807 if (!initial_thread && ompt_enabled.enabled) {
3809 kmp_info_t *root_thread = ompt_get_thread();
3811 ompt_set_thread_state(root_thread, ompt_state_overhead);
3813 if (ompt_enabled.ompt_callback_thread_begin) {
3814 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3815 ompt_thread_initial, __ompt_get_thread_data_internal());
3817 ompt_data_t *task_data;
3818 ompt_data_t *parallel_data;
3819 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3820 if (ompt_enabled.ompt_callback_implicit_task) {
3821 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3822 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3825 ompt_set_thread_state(root_thread, ompt_state_work_serial);
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  return gtid;
}
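/* Slot assignment sketch for root registration (simplified from the scan
   above; 'slots' stands in for __kmp_threads): gtid 0 is reserved for the
   initial thread, every other root takes the first free slot, and the table
   is expanded beforehand if no slot is free.

     static int claim_root_slot(void **slots, int capacity, bool initial) {
       int gtid = initial ? 0 : 1;     // slot 0 only for the initial thread
       while (gtid < capacity && slots[gtid] != nullptr)
         ++gtid;                       // linear scan for the first free slot
       return gtid < capacity ? gtid : -1;
     }
*/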
#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
3839 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3840 if (!hot_teams || !hot_teams[level].hot_team) {
3843 KMP_DEBUG_ASSERT(level < max_level);
3844 kmp_team_t *team = hot_teams[level].hot_team;
3845 nth = hot_teams[level].hot_team_nth;
3847 if (level < max_level - 1) {
3848 for (i = 0; i < nth; ++i) {
3849 kmp_info_t *th = team->t.t_threads[i];
3850 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3851 if (i > 0 && th->th.th_hot_teams) {
3852 __kmp_free(th->th.th_hot_teams);
3853 th->th.th_hot_teams = NULL;
3857 __kmp_free_team(root, team, NULL);
3864 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3865 kmp_team_t *root_team = root->r.r_root_team;
3866 kmp_team_t *hot_team = root->r.r_hot_team;
3867 int n = hot_team->t.t_nproc;
3870 KMP_DEBUG_ASSERT(!root->r.r_active);
3872 root->r.r_root_team = NULL;
3873 root->r.r_hot_team = NULL;
3876 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3877 #if KMP_NESTED_HOT_TEAMS 3878 if (__kmp_hot_teams_max_level >
3880 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3881 kmp_info_t *th = hot_team->t.t_threads[i];
3882 if (__kmp_hot_teams_max_level > 1) {
3883 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3885 if (th->th.th_hot_teams) {
3886 __kmp_free(th->th.th_hot_teams);
3887 th->th.th_hot_teams = NULL;
3892 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3897 if (__kmp_tasking_mode != tskm_immediate_exec) {
3898 __kmp_wait_to_unref_task_teams();
3904 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3906 (LPVOID) & (root->r.r_uber_thread->th),
3907 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3908 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3912 ompt_data_t *task_data;
3913 ompt_data_t *parallel_data;
3914 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3915 if (ompt_enabled.ompt_callback_implicit_task) {
3916 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3917 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
3919 if (ompt_enabled.ompt_callback_thread_end) {
3920 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3921 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3927 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
3928 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p" 3930 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
3931 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
3934 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
3935 root->r.r_uber_thread->th.th_cg_roots->cg_root);
3936 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
3937 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
3938 root->r.r_uber_thread->th.th_cg_roots = NULL;
3940 __kmp_reap_thread(root->r.r_uber_thread, 1);
3944 root->r.r_uber_thread = NULL;
3946 root->r.r_begin = FALSE;
3951 void __kmp_unregister_root_current_thread(
int gtid) {
3952 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3956 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3957 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3958 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, " 3961 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3964 kmp_root_t *root = __kmp_root[gtid];
3966 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
3967 KMP_ASSERT(KMP_UBER_GTID(gtid));
3968 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
3969 KMP_ASSERT(root->r.r_active == FALSE);
3973 kmp_info_t *thread = __kmp_threads[gtid];
3974 kmp_team_t *team = thread->th.th_team;
3975 kmp_task_team_t *task_team = thread->th.th_task_team;
3978 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
3981 thread->th.ompt_thread_info.state = ompt_state_undefined;
3983 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3986 __kmp_reset_root(gtid, root);
3989 __kmp_gtid_set_specific(KMP_GTID_DNE);
3990 #ifdef KMP_TDATA_GTID 3991 __kmp_gtid = KMP_GTID_DNE;
3996 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
3998 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4005 static int __kmp_unregister_root_other_thread(
int gtid) {
4006 kmp_root_t *root = __kmp_root[gtid];
4009 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4010 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4011 KMP_ASSERT(KMP_UBER_GTID(gtid));
4012 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4013 KMP_ASSERT(root->r.r_active == FALSE);
4015 r = __kmp_reset_root(gtid, root);
4017 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4023 void __kmp_task_info() {
4025 kmp_int32 gtid = __kmp_entry_gtid();
4026 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4027 kmp_info_t *this_thr = __kmp_threads[gtid];
4028 kmp_team_t *steam = this_thr->th.th_serial_team;
4029 kmp_team_t *team = this_thr->th.th_team;
4032 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p " 4034 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4035 team->t.t_implicit_task_taskdata[tid].td_parent);
4042 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4043 int tid,
int gtid) {
4047 kmp_info_t *master = team->t.t_threads[0];
4048 KMP_DEBUG_ASSERT(this_thr != NULL);
4049 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4050 KMP_DEBUG_ASSERT(team);
4051 KMP_DEBUG_ASSERT(team->t.t_threads);
4052 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4053 KMP_DEBUG_ASSERT(master);
4054 KMP_DEBUG_ASSERT(master->th.th_root);
4058 TCW_SYNC_PTR(this_thr->th.th_team, team);
4060 this_thr->th.th_info.ds.ds_tid = tid;
4061 this_thr->th.th_set_nproc = 0;
4062 if (__kmp_tasking_mode != tskm_immediate_exec)
4065 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4067 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4068 this_thr->th.th_set_proc_bind = proc_bind_default;
4069 #if KMP_AFFINITY_SUPPORTED 4070 this_thr->th.th_new_place = this_thr->th.th_current_place;
4072 this_thr->th.th_root = master->th.th_root;
4075 this_thr->th.th_team_nproc = team->t.t_nproc;
4076 this_thr->th.th_team_master = master;
4077 this_thr->th.th_team_serialized = team->t.t_serialized;
4078 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4080 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4082 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4083 tid, gtid, this_thr, this_thr->th.th_current_task));
4085 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4088 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4089 tid, gtid, this_thr, this_thr->th.th_current_task));
4094 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4096 this_thr->th.th_local.this_construct = 0;
4098 if (!this_thr->th.th_pri_common) {
4099 this_thr->th.th_pri_common =
4100 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4101 if (__kmp_storage_map) {
4102 __kmp_print_storage_map_gtid(
4103 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4104 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4106 this_thr->th.th_pri_head = NULL;
4109 if (this_thr != master &&
4110 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4112 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4113 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4116 int i = tmp->cg_nthreads--;
4117 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads" 4118 " on node %p of thread %p to %d\n",
4119 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4124 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4126 this_thr->th.th_cg_roots->cg_nthreads++;
4127 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on" 4128 " node %p of thread %p to %d\n",
4129 this_thr, this_thr->th.th_cg_roots,
4130 this_thr->th.th_cg_roots->cg_root,
4131 this_thr->th.th_cg_roots->cg_nthreads));
4132 this_thr->th.th_current_task->td_icvs.thread_limit =
4133 this_thr->th.th_cg_roots->cg_thread_limit;
4138 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4141 sizeof(dispatch_private_info_t) *
4142 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4143 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4144 team->t.t_max_nproc));
4145 KMP_ASSERT(dispatch);
4146 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4147 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4149 dispatch->th_disp_index = 0;
4150 dispatch->th_doacross_buf_idx = 0;
4151 if (!dispatch->th_disp_buffer) {
4152 dispatch->th_disp_buffer =
4153 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4155 if (__kmp_storage_map) {
4156 __kmp_print_storage_map_gtid(
4157 gtid, &dispatch->th_disp_buffer[0],
4158 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4160 : __kmp_dispatch_num_buffers],
4161 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4162 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4163 gtid, team->t.t_id, gtid);
4166 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4169 dispatch->th_dispatch_pr_current = 0;
4170 dispatch->th_dispatch_sh_current = 0;
4172 dispatch->th_deo_fcn = 0;
4173 dispatch->th_dxo_fcn = 0;
4176 this_thr->th.th_next_pool = NULL;
4178 if (!this_thr->th.th_task_state_memo_stack) {
4180 this_thr->th.th_task_state_memo_stack =
4181 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4182 this_thr->th.th_task_state_top = 0;
4183 this_thr->th.th_task_state_stack_sz = 4;
4184 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4186 this_thr->th.th_task_state_memo_stack[i] = 0;
4189 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4190 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4200 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4202 kmp_team_t *serial_team;
4203 kmp_info_t *new_thr;
4206 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4207 KMP_DEBUG_ASSERT(root && team);
4208 #if !KMP_NESTED_HOT_TEAMS 4209 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4214 if (__kmp_thread_pool) {
4215 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4216 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4217 if (new_thr == __kmp_thread_pool_insert_pt) {
4218 __kmp_thread_pool_insert_pt = NULL;
4220 TCW_4(new_thr->th.th_in_pool, FALSE);
4221 __kmp_suspend_initialize_thread(new_thr);
4222 __kmp_lock_suspend_mx(new_thr);
4223 if (new_thr->th.th_active_in_pool == TRUE) {
4224 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4225 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4226 new_thr->th.th_active_in_pool = FALSE;
4228 __kmp_unlock_suspend_mx(new_thr);
4230 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4231 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4232 KMP_ASSERT(!new_thr->th.th_team);
4233 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4236 __kmp_initialize_info(new_thr, team, new_tid,
4237 new_thr->th.th_info.ds.ds_gtid);
4238 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4240 TCW_4(__kmp_nth, __kmp_nth + 1);
4242 new_thr->th.th_task_state = 0;
4243 new_thr->th.th_task_state_top = 0;
4244 new_thr->th.th_task_state_stack_sz = 4;
4246 #ifdef KMP_ADJUST_BLOCKTIME 4249 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4250 if (__kmp_nth > __kmp_avail_proc) {
4251 __kmp_zero_bt = TRUE;
4260 kmp_balign_t *balign = new_thr->th.th_bar;
4261 for (b = 0; b < bs_last_barrier; ++b)
4262 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4265 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4266 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4273 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4274 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4279 if (!TCR_4(__kmp_init_monitor)) {
4280 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4281 if (!TCR_4(__kmp_init_monitor)) {
4282 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4283 TCW_4(__kmp_init_monitor, 1);
4284 __kmp_create_monitor(&__kmp_monitor);
4285 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4296 while (TCR_4(__kmp_init_monitor) < 2) {
4299 KF_TRACE(10, (
"after monitor thread has started\n"));
4302 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4307 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4308 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4312 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4314 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4316 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG 4319 __itt_suppress_mark_range(
4320 __itt_suppress_range, __itt_suppress_threading_errors,
4321 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4322 __itt_suppress_mark_range(
4323 __itt_suppress_range, __itt_suppress_threading_errors,
4324 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4326 __itt_suppress_mark_range(
4327 __itt_suppress_range, __itt_suppress_threading_errors,
4328 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4330 __itt_suppress_mark_range(__itt_suppress_range,
4331 __itt_suppress_threading_errors,
4332 &new_thr->th.th_suspend_init_count,
4333 sizeof(new_thr->th.th_suspend_init_count));
4336 __itt_suppress_mark_range(__itt_suppress_range,
4337 __itt_suppress_threading_errors,
4338 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4339 sizeof(new_thr->th.th_bar[0].bb.b_go));
4340 __itt_suppress_mark_range(__itt_suppress_range,
4341 __itt_suppress_threading_errors,
4342 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4343 sizeof(new_thr->th.th_bar[1].bb.b_go));
4344 __itt_suppress_mark_range(__itt_suppress_range,
4345 __itt_suppress_threading_errors,
4346 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4347 sizeof(new_thr->th.th_bar[2].bb.b_go));
4349 if (__kmp_storage_map) {
4350 __kmp_print_thread_storage_map(new_thr, new_gtid);
4355 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4356 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4357 new_thr->th.th_serial_team = serial_team =
4358 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4362 proc_bind_default, &r_icvs,
4363 0 USE_NESTED_HOT_ARG(NULL));
4365 KMP_ASSERT(serial_team);
4366 serial_team->t.t_serialized = 0;
4368 serial_team->t.t_threads[0] = new_thr;
4370 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4374 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4377 __kmp_initialize_fast_memory(new_thr);
4381 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4382 __kmp_initialize_bget(new_thr);
4385 __kmp_init_random(new_thr);
4389 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4390 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4393 kmp_balign_t *balign = new_thr->th.th_bar;
4394 for (b = 0; b < bs_last_barrier; ++b) {
4395 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4396 balign[b].bb.team = NULL;
4397 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4398 balign[b].bb.use_oncore_barrier = 0;
4401 new_thr->th.th_spin_here = FALSE;
4402 new_thr->th.th_next_waiting = 0;
4404 new_thr->th.th_blocking =
false;
4407 #if KMP_AFFINITY_SUPPORTED 4408 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4409 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4410 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4411 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4413 new_thr->th.th_def_allocator = __kmp_def_allocator;
4414 new_thr->th.th_prev_level = 0;
4415 new_thr->th.th_prev_num_threads = 1;
4417 TCW_4(new_thr->th.th_in_pool, FALSE);
4418 new_thr->th.th_active_in_pool = FALSE;
4419 TCW_4(new_thr->th.th_active, TRUE);
4427 if (__kmp_adjust_gtid_mode) {
4428 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4429 if (TCR_4(__kmp_gtid_mode) != 2) {
4430 TCW_4(__kmp_gtid_mode, 2);
4433 if (TCR_4(__kmp_gtid_mode) != 1) {
4434 TCW_4(__kmp_gtid_mode, 1);
4439 #ifdef KMP_ADJUST_BLOCKTIME 4442 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4443 if (__kmp_nth > __kmp_avail_proc) {
4444 __kmp_zero_bt = TRUE;
4451 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4452 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4454 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  return new_thr;
}
4467 static void __kmp_reinitialize_team(kmp_team_t *team,
4468 kmp_internal_control_t *new_icvs,
4470 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4471 team->t.t_threads[0], team));
4472 KMP_DEBUG_ASSERT(team && new_icvs);
4473 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4474 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4476 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4478 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4479 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4481 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4482 team->t.t_threads[0], team));
4488 static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4489 kmp_internal_control_t *new_icvs,
4491 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4494 KMP_DEBUG_ASSERT(team);
4495 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4496 KMP_DEBUG_ASSERT(team->t.t_threads);
4499 team->t.t_master_tid = 0;
4501 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4502 team->t.t_nproc = new_nproc;
4505 team->t.t_next_pool = NULL;
4509 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4510 team->t.t_invoke = NULL;
4513 team->t.t_sched.sched = new_icvs->sched.sched;
4515 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4516 team->t.t_fp_control_saved = FALSE;
4517 team->t.t_x87_fpu_control_word = 0;
4518 team->t.t_mxcsr = 0;
4521 team->t.t_construct = 0;
4523 team->t.t_ordered.dt.t_value = 0;
4524 team->t.t_master_active = FALSE;
4527 team->t.t_copypriv_data = NULL;
4530 team->t.t_copyin_counter = 0;
4533 team->t.t_control_stack_top = NULL;
4535 __kmp_reinitialize_team(team, new_icvs, loc);
4538 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4541 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 4544 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4545 if (KMP_AFFINITY_CAPABLE()) {
4547 if (old_mask != NULL) {
4548 status = __kmp_get_system_affinity(old_mask, TRUE);
4551 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4555 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4560 #if KMP_AFFINITY_SUPPORTED 4566 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4568 kmp_info_t *master_th = team->t.t_threads[0];
4569 KMP_DEBUG_ASSERT(master_th != NULL);
4570 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4571 int first_place = master_th->th.th_first_place;
4572 int last_place = master_th->th.th_last_place;
4573 int masters_place = master_th->th.th_current_place;
4574 team->t.t_first_place = first_place;
4575 team->t.t_last_place = last_place;
4577 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4578 "bound to place %d partition = [%d,%d]\n",
4579 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4580 team->t.t_id, masters_place, first_place, last_place));
4582 switch (proc_bind) {
4584 case proc_bind_default:
4587 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4590 case proc_bind_master: {
4592 int n_th = team->t.t_nproc;
4593 for (f = 1; f < n_th; f++) {
4594 kmp_info_t *th = team->t.t_threads[f];
4595 KMP_DEBUG_ASSERT(th != NULL);
4596 th->th.th_first_place = first_place;
4597 th->th.th_last_place = last_place;
4598 th->th.th_new_place = masters_place;
4599 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4600 team->t.t_display_affinity != 1) {
4601 team->t.t_display_affinity = 1;
4604 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d " 4605 "partition = [%d,%d]\n",
4606 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4607 f, masters_place, first_place, last_place));
4611 case proc_bind_close: {
4613 int n_th = team->t.t_nproc;
4615 if (first_place <= last_place) {
4616 n_places = last_place - first_place + 1;
4618 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4620 if (n_th <= n_places) {
4621 int place = masters_place;
4622 for (f = 1; f < n_th; f++) {
4623 kmp_info_t *th = team->t.t_threads[f];
4624 KMP_DEBUG_ASSERT(th != NULL);
        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
4633 th->th.th_first_place = first_place;
4634 th->th.th_last_place = last_place;
4635 th->th.th_new_place = place;
4636 if (__kmp_display_affinity && place != th->th.th_current_place &&
4637 team->t.t_display_affinity != 1) {
4638 team->t.t_display_affinity = 1;
4641 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4642 "partition = [%d,%d]\n",
4643 __kmp_gtid_from_thread(team->t.t_threads[f]),
4644 team->t.t_id, f, place, first_place, last_place));
4647 int S, rem, gap, s_count;
4648 S = n_th / n_places;
4650 rem = n_th - (S * n_places);
4651 gap = rem > 0 ? n_places / rem : n_places;
4652 int place = masters_place;
4654 for (f = 0; f < n_th; f++) {
4655 kmp_info_t *th = team->t.t_threads[f];
4656 KMP_DEBUG_ASSERT(th != NULL);
4658 th->th.th_first_place = first_place;
4659 th->th.th_last_place = last_place;
4660 th->th.th_new_place = place;
4661 if (__kmp_display_affinity && place != th->th.th_current_place &&
4662 team->t.t_display_affinity != 1) {
4663 team->t.t_display_affinity = 1;
4667 if ((s_count == S) && rem && (gap_ct == gap)) {
4669 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4671 if (place == last_place) {
4672 place = first_place;
4673 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4681 }
else if (s_count == S) {
4682 if (place == last_place) {
4683 place = first_place;
4684 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4694 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4695 "partition = [%d,%d]\n",
4696 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4697 th->th.th_new_place, first_place, last_place));
4699 KMP_DEBUG_ASSERT(place == masters_place);
4703 case proc_bind_spread: {
4705 int n_th = team->t.t_nproc;
4708 if (first_place <= last_place) {
4709 n_places = last_place - first_place + 1;
4711 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4713 if (n_th <= n_places) {
4716 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4717 int S = n_places / n_th;
4718 int s_count, rem, gap, gap_ct;
4720 place = masters_place;
4721 rem = n_places - n_th * S;
4722 gap = rem ? n_th / rem : 1;
4725 if (update_master_only == 1)
4727 for (f = 0; f < thidx; f++) {
4728 kmp_info_t *th = team->t.t_threads[f];
4729 KMP_DEBUG_ASSERT(th != NULL);
4731 th->th.th_first_place = place;
4732 th->th.th_new_place = place;
4733 if (__kmp_display_affinity && place != th->th.th_current_place &&
4734 team->t.t_display_affinity != 1) {
4735 team->t.t_display_affinity = 1;
4738 while (s_count < S) {
4739 if (place == last_place) {
4740 place = first_place;
4741 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4748 if (rem && (gap_ct == gap)) {
4749 if (place == last_place) {
4750 place = first_place;
4751 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4759 th->th.th_last_place = place;
4762 if (place == last_place) {
4763 place = first_place;
4764 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4771 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4772 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4773 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4774 f, th->th.th_new_place, th->th.th_first_place,
4775 th->th.th_last_place, __kmp_affinity_num_masks));
4781 double current =
static_cast<double>(masters_place);
4783 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4788 if (update_master_only == 1)
4790 for (f = 0; f < thidx; f++) {
4791 first =
static_cast<int>(current);
4792 last =
static_cast<int>(current + spacing) - 1;
4793 KMP_DEBUG_ASSERT(last >= first);
4794 if (first >= n_places) {
4795 if (masters_place) {
4798 if (first == (masters_place + 1)) {
4799 KMP_DEBUG_ASSERT(f == n_th);
4802 if (last == masters_place) {
4803 KMP_DEBUG_ASSERT(f == (n_th - 1));
4807 KMP_DEBUG_ASSERT(f == n_th);
4812 if (last >= n_places) {
4813 last = (n_places - 1);
4818 KMP_DEBUG_ASSERT(0 <= first);
4819 KMP_DEBUG_ASSERT(n_places > first);
4820 KMP_DEBUG_ASSERT(0 <= last);
4821 KMP_DEBUG_ASSERT(n_places > last);
4822 KMP_DEBUG_ASSERT(last_place >= first_place);
4823 th = team->t.t_threads[f];
4824 KMP_DEBUG_ASSERT(th);
4825 th->th.th_first_place = first;
4826 th->th.th_new_place = place;
4827 th->th.th_last_place = last;
4828 if (__kmp_display_affinity && place != th->th.th_current_place &&
4829 team->t.t_display_affinity != 1) {
4830 team->t.t_display_affinity = 1;
KA_TRACE(100,
         ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
          "partition = [%d,%d], spacing = %.4f\n",
          __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
          th->th.th_new_place, th->th.th_first_place, th->th.th_last_place,
          spacing));
4841 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4843 int S, rem, gap, s_count;
4844 S = n_th / n_places;
4846 rem = n_th - (S * n_places);
4847 gap = rem > 0 ? n_places / rem : n_places;
4848 int place = masters_place;
4851 if (update_master_only == 1)
4853 for (f = 0; f < thidx; f++) {
4854 kmp_info_t *th = team->t.t_threads[f];
4855 KMP_DEBUG_ASSERT(th != NULL);
4857 th->th.th_first_place = place;
4858 th->th.th_last_place = place;
4859 th->th.th_new_place = place;
4860 if (__kmp_display_affinity && place != th->th.th_current_place &&
4861 team->t.t_display_affinity != 1) {
4862 team->t.t_display_affinity = 1;
4866 if ((s_count == S) && rem && (gap_ct == gap)) {
4868 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4870 if (place == last_place) {
4871 place = first_place;
4872 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4880 }
else if (s_count == S) {
4881 if (place == last_place) {
4882 place = first_place;
4883 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
               "partition = [%d,%d]\n",
               __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
               th->th.th_new_place, th->th.th_first_place,
               th->th.th_last_place));
4898 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
#endif // KMP_AFFINITY_SUPPORTED

kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
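// Overview of the body below: first try to reuse the root's hot team
// (resizing it when new_nproc differs), then try to recycle a team from
// __kmp_team_pool whose t_max_nproc is large enough, and finally fall back
// to allocating and initializing a brand-new kmp_team_t.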
4921 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4924 int use_hot_team = !root->r.r_active;
KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4928 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4929 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4932 #if KMP_NESTED_HOT_TEAMS 4933 kmp_hot_team_ptr_t *hot_teams;
4935 team = master->th.th_team;
4936 level = team->t.t_active_level;
4937 if (master->th.th_teams_microtask) {
4938 if (master->th.th_teams_size.nteams > 1 &&
4941 (microtask_t)__kmp_teams_master ||
4942 master->th.th_teams_level <
4948 hot_teams = master->th.th_hot_teams;
4949 if (level < __kmp_hot_teams_max_level && hot_teams &&
4950 hot_teams[level].hot_team) {
4958 KMP_DEBUG_ASSERT(new_nproc == 1);
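// Hot-team reuse path: when the root is not active, the existing hot team is
// kept and its thread count is compared with new_nproc so it can be reused
// as-is, shrunk, or grown before ICVs and proc_bind are refreshed.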
4962 if (use_hot_team && new_nproc > 1) {
4963 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
4964 #if KMP_NESTED_HOT_TEAMS 4965 team = hot_teams[level].hot_team;
4967 team = root->r.r_hot_team;
4970 if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
              "task_team[1] = %p before reinit\n",
              team->t.t_task_team[0], team->t.t_task_team[1]));
4980 if (team->t.t_nproc == new_nproc) {
4981 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4984 if (team->t.t_size_changed == -1) {
4985 team->t.t_size_changed = 1;
4987 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4991 kmp_r_sched_t new_sched = new_icvs->sched;
4993 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4995 __kmp_reinitialize_team(team, new_icvs,
4996 root->r.r_uber_thread->th.th_ident);
4998 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4999 team->t.t_threads[0], team));
5000 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5002 #if KMP_AFFINITY_SUPPORTED 5003 if ((team->t.t_size_changed == 0) &&
5004 (team->t.t_proc_bind == new_proc_bind)) {
5005 if (new_proc_bind == proc_bind_spread) {
5006 __kmp_partition_places(
KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
               "proc_bind = %d, partition = [%d,%d]\n",
               team->t.t_id, new_proc_bind, team->t.t_first_place,
               team->t.t_last_place));
5014 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5015 __kmp_partition_places(team);
5018 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5020 }
else if (team->t.t_nproc > new_nproc) {
KA_TRACE(20,
         ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
          new_nproc));
5025 team->t.t_size_changed = 1;
5026 #if KMP_NESTED_HOT_TEAMS 5027 if (__kmp_hot_teams_mode == 0) {
5030 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5031 hot_teams[level].hot_team_nth = new_nproc;
5032 #endif // KMP_NESTED_HOT_TEAMS 5034 for (f = new_nproc; f < team->t.t_nproc; f++) {
5035 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5036 if (__kmp_tasking_mode != tskm_immediate_exec) {
5039 team->t.t_threads[f]->th.th_task_team = NULL;
5041 __kmp_free_thread(team->t.t_threads[f]);
5042 team->t.t_threads[f] = NULL;
5044 #if KMP_NESTED_HOT_TEAMS 5049 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5050 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5051 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5052 for (
int b = 0; b < bs_last_barrier; ++b) {
5053 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5054 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5056 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5060 #endif // KMP_NESTED_HOT_TEAMS 5061 team->t.t_nproc = new_nproc;
5063 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5064 __kmp_reinitialize_team(team, new_icvs,
5065 root->r.r_uber_thread->th.th_ident);
5068 for (f = 0; f < new_nproc; ++f) {
5069 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5074 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5075 team->t.t_threads[0], team));
5077 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5080 for (f = 0; f < team->t.t_nproc; f++) {
5081 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5082 team->t.t_threads[f]->th.th_team_nproc ==
5087 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5088 #if KMP_AFFINITY_SUPPORTED 5089 __kmp_partition_places(team);
5092 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5093 kmp_affin_mask_t *old_mask;
5094 if (KMP_AFFINITY_CAPABLE()) {
5095 KMP_CPU_ALLOC(old_mask);
KA_TRACE(20,
         ("__kmp_allocate_team: increasing hot team thread count to %d\n",
          new_nproc));
team->t.t_size_changed = 1;
5105 #if KMP_NESTED_HOT_TEAMS 5106 int avail_threads = hot_teams[level].hot_team_nth;
5107 if (new_nproc < avail_threads)
5108 avail_threads = new_nproc;
5109 kmp_info_t **other_threads = team->t.t_threads;
5110 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5114 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5115 for (b = 0; b < bs_last_barrier; ++b) {
5116 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5117 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5119 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5123 if (hot_teams[level].hot_team_nth >= new_nproc) {
5126 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5127 team->t.t_nproc = new_nproc;
5133 hot_teams[level].hot_team_nth = new_nproc;
5134 #endif // KMP_NESTED_HOT_TEAMS 5135 if (team->t.t_max_nproc < new_nproc) {
5137 __kmp_reallocate_team_arrays(team, new_nproc);
5138 __kmp_reinitialize_team(team, new_icvs, NULL);
5141 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5146 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5150 for (f = team->t.t_nproc; f < new_nproc; f++) {
5151 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5152 KMP_DEBUG_ASSERT(new_worker);
5153 team->t.t_threads[f] = new_worker;
KA_TRACE(20, ("__kmp_allocate_team: team %d init T#%d arrived: "
              "join=%llu, plain=%llu\n",
              team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
              team->t.t_bar[bs_forkjoin_barrier].b_arrived,
              team->t.t_bar[bs_plain_barrier].b_arrived));
5164 kmp_balign_t *balign = new_worker->th.th_bar;
5165 for (b = 0; b < bs_last_barrier; ++b) {
5166 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5167 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5168 KMP_BARRIER_PARENT_FLAG);
5170 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5176 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED 5177 if (KMP_AFFINITY_CAPABLE()) {
5179 __kmp_set_system_affinity(old_mask, TRUE);
5180 KMP_CPU_FREE(old_mask);
5183 #if KMP_NESTED_HOT_TEAMS 5185 #endif // KMP_NESTED_HOT_TEAMS 5187 int old_nproc = team->t.t_nproc;
5189 __kmp_initialize_team(team, new_nproc, new_icvs,
5190 root->r.r_uber_thread->th.th_ident);
5193 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5194 for (f = 0; f < team->t.t_nproc; ++f)
5195 __kmp_initialize_info(team->t.t_threads[f], team, f,
5196 __kmp_gtid_from_tid(f, team));
5204 for (f = old_nproc; f < team->t.t_nproc; ++f)
5205 team->t.t_threads[f]->th.th_task_state =
5206 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5209 team->t.t_threads[0]->th.th_task_state;
5210 for (f = old_nproc; f < team->t.t_nproc; ++f)
5211 team->t.t_threads[f]->th.th_task_state = old_state;
5215 for (f = 0; f < team->t.t_nproc; ++f) {
5216 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5217 team->t.t_threads[f]->th.th_team_nproc ==
5222 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5223 #if KMP_AFFINITY_SUPPORTED 5224 __kmp_partition_places(team);
5228 kmp_info_t *master = team->t.t_threads[0];
5229 if (master->th.th_teams_microtask) {
5230 for (f = 1; f < new_nproc; ++f) {
5232 kmp_info_t *thr = team->t.t_threads[f];
5233 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5234 thr->th.th_teams_level = master->th.th_teams_level;
5235 thr->th.th_teams_size = master->th.th_teams_size;
5238 #if KMP_NESTED_HOT_TEAMS 5242 for (f = 1; f < new_nproc; ++f) {
5243 kmp_info_t *thr = team->t.t_threads[f];
5245 kmp_balign_t *balign = thr->th.th_bar;
5246 for (b = 0; b < bs_last_barrier; ++b) {
5247 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5248 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5250 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5255 #endif // KMP_NESTED_HOT_TEAMS 5258 __kmp_alloc_argv_entries(argc, team, TRUE);
5259 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5263 KF_TRACE(10, (
" hot_team = %p\n", team));
5266 if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
              "task_team[1] = %p after reinit\n",
              team->t.t_task_team[0], team->t.t_task_team[1]));
5274 __ompt_team_assign_id(team, ompt_parallel_data);
5284 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5287 if (team->t.t_max_nproc >= max_nproc) {
5289 __kmp_team_pool = team->t.t_next_pool;
5292 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
              "task_team[1] %p to NULL\n",
              &team->t.t_task_team[0], &team->t.t_task_team[1]));
5297 team->t.t_task_team[0] = NULL;
5298 team->t.t_task_team[1] = NULL;
5301 __kmp_alloc_argv_entries(argc, team, TRUE);
5302 KMP_CHECK_UPDATE(team->t.t_argc, argc);
KA_TRACE(
    20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
         team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5309 for (b = 0; b < bs_last_barrier; ++b) {
5310 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5312 team->t.t_bar[b].b_master_arrived = 0;
5313 team->t.t_bar[b].b_team_arrived = 0;
5318 team->t.t_proc_bind = new_proc_bind;
KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
              team->t.t_id));
5324 __ompt_team_assign_id(team, ompt_parallel_data);
5336 team = __kmp_reap_team(team);
5337 __kmp_team_pool = team;
team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5345 team->t.t_max_nproc = max_nproc;
5348 __kmp_allocate_team_arrays(team, max_nproc);
5350 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5351 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
              "%p to NULL\n",
              &team->t.t_task_team[0], &team->t.t_task_team[1]));
team->t.t_task_team[0] = NULL;
team->t.t_task_team[1] = NULL;
5361 if (__kmp_storage_map) {
__kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5366 __kmp_alloc_argv_entries(argc, team, FALSE);
5367 team->t.t_argc = argc;
KA_TRACE(20,
         ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
          team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5374 for (b = 0; b < bs_last_barrier; ++b) {
5375 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5377 team->t.t_bar[b].b_master_arrived = 0;
5378 team->t.t_bar[b].b_team_arrived = 0;
5383 team->t.t_proc_bind = new_proc_bind;
5386 __ompt_team_assign_id(team, ompt_parallel_data);
5387 team->t.ompt_serialized_team_info = NULL;
KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
              team->t.t_id));
5403 void __kmp_free_team(kmp_root_t *root,
5404 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
              team->t.t_id));
5410 KMP_DEBUG_ASSERT(root);
5411 KMP_DEBUG_ASSERT(team);
5412 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5413 KMP_DEBUG_ASSERT(team->t.t_threads);
5415 int use_hot_team = team == root->r.r_hot_team;
5416 #if KMP_NESTED_HOT_TEAMS 5418 kmp_hot_team_ptr_t *hot_teams;
5420 level = team->t.t_active_level - 1;
5421 if (master->th.th_teams_microtask) {
5422 if (master->th.th_teams_size.nteams > 1) {
5426 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5427 master->th.th_teams_level == team->t.t_level) {
5432 hot_teams = master->th.th_hot_teams;
5433 if (level < __kmp_hot_teams_max_level) {
5434 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
#endif // KMP_NESTED_HOT_TEAMS

// Team is done working; reset the microtask pointer.
TCW_SYNC_PTR(team->t.t_pkfn, NULL);
team->t.t_copyin_counter = 0;
5449 if (!use_hot_team) {
5450 if (__kmp_tasking_mode != tskm_immediate_exec) {
5452 for (f = 1; f < team->t.t_nproc; ++f) {
5453 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5454 kmp_info_t *th = team->t.t_threads[f];
5455 volatile kmp_uint32 *state = &th->th.th_reap_state;
5456 while (*state != KMP_SAFE_TO_REAP) {
5460 if (!__kmp_is_thread_alive(th, &ecode)) {
5461 *state = KMP_SAFE_TO_REAP;
5466 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5467 if (fl.is_sleeping())
5468 fl.resume(__kmp_gtid_from_thread(th));
5475 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5476 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5477 if (task_team != NULL) {
5478 for (f = 0; f < team->t.t_nproc; ++f) {
5479 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5480 team->t.t_threads[f]->th.th_task_team = NULL;
KA_TRACE(20,
         ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
          __kmp_get_gtid(), task_team, team->t.t_id));
5486 #if KMP_NESTED_HOT_TEAMS 5487 __kmp_free_task_team(master, task_team);
5489 team->t.t_task_team[tt_idx] = NULL;
5495 team->t.t_parent = NULL;
5496 team->t.t_level = 0;
5497 team->t.t_active_level = 0;
5500 for (f = 1; f < team->t.t_nproc; ++f) {
5501 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5502 __kmp_free_thread(team->t.t_threads[f]);
5503 team->t.t_threads[f] = NULL;
5508 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
__kmp_team_pool = (volatile kmp_team_t *)team;
5512 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5513 team->t.t_threads[1]->th.th_cg_roots);
5514 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5516 for (f = 1; f < team->t.t_nproc; ++f) {
5517 kmp_info_t *thr = team->t.t_threads[f];
5518 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5519 thr->th.th_cg_roots->cg_root == thr);
5521 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5522 thr->th.th_cg_roots = tmp->up;
KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
               " up to node %p. cg_nthreads was %d\n",
               thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5526 int i = tmp->cg_nthreads--;
5531 if (thr->th.th_cg_roots)
5532 thr->th.th_current_task->td_icvs.thread_limit =
5533 thr->th.th_cg_roots->cg_thread_limit;
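// __kmp_reap_team frees the storage owned by a pooled team (dispatch and
// thread arrays, and the argv block when it is not the inline one) and hands
// back the next team in __kmp_team_pool so the caller can keep draining it.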
5542 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5543 kmp_team_t *next_pool = team->t.t_next_pool;
5545 KMP_DEBUG_ASSERT(team);
5546 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5547 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5548 KMP_DEBUG_ASSERT(team->t.t_threads);
5549 KMP_DEBUG_ASSERT(team->t.t_argv);
5554 __kmp_free_team_arrays(team);
5555 if (team->t.t_argv != &team->t.t_inline_argv[0])
__kmp_free((void *)team->t.t_argv);
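// __kmp_free_thread clears a worker's barrier and team state and inserts it
// into __kmp_thread_pool, which is kept sorted by gtid so the lowest-numbered
// free threads are handed out first on the next fork.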
5588 void __kmp_free_thread(kmp_info_t *this_th) {
KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
              __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5595 KMP_DEBUG_ASSERT(this_th);
5600 kmp_balign_t *balign = this_th->th.th_bar;
5601 for (b = 0; b < bs_last_barrier; ++b) {
5602 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5603 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5604 balign[b].bb.team = NULL;
5605 balign[b].bb.leaf_kids = 0;
5607 this_th->th.th_task_state = 0;
5608 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5611 TCW_PTR(this_th->th.th_team, NULL);
5612 TCW_PTR(this_th->th.th_root, NULL);
5613 TCW_PTR(this_th->th.th_dispatch, NULL);
5615 while (this_th->th.th_cg_roots) {
5616 this_th->th.th_cg_roots->cg_nthreads--;
KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
               " %p of thread %p to %d\n",
               this_th, this_th->th.th_cg_roots,
               this_th->th.th_cg_roots->cg_root,
               this_th->th.th_cg_roots->cg_nthreads));
5622 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5623 if (tmp->cg_root == this_th) {
5624 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
KA_TRACE(5,
         ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5627 this_th->th.th_cg_roots = tmp->up;
5630 if (tmp->cg_nthreads == 0) {
5633 this_th->th.th_cg_roots = NULL;
5643 __kmp_free_implicit_task(this_th);
5644 this_th->th.th_current_task = NULL;
5648 gtid = this_th->th.th_info.ds.ds_gtid;
5649 if (__kmp_thread_pool_insert_pt != NULL) {
5650 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5651 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5652 __kmp_thread_pool_insert_pt = NULL;
5661 if (__kmp_thread_pool_insert_pt != NULL) {
5662 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5664 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5666 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5667 scan = &((*scan)->th.th_next_pool))
5672 TCW_PTR(this_th->th.th_next_pool, *scan);
5673 __kmp_thread_pool_insert_pt = *scan = this_th;
5674 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5675 (this_th->th.th_info.ds.ds_gtid <
5676 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5677 TCW_4(this_th->th.th_in_pool, TRUE);
5678 __kmp_suspend_initialize_thread(this_th);
5679 __kmp_lock_suspend_mx(this_th);
5680 if (this_th->th.th_active == TRUE) {
5681 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5682 this_th->th.th_active_in_pool = TRUE;
5686 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5689 __kmp_unlock_suspend_mx(this_th);
5691 TCW_4(__kmp_nth, __kmp_nth - 1);
5693 #ifdef KMP_ADJUST_BLOCKTIME 5696 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5697 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5698 if (__kmp_nth <= __kmp_avail_proc) {
5699 __kmp_zero_bt = FALSE;
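// __kmp_launch_thread is the main loop of every worker thread: it waits for
// work at the fork barrier, invokes the team's microtask through t_invoke,
// passes the join barrier, and exits once __kmp_global.g.g_done is set.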
5709 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5710 int gtid = this_thr->th.th_info.ds.ds_gtid;
kmp_team_t **volatile pteam;
5715 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5717 if (__kmp_env_consistency_check) {
5718 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5722 ompt_data_t *thread_data;
5723 if (ompt_enabled.enabled) {
5724 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5725 *thread_data = ompt_data_none;
5727 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5728 this_thr->th.ompt_thread_info.wait_id = 0;
5729 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5730 this_thr->th.ompt_thread_info.parallel_flags = 0;
5731 if (ompt_enabled.ompt_callback_thread_begin) {
5732 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5733 ompt_thread_worker, thread_data);
5735 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5740 while (!TCR_4(__kmp_global.g.g_done)) {
5741 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5745 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5748 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5751 if (ompt_enabled.enabled) {
5752 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5756 pteam = &this_thr->th.th_team;
5759 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5761 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
KA_TRACE(20,
         ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
          gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
          (*pteam)->t.t_pkfn));
5768 updateHWFPControl(*pteam);
5771 if (ompt_enabled.enabled) {
5772 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5776 rc = (*pteam)->t.t_invoke(gtid);
5780 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5781 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5782 (*pteam)->t.t_pkfn));
5785 if (ompt_enabled.enabled) {
5787 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
5789 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5793 __kmp_join_barrier(gtid);
5796 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5799 if (ompt_enabled.ompt_callback_thread_end) {
5800 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5804 this_thr->th.th_task_team = NULL;
5806 __kmp_common_destroy_gtid(gtid);
5808 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
#endif
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif
  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5840 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5841 __kmp_gtid_set_specific(gtid);
5842 #ifdef KMP_TDATA_GTID 5845 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}
#endif
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5883 __kmp_internal_end_library(-1);
5885 __kmp_close_console();
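// __kmp_reap_thread tears down a single worker: it releases the thread from
// the fork barrier so it can terminate, joins it, and then frees all of its
// per-thread storage (implicit task, consistency stack, affinity mask, etc.).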
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5894 KMP_DEBUG_ASSERT(thread != NULL);
5896 gtid = thread->th.th_info.ds.ds_gtid;
5899 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
KA_TRACE(
    20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
         gtid));
5906 ANNOTATE_HAPPENS_BEFORE(thread);
5907 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5908 __kmp_release_64(&flag);
5912 __kmp_reap_worker(thread);
5924 if (thread->th.th_active_in_pool) {
5925 thread->th.th_active_in_pool = FALSE;
5926 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5927 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
5931 __kmp_free_implicit_task(thread);
5935 __kmp_free_fast_memory(thread);
5938 __kmp_suspend_uninitialize_thread(thread);
5940 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5941 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5946 #ifdef KMP_ADJUST_BLOCKTIME 5949 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5950 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5951 if (__kmp_nth <= __kmp_avail_proc) {
5952 __kmp_zero_bt = FALSE;
5958 if (__kmp_env_consistency_check) {
5959 if (thread->th.th_cons) {
5960 __kmp_free_cons_stack(thread->th.th_cons);
5961 thread->th.th_cons = NULL;
5965 if (thread->th.th_pri_common != NULL) {
5966 __kmp_free(thread->th.th_pri_common);
5967 thread->th.th_pri_common = NULL;
5970 if (thread->th.th_task_state_memo_stack != NULL) {
5971 __kmp_free(thread->th.th_task_state_memo_stack);
5972 thread->th.th_task_state_memo_stack = NULL;
5976 if (thread->th.th_local.bget_data != NULL) {
5977 __kmp_finalize_bget(thread);
5981 #if KMP_AFFINITY_SUPPORTED 5982 if (thread->th.th_affin_mask != NULL) {
5983 KMP_CPU_FREE(thread->th.th_affin_mask);
5984 thread->th.th_affin_mask = NULL;
5988 #if KMP_USE_HIER_SCHED 5989 if (thread->th.th_hier_bar_data != NULL) {
5990 __kmp_free(thread->th.th_hier_bar_data);
5991 thread->th.th_hier_bar_data = NULL;
5995 __kmp_reap_team(thread->th.th_serial_team);
5996 thread->th.th_serial_team = NULL;
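// __kmp_internal_end does the common shutdown work: it unregisters the
// library, reaps the monitor thread and every pooled worker and team, and
// finally clears the global initialization flags.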
static void __kmp_internal_end(void) {
6007 __kmp_unregister_library();
6014 __kmp_reclaim_dead_roots();
6018 for (i = 0; i < __kmp_threads_capacity; i++)
6020 if (__kmp_root[i]->r.r_active)
6023 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6025 if (i < __kmp_threads_capacity) {
6037 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6038 if (TCR_4(__kmp_init_monitor)) {
6039 __kmp_reap_monitor(&__kmp_monitor);
6040 TCW_4(__kmp_init_monitor, 0);
6042 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
for (i = 0; i < __kmp_threads_capacity; i++) {
6050 if (__kmp_root[i]) {
6053 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6062 while (__kmp_thread_pool != NULL) {
6064 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6065 __kmp_thread_pool = thread->th.th_next_pool;
6067 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6068 thread->th.th_next_pool = NULL;
6069 thread->th.th_in_pool = FALSE;
6070 __kmp_reap_thread(thread, 0);
6072 __kmp_thread_pool_insert_pt = NULL;
6075 while (__kmp_team_pool != NULL) {
6077 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6078 __kmp_team_pool = team->t.t_next_pool;
6080 team->t.t_next_pool = NULL;
6081 __kmp_reap_team(team);
6084 __kmp_reap_task_teams();
6091 for (i = 0; i < __kmp_threads_capacity; i++) {
6092 kmp_info_t *thr = __kmp_threads[i];
6093 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6098 for (i = 0; i < __kmp_threads_capacity; ++i) {
6105 TCW_SYNC_4(__kmp_init_common, FALSE);
6107 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6115 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6116 if (TCR_4(__kmp_init_monitor)) {
6117 __kmp_reap_monitor(&__kmp_monitor);
6118 TCW_4(__kmp_init_monitor, 0);
6120 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6121 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6124 TCW_4(__kmp_init_gtid, FALSE);
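// __kmp_internal_end_library and __kmp_internal_end_thread are the two
// shutdown entry points (library unload vs. single-thread exit). Both
// validate the requesting gtid, unregister uber roots, and then funnel into
// __kmp_internal_end under the initz/forkjoin bootstrap locks.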
void __kmp_internal_end_library(int gtid_req) {
6140 if (__kmp_global.g.g_abort) {
6141 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6145 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6146 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6153 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6155 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6156 if (gtid == KMP_GTID_SHUTDOWN) {
KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
              "already shutdown\n"));
6160 }
else if (gtid == KMP_GTID_MONITOR) {
KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
              "registered, or system shutdown\n"));
6164 }
else if (gtid == KMP_GTID_DNE) {
KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
              "shutdown\n"));
}
else if (KMP_UBER_GTID(gtid)) {
6170 if (__kmp_root[gtid]->r.r_active) {
6171 __kmp_global.g.g_abort = -1;
6172 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6174 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6180 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6181 __kmp_unregister_root_current_thread(gtid);
6188 #ifdef DUMP_DEBUG_ON_EXIT 6189 if (__kmp_debug_buf)
6190 __kmp_dump_debug_buffer();
6195 __kmp_unregister_library();
6200 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6203 if (__kmp_global.g.g_abort) {
6204 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6206 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6209 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6210 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6219 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6222 __kmp_internal_end();
6224 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6225 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6227 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6229 #ifdef DUMP_DEBUG_ON_EXIT 6230 if (__kmp_debug_buf)
6231 __kmp_dump_debug_buffer();
6235 __kmp_close_console();
6238 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6251 if (__kmp_global.g.g_abort) {
6252 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6256 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6257 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6265 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6267 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6268 if (gtid == KMP_GTID_SHUTDOWN) {
KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
              "already shutdown\n"));
6272 }
else if (gtid == KMP_GTID_MONITOR) {
KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
              "registered, or system shutdown\n"));
6276 }
else if (gtid == KMP_GTID_DNE) {
KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
              "shutdown\n"));
}
else if (KMP_UBER_GTID(gtid)) {
6283 if (__kmp_root[gtid]->r.r_active) {
6284 __kmp_global.g.g_abort = -1;
6285 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6287 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6291 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6293 __kmp_unregister_root_current_thread(gtid);
6297 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6300 __kmp_threads[gtid]->th.th_task_team = NULL;
6304 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6310 if (__kmp_pause_status != kmp_hard_paused)
6314 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6319 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6322 if (__kmp_global.g.g_abort) {
6323 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6325 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6328 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6329 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6340 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6342 for (i = 0; i < __kmp_threads_capacity; ++i) {
6343 if (KMP_UBER_GTID(i)) {
6346 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6347 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6348 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6355 __kmp_internal_end();
6357 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6358 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6360 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6362 #ifdef DUMP_DEBUG_ON_EXIT 6363 if (__kmp_debug_buf)
6364 __kmp_dump_debug_buffer();
6371 static long __kmp_registration_flag = 0;
6373 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
}
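// __kmp_register_library_startup / __kmp_unregister_library detect whether
// another copy of the runtime is already loaded into the process. On UNIX
// with a dynamic library the registration string lives in a small POSIX
// shared-memory segment; otherwise an environment variable is used.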
void __kmp_register_library_startup(void) {
  char *name = __kmp_reg_status_name();
6392 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6393 __kmp_initialize_system_tick();
6395 __kmp_read_system_time(&time.dtime);
6396 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
__kmp_registration_str =
    __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                     __kmp_registration_flag, KMP_LIBRARY_FILE);
6401 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6402 __kmp_registration_str));
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
  char *shm_name = __kmp_str_format("/%s", name);
6410 int shm_preexist = 0;
6412 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6413 if ((fd1 == -1) && (errno == EEXIST)) {
6416 fd1 = shm_open(shm_name, O_RDWR, 0666);
__kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
            __kmp_msg_null);
}
else if (fd1 == -1) {
  __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno),
              __kmp_msg_null);
}
if (shm_preexist == 0) {
if (ftruncate(fd1, SHM_SIZE) == -1) {
  __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
              KMP_ERR(errno), __kmp_msg_null);
char *data1 =
    (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6441 if (data1 == MAP_FAILED) {
__kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
            __kmp_msg_null);
6446 if (shm_preexist == 0) {
6447 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6450 value = __kmp_str_format(
"%s", data1);
6451 munmap(data1, SHM_SIZE);
6453 #else // Windows and unix with static library 6455 __kmp_env_set(name, __kmp_registration_str, 0);
6457 value = __kmp_env_get(name);
6460 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6467 char *flag_addr_str = NULL;
6468 char *flag_val_str = NULL;
6469 char const *file_name = NULL;
__kmp_str_split(tail, '-', &flag_addr_str, &tail);
__kmp_str_split(tail, '-', &flag_val_str, &tail);
6474 long *flag_addr = 0;
KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
KMP_SSCANF(flag_val_str, "%lx", &flag_val);
if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6482 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6496 file_name =
"unknown library";
6501 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6502 if (!__kmp_str_match_true(duplicate_ok)) {
6504 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6505 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6507 KMP_INTERNAL_FREE(duplicate_ok);
6508 __kmp_duplicate_library_ok = 1;
6513 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library 6515 shm_unlink(shm_name);
6518 __kmp_env_unset(name);
default: { KMP_DEBUG_ASSERT(0); } break;
KMP_INTERNAL_FREE((void *)value);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
KMP_INTERNAL_FREE((void *)shm_name);
#endif
KMP_INTERNAL_FREE((void *)name);
void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
  char *shm_name = __kmp_str_format("/%s", name);
6540 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
if (data1 != MAP_FAILED) {
  value = __kmp_str_format("%s", data1);
6548 munmap(data1, SHM_SIZE);
6552 value = __kmp_env_get(name);
6555 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6556 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6557 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6559 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library 6560 shm_unlink(shm_name);
6562 __kmp_env_unset(name);
6566 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library 6567 KMP_INTERNAL_FREE(shm_name);
6570 KMP_INTERNAL_FREE(__kmp_registration_str);
6571 KMP_INTERNAL_FREE(value);
6572 KMP_INTERNAL_FREE(name);
6574 __kmp_registration_flag = 0;
6575 __kmp_registration_str = NULL;
#if KMP_MIC_SUPPORTED
static void __kmp_check_mic_type() {
6585 kmp_cpuid_t cpuid_state = {0};
6586 kmp_cpuid_t *cs_p = &cpuid_state;
6587 __kmp_x86_cpuid(1, 0, cs_p);
6589 if ((cs_p->eax & 0xff0) == 0xB10) {
6590 __kmp_mic_type = mic2;
6591 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6592 __kmp_mic_type = mic3;
6594 __kmp_mic_type = non_mic;
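// __kmp_do_serial_initialize performs the one-time, process-wide setup:
// sanity checks on type sizes, lock and allocator initialization, default
// ICVs and barrier patterns, allocation of the __kmp_threads/__kmp_root
// arrays, and registration of the initial (uber) root thread.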
static void __kmp_do_serial_initialize(void) {
  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6616 __kmp_validate_locks();
6619 __kmp_init_allocator();
6624 __kmp_register_library_startup();
6627 if (TCR_4(__kmp_global.g.g_done)) {
6628 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6631 __kmp_global.g.g_abort = 0;
6632 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6635 #if KMP_USE_ADAPTIVE_LOCKS 6636 #if KMP_DEBUG_ADAPTIVE_LOCKS 6637 __kmp_init_speculative_stats();
6640 #if KMP_STATS_ENABLED 6643 __kmp_init_lock(&__kmp_global_lock);
6644 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6645 __kmp_init_lock(&__kmp_debug_lock);
6646 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6647 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6648 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6649 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6650 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6651 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6652 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6653 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6654 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6655 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6656 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6657 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6658 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6659 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6660 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6662 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6664 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6668 __kmp_runtime_initialize();
6670 #if KMP_MIC_SUPPORTED 6671 __kmp_check_mic_type();
6678 __kmp_abort_delay = 0;
6682 __kmp_dflt_team_nth_ub = __kmp_xproc;
6683 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6684 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6686 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6687 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6689 __kmp_max_nth = __kmp_sys_max_nth;
6690 __kmp_cg_max_nth = __kmp_sys_max_nth;
6691 __kmp_teams_max_nth = __kmp_xproc;
6692 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6693 __kmp_teams_max_nth = __kmp_sys_max_nth;
6698 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6700 __kmp_monitor_wakeups =
6701 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6702 __kmp_bt_intervals =
6703 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6706 __kmp_library = library_throughput;
6708 __kmp_static = kmp_sch_static_balanced;
6715 #if KMP_FAST_REDUCTION_BARRIER 6716 #define kmp_reduction_barrier_gather_bb ((int)1) 6717 #define kmp_reduction_barrier_release_bb ((int)1) 6718 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6719 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6720 #endif // KMP_FAST_REDUCTION_BARRIER 6721 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6722 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6723 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6724 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6725 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6726 #if KMP_FAST_REDUCTION_BARRIER 6727 if (i == bs_reduction_barrier) {
6729 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6730 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6731 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6732 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6734 #endif // KMP_FAST_REDUCTION_BARRIER 6736 #if KMP_FAST_REDUCTION_BARRIER 6737 #undef kmp_reduction_barrier_release_pat 6738 #undef kmp_reduction_barrier_gather_pat 6739 #undef kmp_reduction_barrier_release_bb 6740 #undef kmp_reduction_barrier_gather_bb 6741 #endif // KMP_FAST_REDUCTION_BARRIER 6742 #if KMP_MIC_SUPPORTED 6743 if (__kmp_mic_type == mic2) {
6745 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6746 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6748 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6749 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6751 #if KMP_FAST_REDUCTION_BARRIER 6752 if (__kmp_mic_type == mic2) {
6753 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6754 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6756 #endif // KMP_FAST_REDUCTION_BARRIER 6757 #endif // KMP_MIC_SUPPORTED 6761 __kmp_env_checks = TRUE;
6763 __kmp_env_checks = FALSE;
6767 __kmp_foreign_tp = TRUE;
6769 __kmp_global.g.g_dynamic = FALSE;
6770 __kmp_global.g.g_dynamic_mode = dynamic_default;
6772 __kmp_env_initialize(NULL);
6776 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
6777 if (__kmp_str_match_true(val)) {
6778 kmp_str_buf_t buffer;
6779 __kmp_str_buf_init(&buffer);
6780 __kmp_i18n_dump_catalog(&buffer);
6781 __kmp_printf(
"%s", buffer.str);
6782 __kmp_str_buf_free(&buffer);
6784 __kmp_env_free(&val);
6787 __kmp_threads_capacity =
6788 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6790 __kmp_tp_capacity = __kmp_default_tp_capacity(
6791 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6796 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6797 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6798 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6799 __kmp_thread_pool = NULL;
6800 __kmp_thread_pool_insert_pt = NULL;
6801 __kmp_team_pool = NULL;
size =
    (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
    CACHE_LINE;
__kmp_threads = (kmp_info_t **)__kmp_allocate(size);
__kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                             sizeof(kmp_info_t *) * __kmp_threads_capacity);
6815 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6817 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6822 gtid = __kmp_register_root(TRUE);
6823 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6824 KMP_ASSERT(KMP_UBER_GTID(gtid));
6825 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6829 __kmp_common_initialize();
6833 __kmp_register_atfork();
6836 #if !KMP_DYNAMIC_LIB 6840 int rc = atexit(__kmp_internal_end_atexit);
__kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
            __kmp_msg_null);
#if KMP_HANDLE_SIGNALS
__kmp_install_signals(FALSE);
6857 __kmp_install_signals(TRUE);
6862 __kmp_init_counter++;
6864 __kmp_init_serial = TRUE;
6866 if (__kmp_settings) {
6870 if (__kmp_display_env || __kmp_display_env_verbose) {
6871 __kmp_env_print_2();
6880 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
6883 void __kmp_serial_initialize(
void) {
6884 if (__kmp_init_serial) {
6887 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6888 if (__kmp_init_serial) {
6889 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6892 __kmp_do_serial_initialize();
6893 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6896 static void __kmp_do_middle_initialize(
void) {
6898 int prev_dflt_team_nth;
6900 if (!__kmp_init_serial) {
6901 __kmp_do_serial_initialize();
6904 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
6908 prev_dflt_team_nth = __kmp_dflt_team_nth;
6910 #if KMP_AFFINITY_SUPPORTED 6913 __kmp_affinity_initialize();
6917 for (i = 0; i < __kmp_threads_capacity; i++) {
6918 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6919 __kmp_affinity_set_init_mask(i, TRUE);
6924 KMP_ASSERT(__kmp_xproc > 0);
6925 if (__kmp_avail_proc == 0) {
6926 __kmp_avail_proc = __kmp_xproc;
6932 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6933 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6938 if (__kmp_dflt_team_nth == 0) {
6939 #ifdef KMP_DFLT_NTH_CORES 6941 __kmp_dflt_team_nth = __kmp_ncores;
KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
              "__kmp_ncores (%d)\n",
              __kmp_dflt_team_nth));
6947 __kmp_dflt_team_nth = __kmp_avail_proc;
KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
              "__kmp_avail_proc(%d)\n",
              __kmp_dflt_team_nth));
6954 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6955 __kmp_dflt_team_nth = KMP_MIN_NTH;
6957 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6958 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6963 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6965 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6970 for (i = 0; i < __kmp_threads_capacity; i++) {
6971 kmp_info_t *thread = __kmp_threads[i];
6974 if (thread->th.th_current_task->td_icvs.nproc != 0)
6977 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
KA_TRACE(20,
         ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
6985 #ifdef KMP_ADJUST_BLOCKTIME 6987 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6988 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6989 if (__kmp_nth > __kmp_avail_proc) {
6990 __kmp_zero_bt = TRUE;
6996 TCW_SYNC_4(__kmp_init_middle, TRUE);
6998 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
7001 void __kmp_middle_initialize(
void) {
7002 if (__kmp_init_middle) {
7005 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7006 if (__kmp_init_middle) {
7007 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7010 __kmp_do_middle_initialize();
7011 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7014 void __kmp_parallel_initialize(
void) {
7015 int gtid = __kmp_entry_gtid();
7018 if (TCR_4(__kmp_init_parallel))
7020 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7021 if (TCR_4(__kmp_init_parallel)) {
7022 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7027 if (TCR_4(__kmp_global.g.g_done)) {
KA_TRACE(10,
         ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7031 __kmp_infinite_loop();
7037 if (!__kmp_init_middle) {
7038 __kmp_do_middle_initialize();
7040 __kmp_resume_if_hard_paused();
7043 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7044 KMP_ASSERT(KMP_UBER_GTID(gtid));
7046 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 7049 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7050 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7051 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7055 #if KMP_HANDLE_SIGNALS 7057 __kmp_install_signals(TRUE);
7061 __kmp_suspend_initialize();
7063 #if defined(USE_LOAD_BALANCE) 7064 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7065 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7068 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7069 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7073 if (__kmp_version) {
7074 __kmp_print_version_2();
7078 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7081 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7083 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;
7095 this_thr->th.th_local.this_construct = 0;
7096 #if KMP_CACHE_MANAGE 7097 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7099 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7100 KMP_DEBUG_ASSERT(dispatch);
7101 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7105 dispatch->th_disp_index = 0;
7106 dispatch->th_doacross_buf_idx = 0;
7107 if (__kmp_env_consistency_check)
7108 __kmp_push_parallel(gtid, team->t.t_ident);
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
7116 __kmp_pop_parallel(gtid, team->t.t_ident);
7118 __kmp_finish_implicit_task(this_thr);
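// __kmp_invoke_task_func runs the outlined parallel-region body on the
// calling thread: it brackets the call with the before/after helpers above
// (plus optional ITT and OMPT bookkeeping) and dispatches to
// __kmp_invoke_microtask with the team's argc/argv.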
int __kmp_invoke_task_func(int gtid) {
7123 int tid = __kmp_tid_from_gtid(gtid);
7124 kmp_info_t *this_thr = __kmp_threads[gtid];
7125 kmp_team_t *team = this_thr->th.th_team;
7127 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7129 if (__itt_stack_caller_create_ptr) {
7130 __kmp_itt_stack_callee_enter(
7132 team->t.t_stack_id);
7135 #if INCLUDE_SSC_MARKS 7136 SSC_MARK_INVOKING();
7141 void **exit_frame_p;
7142 ompt_data_t *my_task_data;
7143 ompt_data_t *my_parallel_data;
7146 if (ompt_enabled.enabled) {
7148 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
7150 exit_frame_p = &dummy;
7154 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7155 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7156 if (ompt_enabled.ompt_callback_implicit_task) {
7157 ompt_team_size = team->t.t_nproc;
7158 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7159 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7160 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7161 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7165 #if KMP_STATS_ENABLED 7167 if (previous_state == stats_state_e::TEAMS_REGION) {
7168 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7170 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7172 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7175 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7176 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
7183 *exit_frame_p = NULL;
7184 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7187 #if KMP_STATS_ENABLED 7188 if (previous_state == stats_state_e::TEAMS_REGION) {
7189 KMP_SET_THREAD_STATE(previous_state);
7191 KMP_POP_PARTITIONED_TIMER();
7195 if (__itt_stack_caller_create_ptr) {
7196 __kmp_itt_stack_callee_leave(
7198 team->t.t_stack_id);
7201 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
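// __kmp_teams_master runs on the master thread of each team of a teams
// construct: it pushes a fresh cg_root carrying the current thread_limit,
// forks the nested parallel region that executes th_teams_microtask, and
// then joins it.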
void __kmp_teams_master(int gtid) {
7208 kmp_info_t *thr = __kmp_threads[gtid];
7209 kmp_team_t *team = thr->th.th_team;
7210 ident_t *loc = team->t.t_ident;
7211 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7212 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7213 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7214 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7215 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7218 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
7221 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7222 tmp->cg_nthreads = 1;
KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
               " cg_nthreads to 1\n",
               thr, tmp));
tmp->up = thr->th.th_cg_roots;
7227 thr->th.th_cg_roots = tmp;
7231 #if INCLUDE_SSC_MARKS 7234 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7235 (microtask_t)thr->th.th_teams_microtask,
7236 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7237 #if INCLUDE_SSC_MARKS 7241 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7242 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7245 __kmp_join_call(loc, gtid
7254 int __kmp_invoke_teams_master(
int gtid) {
7255 kmp_info_t *this_thr = __kmp_threads[gtid];
7256 kmp_team_t *team = this_thr->th.th_team;
7258 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                 (void *)__kmp_teams_master);
7262 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7264 int tid = __kmp_tid_from_gtid(gtid);
7265 ompt_data_t *task_data =
7266 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7267 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7268 if (ompt_enabled.ompt_callback_implicit_task) {
7269 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7270 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7272 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7275 __kmp_teams_master(gtid);
7277 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7279 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7289 kmp_info_t *thr = __kmp_threads[gtid];
7291 if (num_threads > 0)
7292 thr->th.th_set_nproc = num_threads;
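// __kmp_push_num_teams records the requested number of teams and threads for
// an upcoming teams construct, clamping both against __kmp_teams_max_nth and
// the current thread_limit ICV and warning once when a request is reduced.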
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
7300 KMP_DEBUG_ASSERT(num_teams >= 0);
7301 KMP_DEBUG_ASSERT(num_threads >= 0);
7305 if (num_teams > __kmp_teams_max_nth) {
7306 if (!__kmp_reserve_warn) {
7307 __kmp_reserve_warn = 1;
7308 __kmp_msg(kmp_ms_warning,
7309 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7310 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7312 num_teams = __kmp_teams_max_nth;
7316 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7319 if (!TCR_4(__kmp_init_middle))
7320 __kmp_middle_initialize();
7321 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7322 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7323 if (num_threads == 0) {
7324 num_threads = __kmp_avail_proc / num_teams;
7328 if (num_threads > __kmp_dflt_team_nth) {
7329 num_threads = __kmp_dflt_team_nth;
7331 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7332 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7334 if (num_teams * num_threads > __kmp_teams_max_nth) {
7335 num_threads = __kmp_teams_max_nth / num_teams;
7340 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7342 if (num_threads > __kmp_dflt_team_nth) {
7343 num_threads = __kmp_dflt_team_nth;
7345 if (num_teams * num_threads > __kmp_teams_max_nth) {
7346 int new_threads = __kmp_teams_max_nth / num_teams;
7347 if (!__kmp_reserve_warn) {
7348 __kmp_reserve_warn = 1;
7349 __kmp_msg(kmp_ms_warning,
7350 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7351 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7353 num_threads = new_threads;
7356 thr->th.th_teams_size.nth = num_threads;
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7361 kmp_info_t *thr = __kmp_threads[gtid];
7362 thr->th.th_set_proc_bind = proc_bind;
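// __kmp_internal_fork and __kmp_internal_join bracket a parallel region:
// fork resets the team's dispatch buffers and releases the workers from the
// fork barrier, while join waits for them at the join barrier and reports
// the implicit-task end to OMPT when it is enabled.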
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7368 kmp_info_t *this_thr = __kmp_threads[gtid];
7374 KMP_DEBUG_ASSERT(team);
7375 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7376 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7379 team->t.t_construct = 0;
7380 team->t.t_ordered.dt.t_value =
7384 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7385 if (team->t.t_max_nproc > 1) {
7387 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7388 team->t.t_disp_buffer[i].buffer_index = i;
7389 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7392 team->t.t_disp_buffer[0].buffer_index = 0;
7393 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7397 KMP_ASSERT(this_thr->th.th_team == team);
7400 for (f = 0; f < team->t.t_nproc; f++) {
7401 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7402 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7407 __kmp_fork_barrier(gtid, 0);
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7411 kmp_info_t *this_thr = __kmp_threads[gtid];
7413 KMP_DEBUG_ASSERT(team);
7414 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7415 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7421 if (__kmp_threads[gtid] &&
7422 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
__kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
             __kmp_threads[gtid]);
__kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
             "team->t.t_nproc=%d\n",
             gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
             team->t.t_nproc);
7429 __kmp_print_structure();
7431 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7432 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7435 __kmp_join_barrier(gtid);
7437 if (ompt_enabled.enabled &&
7438 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7439 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7440 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7441 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7443 void *codeptr = NULL;
7444 if (KMP_MASTER_TID(ds_tid) &&
7445 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7446 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7447 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7449 if (ompt_enabled.ompt_callback_sync_region_wait) {
7450 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7451 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7454 if (ompt_enabled.ompt_callback_sync_region) {
7455 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7456 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7460 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7461 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7462 ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
7468 KMP_ASSERT(this_thr->th.th_team == team);
#ifdef USE_LOAD_BALANCE

// Return the number of workers actively spinning in the hot team, or 0 if
// this root is currently executing a parallel region.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i, retval = 0;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the master thread
  }
  // Skip the master thread; it is accounted for elsewhere.
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region, based on the current system load.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads active in the thread pool, active in this root's hot team, and
  // the current (future master) thread can all be added to the new team, but
  // they also contribute to the current system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // The system load could not be determined; fall back to the thread-limit
    // algorithm for this and all subsequent requests.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }
    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The load-balance algorithm is slightly delayed in noticing newly running
  // procs; the real system load should be at least as large as the number of
  // OMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }
  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
} // __kmp_load_balance_nproc

#endif // USE_LOAD_BALANCE
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so freeing
  // __kmp_threads releases both; just clear the pointers.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  // ... (statistics finalization elided in this excerpt)
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;
  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE; // by default __kmpc_begin() is no-op
}

int __kmp_ignore_mppend(void) {
  char *env;
  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE; // by default __kmpc_end() is no-op
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;
  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];
  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) {
    /* must be called in a serial section of the top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
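#if 0 /* Illustrative sketch (not part of this file): __kmp_user_set_library()
         is reached from the exported kmp_set_library*() extensions, which are
         assumed here to be declared in this runtime's omp.h. */
#include <omp.h>
int main(void) {
  kmp_set_library_throughput(); /* workers spin for the blocktime, then sleep */
#pragma omp parallel
  { /* ... */ }
  return 0;
}
#endif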
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  /* Round the requested size up to a multiple of the page size (0x1000). */
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* Only change the default stacksize before the first parallel region. */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;
    __kmp_env_stksize = TRUE; /* as if KMP_STACKSIZE had been specified */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
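#if 0 /* Illustrative sketch (not part of this file): the exported
         kmp_set_stacksize_s() extension reaches __kmp_aux_set_stacksize();
         it only has an effect before the first parallel region. */
#include <omp.h>
int main(void) {
  kmp_set_stacksize_s(8 * 1024 * 1024); /* 8 MiB worker stacks, page-rounded */
#pragma omp parallel
  { /* workers created here use the new stack size */ }
  return 0;
}
#endif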
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200; // default blocktime in milliseconds
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team API; returns NULL if the
   encountering thread is not inside a teams construct. */
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    // Walk up to the team created by the teams construct, accounting for
    // serialized levels.
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (one team of one thread)
    }
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    }
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
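#if 0 /* Illustrative sketch (not part of this file): omp_get_team_num() and
         omp_get_num_teams() are the user-level counterparts of the helpers
         above; outside a teams construct they return 0 and 1 respectively. */
#include <omp.h>
#include <stdio.h>
int main(void) {
#pragma omp teams num_teams(4)
  printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  return 0;
}
#endif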
// Data structures for OpenMP 5.0 affinity format support.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from the spec, e.g. L, N, A
  const char *long_name; // from the spec, e.g. thread_affinity, num_threads
  char field_format; // data type for snprintf ('d' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
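/* Illustrative note: the short/long names above are the field specifiers
   accepted by OMP_AFFINITY_FORMAT and omp_set_affinity_format().  For
   example, a format such as

     "OMP: pid %P tid %i thread %0.3n of %N bound to %{thread_affinity}"

   prints the process id, the native thread id, the zero-padded,
   right-justified thread number in a 3-character field, the team size and,
   where affinity is supported, the thread's affinity mask. */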
// Parse one "%..." field of the affinity format.  Returns the number of
// characters needed to hold the field; the rendered text is placed in
// field_buffer and *ptr is advanced past the field specifier.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format[]
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      int length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // Per the spec, print "undefined" for fields we have no information for,
    // then skip the unrecognized field specifier.
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the affinity string (not
// counting the terminating null).  The rendered string is placed in buffer.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or the empty string, use the affinity-format-var ICV.
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    if (*parse_ptr == '%') {
      // Parse a field and append it to the output buffer.
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Copy a literal character.
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Render the affinity string for the given thread and print it to stdout.
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
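#if 0 /* Illustrative sketch (not part of this file): the OpenMP 5.0 routines
         omp_display_affinity() and omp_capture_affinity() reach
         __kmp_aux_display_affinity()/__kmp_aux_capture_affinity(); passing a
         NULL or empty format falls back to the affinity-format-var ICV. */
#include <omp.h>
int main(void) {
  omp_set_affinity_format("thread %n of %N on %{thread_affinity}");
#pragma omp parallel
  omp_display_affinity(NULL); /* NULL: use the affinity-format-var ICV */
  return 0;
}
#endif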
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was explicitly set */
  bt_set = TRUE;
  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
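#if 0 /* Illustrative sketch (not part of this file): the exported
         kmp_set_blocktime() extension sets the blocktime (in milliseconds)
         for the calling thread via __kmp_aux_set_blocktime(); the value is
         clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME]. */
#include <omp.h>
int main(void) {
  kmp_set_blocktime(0); /* idle workers go to sleep immediately */
#pragma omp parallel
  { /* ... */ }
  return 0;
}
#endif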
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
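#if 0 /* Illustrative sketch (assumption: the kmp_set_defaults() extension,
         which backs __kmp_aux_set_defaults(), accepts an env-style setting
         string and is typically called before the first parallel region). */
#include <omp.h>
int main(void) {
  kmp_set_defaults("KMP_LIBRARY=throughput");
#pragma omp parallel
  { /* ... */ }
  return 0;
}
#endif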
/* Internal fast reduction routines: pick a reduction method based on what the
   compiler generated (atomic/tree hints), the team size, the architecture and
   the OS. */
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T retval;
  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

// The atomic reduce block is available if the compiler marked the ident with
// KMP_IDENT_ATOMIC_REDUCE; the tree reduce block is available if it generated
// reduce_data and reduce_func.
#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block; // default

  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {
    retval = empty_reduce_block;
  } else {
    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;
#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
    // Basic tuning: atomics only pay off for a small number of variables.
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    }
#elif KMP_OS_DARWIN
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    }
#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: honor a forced method unless the team is serialized.
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck);
      break;
    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;
    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      }
#if KMP_FAST_REDUCTION_BARRIER
      else {
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
#endif
      break;
    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// This function is for testing the set/get/determine reduce-method machinery.
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
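/* Sketch/assumption (the helper below is hypothetical, not part of the
   runtime): the packed reduction method is assumed to carry the barrier kind
   in its low 8 bits, which is why __kmp_get_reduce_method() shifts right by 8
   to expose only the method id. */
#if 0
static inline kmp_int32 example_unpack_reduce_method(kmp_int32 packed) {
  return packed >> 8; /* drop the low byte (barrier kind), keep the method */
}
#endif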
// Soft pause sets up threads to ignore blocktime and just go to sleep.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime; resume happens when OpenMP is used again.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume clears __kmp_pause_status and wakes up all sleeping workers.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it up if it is sleeping
        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// Called via __kmpc_pause_resource.  Returns 0 on success, 1 otherwise.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // runtime is not paused, so there is nothing to resume
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
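#if 0 /* Illustrative sketch (not part of this file): the OpenMP 5.0 routines
         omp_pause_resource()/omp_pause_resource_all() funnel into
         __kmp_pause_resource().  A soft pause puts idle workers to sleep; a
         hard pause also shuts the runtime down until OpenMP is used again. */
#include <omp.h>
int main(void) {
#pragma omp parallel
  { /* warm up the thread pool */ }
  omp_pause_resource_all(omp_pause_soft); /* release resources, keep process */
#pragma omp parallel
  { /* the runtime resumes transparently */ }
  return 0;
}
#endif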
// Make sure the serial part of the runtime is initialized, then print the
// OpenMP display-env block under the initialization lock.
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}