14 #include "kmp_affinity.h"
15 #include "kmp_atomic.h"
16 #include "kmp_environment.h"
17 #include "kmp_error.h"
21 #include "kmp_settings.h"
22 #include "kmp_stats.h"
24 #include "kmp_wait_release.h"
25 #include "kmp_wrapper_getpid.h"
26 #include "kmp_dispatch.h"
27 #if KMP_USE_HIER_SCHED
28 #include "kmp_dispatch_hier.h"
32 #include "ompt-specific.h"
36 #define KMP_USE_PRCTL 0
42 #include "tsan_annotations.h"
44 #if defined(KMP_GOMP_COMPAT)
45 char const __kmp_version_alt_comp[] =
46 KMP_VERSION_PREFIX
"alternative compiler support: yes";
49 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX
"API version: "
61 char const __kmp_version_lock[] =
62 KMP_VERSION_PREFIX
"lock type: run time selectable";
65 #define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
70 kmp_info_t __kmp_monitor;
75 void __kmp_cleanup(
void);
77 static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *,
int tid,
79 static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
80 kmp_internal_control_t *new_icvs,
82 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
83 static void __kmp_partition_places(kmp_team_t *team,
84 int update_master_only = 0);
86 static void __kmp_do_serial_initialize(
void);
87 void __kmp_fork_barrier(
int gtid,
int tid);
88 void __kmp_join_barrier(
int gtid);
89 void __kmp_setup_icv_copy(kmp_team_t *team,
int new_nproc,
90 kmp_internal_control_t *new_icvs,
ident_t *loc);
92 #ifdef USE_LOAD_BALANCE
93 static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc);
96 static int __kmp_expand_threads(
int nNeed);
98 static int __kmp_unregister_root_other_thread(
int gtid);
100 static void __kmp_unregister_library(
void);
101 static void __kmp_reap_thread(kmp_info_t *thread,
int is_root);
102 kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
/* Return the global thread id of the currently executing thread. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
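// Illustrative sketch (not part of the runtime): any entry into the OpenMP API
// from a thread the library has not seen yet goes through
// __kmp_get_global_thread_id_reg(), which performs serial initialization and
// registers a new root on demand. A plain user program is enough to exercise
// that path:
//
//   #include <omp.h>
//   #include <stdio.h>
//   int main() {
//     // First runtime call from this (so far unknown) thread triggers
//     // serial initialization and root registration.
//     printf("max threads = %d\n", omp_get_max_threads());
//     return 0;
//   }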
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
/* Never returns; used to park a thread (e.g. during abort handling). */
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) /* doesn't work, so don't try this next time */
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock("  GTID %d\n", gtid);
        last = (char *)p1;
        lastNode = node;
        /* This loop collates adjacent pages with the same host node. */
        do {
          (char *)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                             lastNode);
      }
    } else {
      __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
      __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                           (char *)p2 + (page_size - 1),
                           __kmp_get_host_node(p2));
    }
  } else
    __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
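// Illustrative sketch (not part of the runtime): __kmp_warn() is a printf-style
// helper gated by the warnings setting above; a typical call site would look
// like the following ("requested" and "supported" are hypothetical names used
// only for illustration):
//
//   if (requested > supported)
//     __kmp_warn("requested %d exceeds supported %d", requested, supported);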
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}
static void __kmp_init_allocator() { __kmp_init_memkind(); }
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Wait until all threads other than the requesting one have terminated.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Now it might be safe to check and reset the locks.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif
}
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating (rather than
      // calling FreeLibrary), so threads may already have been killed and
      // the bootstrap locks must be reset before any further cleanup.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
/* Change the library type to "status" and return the old type.
   Works only if __kmp_is_library_initialized. */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether KMP_LIBRARY=throughput
}
/* __kmp_parallel_deo -- Wait until it's our turn in the "ordered" sequence. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next thread in the "ordered" sequence. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
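// Illustrative sketch (not part of the runtime): t_ordered.dt.t_value acts as a
// token holding the tid whose turn it is. __kmp_parallel_deo() spins until the
// token equals the caller's tid, and __kmp_parallel_dxo() passes it on as
// (tid + 1) % nproc. In user code, this is what serializes an ordered clause
// (consume() below is a hypothetical user function):
//
//   #pragma omp parallel for ordered schedule(static, 1)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp ordered
//     { consume(i); }
//   }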
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
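// Illustrative sketch (not part of the runtime): __kmp_enter_single() returns
// nonzero for exactly one thread of the team (the one that wins the
// compare-and-store on t_construct); the others get zero and skip the block.
// That is the mechanism behind a user-level single construct
// (initialize_shared_state() is a hypothetical user function):
//
//   #pragma omp parallel
//   {
//     #pragma omp single
//     { initialize_shared_state(); }
//   }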
/* Determine how many threads we can actually reserve for the requested
   parallel region, respecting dynamic adjustment, thread limits, and the
   capacity of the threads array. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // Initial check to see if we should use a serialized team.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // no adjustment necessary
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (total number of threads on the device).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
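// Illustrative sketch (not part of the runtime): the capping logic above is
// just "already running + newly requested must stay within the limit", with
// the master not double-counted for an active root. For example, assuming
// __kmp_max_nth = 8, __kmp_nth = 5, an active root and set_nthreads = 6:
//
//   tl_nthreads = 8 - 5 + 1 = 4;  // only 4 of the 6 requested threads fit
//   new_nthreads = 4;             // and a one-time warning may be issued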
/* Most of the work for a fork: install the master and allocate/attach the
   worker threads to the new team. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams might not be supported on this machine
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only copy the values into the team structure if they differ, to avoid
    // unnecessary writes to a shared cache line.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime uses this flag to decide whether the values
    // should be restored, so it must be kept correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: reload the hardware registers from the team's saved values.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
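// Illustrative sketch (not part of the runtime): the pair above is used around
// a parallel region so that workers inherit the master's FP environment and
// the master restores it at join; conceptually:
//
//   propagateFPControl(team);  // fork: capture master's x87/MXCSR into team
//   /* ... run the parallel region ... */
//   updateHWFPControl(team);   // join: reload registers if the team copy differs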
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so runs only in a team of
   a single thread. */
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
          codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; we need to allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used,
       that should be ok, just add a serialized level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; its content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
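// Illustrative sketch (not part of the runtime): a serialized parallel region
// is what the runtime falls back to when only one thread may run, for example
// when an if() clause evaluates to false (work() is a hypothetical user
// function):
//
//   #pragma omp parallel if (0)
//   { work(omp_get_thread_num()); } // runs on the encountering thread only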
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with a
       slightly different offset. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // AC: This is start of parallel that is nested inside teams construct.
    // The team is actual (hot); all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif

    if (parent_team == master_th->th.th_serial_team) {
      // AC: we are in serialized parallel
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // AC: need this in order enquiry functions work correctly, will restore
      // at join time
      parent_team->t.t_serialized--;

#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; its content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) {
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // AC: only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  /* determine how many new threads we can use */
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    // Check whether the region must be serialized (no nesting allowed and
    // already inside a parallel, unless we are entering a teams construct).
    if ((!get__nested(master_th) && (root->r.r_in_parallel && !enter_teams)) ||
        (__kmp_library == library_serial)) {
      KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                    " threads\n",
                    gtid, nthreads));
      nthreads = 1;
    }

    if (nthreads > 1) {
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later after team of threads created
        // and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;

  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; its content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; its content was swapped
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame.ptr);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      }
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
#endif

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  } // if (nthreads == 1)
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind_policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var for this
      // parallel region.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is hot team, it
    // should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else
// only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
        if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
            __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);

  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    if (!team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
  }
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
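// Illustrative sketch (not part of the runtime): compilers do not call
// __kmp_fork_call() directly; they emit a call to the public entry point
// __kmpc_fork_call(), passing the outlined region body as a microtask, and
// that wrapper forwards here. Conceptually, the code generated for
// "#pragma omp parallel" looks roughly like this (outlined_body is a
// hypothetical name for the compiler-outlined function):
//
//   void outlined_body(kmp_int32 *gtid, kmp_int32 *btid /*, shared args */);
//   // ...
//   __kmpc_fork_call(&loc, /*argc=*/0, (kmpc_micro)outlined_body);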
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   fork_context_e fork_context, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif
2352 void __kmp_join_call(
ident_t *loc,
int gtid
2355 enum fork_context_e fork_context
2362 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2364 kmp_team_t *parent_team;
2365 kmp_info_t *master_th;
2369 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2372 master_th = __kmp_threads[gtid];
2373 root = master_th->th.th_root;
2374 team = master_th->th.th_team;
2375 parent_team = team->t.t_parent;
2377 master_th->th.th_ident = loc;
2380 if (ompt_enabled.enabled) {
2381 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2386 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2387 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2388 "th_task_team = %p\n",
2389 __kmp_gtid_from_thread(master_th), team,
2390 team->t.t_task_team[master_th->th.th_task_state],
2391 master_th->th.th_task_team));
2392 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2393 team->t.t_task_team[master_th->th.th_task_state]);
2397 if (team->t.t_serialized) {
2399 if (master_th->th.th_teams_microtask) {
2401 int level = team->t.t_level;
2402 int tlevel = master_th->th.th_teams_level;
2403 if (level == tlevel) {
2407 }
else if (level == tlevel + 1) {
2411 team->t.t_serialized++;
2418 if (ompt_enabled.enabled) {
2419 __kmp_join_restore_state(master_th, parent_team);
2426 master_active = team->t.t_master_active;
2434 __kmp_internal_join(loc, gtid, team);
2438 master_th->th.th_task_state =
2446 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2447 void *codeptr = team->t.ompt_team_info.master_return_address;
2451 if (__itt_stack_caller_create_ptr) {
2452 __kmp_itt_stack_caller_destroy(
2453 (__itt_caller)team->t
2458 if (team->t.t_active_level == 1
2460 && !master_th->th.th_teams_microtask
2463 master_th->th.th_ident = loc;
2466 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2467 __kmp_forkjoin_frames_mode == 3)
2468 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2469 master_th->th.th_frame_time, 0, loc,
2470 master_th->th.th_team_nproc, 1);
2471 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2472 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2473 __kmp_itt_region_joined(gtid);
2478 if (master_th->th.th_teams_microtask && !exit_teams &&
2479 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2480 team->t.t_level == master_th->th.th_teams_level + 1) {
2487 team->t.t_active_level--;
2488 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2494 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2495 int old_num = master_th->th.th_team_nproc;
2496 int new_num = master_th->th.th_teams_size.nth;
2497 kmp_info_t **other_threads = team->t.t_threads;
2498 team->t.t_nproc = new_num;
2499 for (
int i = 0; i < old_num; ++i) {
2500 other_threads[i]->th.th_team_nproc = new_num;
2503 for (
int i = old_num; i < new_num; ++i) {
2505 KMP_DEBUG_ASSERT(other_threads[i]);
2506 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2507 for (
int b = 0; b < bs_last_barrier; ++b) {
2508 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2509 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2511 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2514 if (__kmp_tasking_mode != tskm_immediate_exec) {
2516 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2522 if (ompt_enabled.enabled) {
2523 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2533 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2534 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2536 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2541 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2544 if (!master_th->th.th_teams_microtask ||
2545 team->t.t_level > master_th->th.th_teams_level)
2549 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2551 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2554 if (ompt_enabled.enabled) {
2555 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2556 if (ompt_enabled.ompt_callback_implicit_task) {
2557 int ompt_team_size = team->t.t_nproc;
2558 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2559 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2560 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2563 task_info->frame.exit_frame = ompt_data_none;
2564 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);
2572 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2574 master_th->th.th_first_place = team->t.t_first_place;
2575 master_th->th.th_last_place = team->t.t_last_place;
2578 master_th->th.th_def_allocator = team->t.t_def_allocator;
2581 updateHWFPControl(team);
2583 if (root->r.r_active != master_active)
2584 root->r.r_active = master_active;
2586 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2594 master_th->th.th_team = parent_team;
2595 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2596 master_th->th.th_team_master = parent_team->t.t_threads[0];
2597 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2600 if (parent_team->t.t_serialized &&
2601 parent_team != master_th->th.th_serial_team &&
2602 parent_team != root->r.r_root_team) {
2603 __kmp_free_team(root,
2604 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2605 master_th->th.th_serial_team = parent_team;
2608 if (__kmp_tasking_mode != tskm_immediate_exec) {
2609 if (master_th->th.th_task_state_top >
2611 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2613 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2614 master_th->th.th_task_state;
2615 --master_th->th.th_task_state_top;
2617 master_th->th.th_task_state =
2619 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2622 master_th->th.th_task_team =
2623 parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;
2635 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2638 if (ompt_enabled.enabled) {
2639 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
               thread->th.th_team->t.t_serialized) {
      push = 1;
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
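// The records pushed here form a per-serial-team stack of saved ICVs keyed by
// the serialized-nesting level; they are popped when the matching serialized
// region ends, so ICV changes made inside a nested serialized parallel region
// do not leak back out to the enclosing level.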
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case of an omp_set_num_threads() call.
    hot_team->t.t_size_changed = -1;
  }
}
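// __kmp_set_num_threads() is what the user-level omp_set_num_threads() entry
// point eventually reaches. A minimal illustration of the observable effect
// (ordinary user code, shown here only as a comment):
//
//   omp_set_num_threads(4);     // updates the calling thread's nproc ICV
//   #pragma omp parallel        // next region (without num_threads) uses 4
//   { ... }
//
// The hot-team shrink above is an optimization: when the root is idle and the
// request is smaller than the cached hot team, surplus workers are released
// immediately instead of waiting for the next fork.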
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user asks for a negative number of active
    // levels.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, max_active_levels is within the valid range
    // [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]; a zero value is allowed.
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // An out-of-range request is clamped to the upper limit.
    // (implementation defined behavior)
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
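// Backs omp_set_max_active_levels(): negative requests are ignored with a
// warning, requests above KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped, and the
// accepted value is stored in the calling thread's ICVs (after saving the
// current controls so a serialized region can restore them).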
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; correct it if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  //   <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: the hint needs attention if the default schedule changes.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore the chunk value in case of a bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate an unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore the chunk parameter for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
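// This is the internal side of omp_set_schedule(). The kmp_sched_t values of
// the public API are translated through __kmp_sch_map into the internal
// sched_type enumeration; for example (user code, comment-only illustration):
//
//   omp_set_schedule(omp_sched_dynamic, 8);   // -> kmp_sch_dynamic_chunked,
//                                             //    chunk = 8 in the ICVs
//   omp_set_schedule(omp_sched_auto, 100);    // chunk argument is ignored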
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set, show this fact via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
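// The switch above is the reverse mapping used by omp_get_schedule(): several
// internal schedule variants (e.g. the greedy/balanced static forms, or the
// iterative/analytical guided forms) collapse back onto the single public
// kind the user originally requested.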
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2916 KMP_DEBUG_ASSERT(__kmp_init_serial);
2923 thr = __kmp_threads[gtid];
2924 team = thr->th.th_team;
2925 ii = team->t.t_level;
2930 if (thr->th.th_teams_microtask) {
2932 int tlevel = thr->th.th_teams_level;
2935 KMP_DEBUG_ASSERT(ii >= tlevel);
2948 return __kmp_tid_from_gtid(gtid);
2950 dd = team->t.t_serialized;
2952 while (ii > level) {
2953 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2955 if ((team->t.t_serialized) && (!dd)) {
2956 team = team->t.t_parent;
2960 team = team->t.t_parent;
2961 dd = team->t.t_serialized;
2966 return (dd > 1) ? (0) : (team->t.t_master_tid);
int __kmp_get_team_size(int gtid, int level) {
  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
2983 thr = __kmp_threads[gtid];
2984 team = thr->th.th_team;
2985 ii = team->t.t_level;
2990 if (thr->th.th_teams_microtask) {
2992 int tlevel = thr->th.th_teams_level;
2995 KMP_DEBUG_ASSERT(ii >= tlevel);
3007 while (ii > level) {
3008 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3010 if (team->t.t_serialized && (!dd)) {
3011 team = team->t.t_parent;
3015 team = team->t.t_parent;
3020 return team->t.t_nproc;
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided, which may be changed independently.
  kmp_r_sched_t r_sched;
  if (__kmp_sched == kmp_sch_static)
    r_sched.r_sched_type = __kmp_static; // balanced or greedy
  else if (__kmp_sched == kmp_sch_guided_chunked)
    r_sched.r_sched_type = __kmp_guided; // iterative or analytical
  else // STATIC_CHUNKED, DYNAMIC_CHUNKED, or other
    r_sched.r_sched_type = __kmp_sched;
  if (__kmp_chunk < KMP_DEFAULT_CHUNK)
    r_sched.chunk = KMP_DEFAULT_CHUNK; // __kmp_chunk may never have been set
  else
    r_sched.chunk = __kmp_chunk;
  return r_sched;
}
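// __kmp_get_schedule_global() is used when building default ICVs (see
// __kmp_get_global_icvs() below): it folds the independently settable globals
// for schedule kind and chunk into one kmp_r_sched_t snapshot.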
// Allocate the argv space for a team.
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    // If space was previously allocated on the heap for the args, free it.
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      // Use the unused space in the cache line for the arguments.
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
            "team_%d.t_inline_argv", team->t.t_id);
      }
    } else {
      // Allocate space for the arguments on the heap.
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  // Set up the dispatch buffers.
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  // Note: this does not free the thread structures in t_threads themselves.
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
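// Note that only t_threads is preserved across the reallocation above; the
// dispatch buffers and implicit task data are rebuilt from scratch by
// __kmp_allocate_team_arrays(), which is sufficient because they are
// reinitialized before the resized team runs.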
static kmp_internal_control_t __kmp_get_global_icvs(void) {
  // Get the current state of the scheduling globals.
  kmp_r_sched_t r_sched = __kmp_get_schedule_global();

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // serial_nesting_level (corresponds to th_team_serialized)
      (kmp_int8)__kmp_dflt_nested, // nested parallelism enabled (per thread)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // whether blocktime was explicitly set
      __kmp_dflt_blocktime, // blocktime
#if KMP_USE_MONITOR
      __kmp_bt_intervals, // blocktime intervals
#endif
      __kmp_dflt_team_nth, // nproc for the next parallel region
      __kmp_cg_max_nth, // thread_limit
      __kmp_dflt_max_active_levels, // max_active_levels
      r_sched, // run-time schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0], // proc_bind
      __kmp_default_device, // default_device
      NULL // next
  };

  return g_icvs;
}
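// These are the "global" ICV defaults captured at root/team creation time:
// each field mirrors one internal control (nesting, dynamic adjustment,
// blocktime, default team size, thread limit, max active levels, run-time
// schedule, proc-bind and default device), and copy_icvs() later propagates a
// snapshot of them into each implicit task.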
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
3211 static void __kmp_initialize_root(kmp_root_t *root) {
3213 kmp_team_t *root_team;
3214 kmp_team_t *hot_team;
3215 int hot_team_max_nth;
3216 kmp_r_sched_t r_sched =
3217 __kmp_get_schedule_global();
3218 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3219 KMP_DEBUG_ASSERT(root);
3220 KMP_ASSERT(!root->r.r_begin);
3223 __kmp_init_lock(&root->r.r_begin_lock);
3224 root->r.r_begin = FALSE;
3225 root->r.r_active = FALSE;
3226 root->r.r_in_parallel = 0;
3227 root->r.r_blocktime = __kmp_dflt_blocktime;
3228 root->r.r_nested = __kmp_dflt_nested;
3232 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3235 __kmp_allocate_team(root,
3242 __kmp_nested_proc_bind.bind_types[0],
3246 USE_NESTED_HOT_ARG(NULL)
3251 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3254 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3256 root->r.r_root_team = root_team;
3257 root_team->t.t_control_stack_top = NULL;
3260 root_team->t.t_threads[0] = NULL;
3261 root_team->t.t_nproc = 1;
3262 root_team->t.t_serialized = 1;
3264 root_team->t.t_sched.sched = r_sched.sched;
3267 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3268 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3272 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3275 __kmp_allocate_team(root,
3277 __kmp_dflt_team_nth_ub * 2,
3282 __kmp_nested_proc_bind.bind_types[0],
3286 USE_NESTED_HOT_ARG(NULL)
3288 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3290 root->r.r_hot_team = hot_team;
3291 root_team->t.t_control_stack_top = NULL;
3294 hot_team->t.t_parent = root_team;
3297 hot_team_max_nth = hot_team->t.t_max_nproc;
3298 for (f = 0; f < hot_team_max_nth; ++f) {
3299 hot_team->t.t_threads[f] = NULL;
3301 hot_team->t.t_nproc = 1;
3303 hot_team->t.t_sched.sched = r_sched.sched;
3304 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to the list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
    ) {
3327 KMP_DEBUG_ASSERT(list != NULL);
3332 __kmp_print_structure_team_accum(list, team->t.t_parent);
3333 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3337 while (l->next != NULL && l->entry != team) {
3340 if (l->next != NULL) {
3346 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3352 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3353 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {
3383 kmp_team_list_t list;
3387 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3391 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3392 "Table\n------------------------------\n");
3395 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3396 __kmp_printf(
"%2d", gtid);
3397 if (__kmp_threads != NULL) {
3398 __kmp_printf(
" %p", __kmp_threads[gtid]);
3400 if (__kmp_root != NULL) {
3401 __kmp_printf(
" %p", __kmp_root[gtid]);
3408 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3410 if (__kmp_threads != NULL) {
3412 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3413 kmp_info_t
const *thread = __kmp_threads[gtid];
3414 if (thread != NULL) {
3415 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3416 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3417 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3418 __kmp_print_structure_team(
" Serial Team: ",
3419 thread->th.th_serial_team);
3420 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3421 __kmp_print_structure_thread(
" Master: ",
3422 thread->th.th_team_master);
3423 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3424 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3426 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3428 __kmp_print_structure_thread(
" Next in pool: ",
3429 thread->th.th_next_pool);
3431 __kmp_print_structure_team_accum(list, thread->th.th_team);
3432 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3436 __kmp_printf(
"Threads array is not allocated.\n");
3440 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3442 if (__kmp_root != NULL) {
3444 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3445 kmp_root_t
const *root = __kmp_root[gtid];
3447 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3448 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3449 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3450 __kmp_print_structure_thread(
" Uber Thread: ",
3451 root->r.r_uber_thread);
3452 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3453 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested);
3454 __kmp_printf(
" In Parallel: %2d\n",
3455 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3457 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3458 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3462 __kmp_printf(
"Ubers array is not allocated.\n");
3465 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3467 while (list->next != NULL) {
3468 kmp_team_p
const *team = list->entry;
3470 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3471 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3472 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3473 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3474 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3475 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3476 for (i = 0; i < team->t.t_nproc; ++i) {
3477 __kmp_printf(
" Thread %2d: ", i);
3478 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3480 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3486 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3488 __kmp_print_structure_thread(
"Thread pool: ",
3489 CCAST(kmp_info_t *, __kmp_thread_pool));
3490 __kmp_print_structure_team(
"Team pool: ",
3491 CCAST(kmp_team_t *, __kmp_team_pool));
3495 while (list != NULL) {
3496 kmp_team_list_item_t *item = list;
3498 KMP_INTERNAL_FREE(item);
3507 static const unsigned __kmp_primes[] = {
3508 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3509 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3510 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3511 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3512 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3513 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3514 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3515 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3516 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3517 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3518 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
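// The table above supplies per-thread multipliers for the simple linear
// congruential generator used by __kmp_get_random() below:
//     x_{n+1} = a * x_n + 1   (mod 2^32),  result = high 16 bits of x_n
// Seeding each thread with a different prime 'a' (chosen by tid) keeps the
// per-thread sequences decorrelated without any shared state.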
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;
  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only roots that died non-active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
3581 static int __kmp_expand_threads(
int nNeed) {
3583 int minimumRequiredCapacity;
3585 kmp_info_t **newThreads;
3586 kmp_root_t **newRoot;
3592 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3595 added = __kmp_reclaim_dead_roots();
3624 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3627 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    return added; // possible expansion too small -- give up
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to match the new kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
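// Capacity grows by doubling (capped at __kmp_sys_max_nth), so repeated root
// registrations expand __kmp_threads/__kmp_root in O(log n) reallocations.
// For example, with a capacity of 32 and a request for 40 more slots, the
// loop above doubles 32 -> 64 -> 128 and stops once 128 >= 72.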
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3693 capacity = __kmp_threads_capacity;
3694 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3699 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3700 if (__kmp_tp_cached) {
3701 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3702 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3703 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3705 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3713 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3717 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3718 KMP_ASSERT(gtid < __kmp_threads_capacity);
3722 TCW_4(__kmp_nth, __kmp_nth + 1);
3726 if (__kmp_adjust_gtid_mode) {
3727 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3728 if (TCR_4(__kmp_gtid_mode) != 2) {
3729 TCW_4(__kmp_gtid_mode, 2);
3732 if (TCR_4(__kmp_gtid_mode) != 1) {
3733 TCW_4(__kmp_gtid_mode, 1);
3738 #ifdef KMP_ADJUST_BLOCKTIME
3741 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3742 if (__kmp_nth > __kmp_avail_proc) {
3743 __kmp_zero_bt = TRUE;
3749 if (!(root = __kmp_root[gtid])) {
3750 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3751 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3754 #if KMP_STATS_ENABLED
3756 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3757 __kmp_stats_thread_ptr->startLife();
3758 KMP_SET_THREAD_STATE(SERIAL_REGION);
3761 __kmp_initialize_root(root);
3764 if (root->r.r_uber_thread) {
3765 root_thread = root->r.r_uber_thread;
3767 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3768 if (__kmp_storage_map) {
3769 __kmp_print_thread_storage_map(root_thread, gtid);
3771 root_thread->th.th_info.ds.ds_gtid = gtid;
3773 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3775 root_thread->th.th_root = root;
3776 if (__kmp_env_consistency_check) {
3777 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3780 __kmp_initialize_fast_memory(root_thread);
3784 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3785 __kmp_initialize_bget(root_thread);
3787 __kmp_init_random(root_thread);
3791 if (!root_thread->th.th_serial_team) {
3792 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3793 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3794 root_thread->th.th_serial_team =
3795 __kmp_allocate_team(root, 1, 1,
3802 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3804 KMP_ASSERT(root_thread->th.th_serial_team);
3805 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3806 root_thread->th.th_serial_team));
3809 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3811 root->r.r_root_team->t.t_threads[0] = root_thread;
3812 root->r.r_hot_team->t.t_threads[0] = root_thread;
3813 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3815 root_thread->th.th_serial_team->t.t_serialized = 0;
3816 root->r.r_uber_thread = root_thread;
3819 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3820 TCW_4(__kmp_init_gtid, TRUE);
3823 __kmp_gtid_set_specific(gtid);
3826 __kmp_itt_thread_name(gtid);
3829 #ifdef KMP_TDATA_GTID
3832 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3833 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3835 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3837 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3838 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3839 KMP_INIT_BARRIER_STATE));
3842 for (b = 0; b < bs_last_barrier; ++b) {
3843 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3845 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3849 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3850 KMP_INIT_BARRIER_STATE);
3852 #if KMP_AFFINITY_SUPPORTED
3854 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3855 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3856 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3857 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3859 if (TCR_4(__kmp_init_middle)) {
3860 __kmp_affinity_set_init_mask(gtid, TRUE);
3864 root_thread->th.th_def_allocator = __kmp_def_allocator;
3865 root_thread->th.th_prev_level = 0;
3866 root_thread->th.th_prev_num_threads = 1;
3869 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3870 tmp->cg_root = root_thread;
3871 tmp->cg_thread_limit = __kmp_cg_max_nth;
3872 tmp->cg_nthreads = 1;
3873 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3874 " cg_nthreads init to 1\n",
3877 root_thread->th.th_cg_roots = tmp;
3879 __kmp_root_counter++;
3882 if (!initial_thread && ompt_enabled.enabled) {
3884 kmp_info_t *root_thread = ompt_get_thread();
3886 ompt_set_thread_state(root_thread, ompt_state_overhead);
3888 if (ompt_enabled.ompt_callback_thread_begin) {
3889 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3890 ompt_thread_initial, __ompt_get_thread_data_internal());
3892 ompt_data_t *task_data;
3893 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3894 if (ompt_enabled.ompt_callback_task_create) {
3895 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3896 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3900 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3905 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
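// At this point the root (either the initial thread or a foreign thread
// entering the runtime) owns a gtid slot, a kmp_root_t with root and hot
// teams, a serial team, barrier state, affinity defaults and a cg_roots node;
// the caller receives the gtid and can immediately fork a parallel region.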
3910 #if KMP_NESTED_HOT_TEAMS
3911 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3912 const int max_level) {
3914 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3915 if (!hot_teams || !hot_teams[level].hot_team) {
3918 KMP_DEBUG_ASSERT(level < max_level);
3919 kmp_team_t *team = hot_teams[level].hot_team;
3920 nth = hot_teams[level].hot_team_nth;
3922 if (level < max_level - 1) {
3923 for (i = 0; i < nth; ++i) {
3924 kmp_info_t *th = team->t.t_threads[i];
3925 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3926 if (i > 0 && th->th.th_hot_teams) {
3927 __kmp_free(th->th.th_hot_teams);
3928 th->th.th_hot_teams = NULL;
3932 __kmp_free_team(root, team, NULL);
3939 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3940 kmp_team_t *root_team = root->r.r_root_team;
3941 kmp_team_t *hot_team = root->r.r_hot_team;
3942 int n = hot_team->t.t_nproc;
3945 KMP_DEBUG_ASSERT(!root->r.r_active);
3947 root->r.r_root_team = NULL;
3948 root->r.r_hot_team = NULL;
3951 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3952 #if KMP_NESTED_HOT_TEAMS
3953 if (__kmp_hot_teams_max_level >
3955 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3956 kmp_info_t *th = hot_team->t.t_threads[i];
3957 if (__kmp_hot_teams_max_level > 1) {
3958 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3960 if (th->th.th_hot_teams) {
3961 __kmp_free(th->th.th_hot_teams);
3962 th->th.th_hot_teams = NULL;
3967 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3972 if (__kmp_tasking_mode != tskm_immediate_exec) {
3973 __kmp_wait_to_unref_task_teams();
3979 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3981 (LPVOID) & (root->r.r_uber_thread->th),
3982 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3983 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3987 if (ompt_enabled.ompt_callback_thread_end) {
3988 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3989 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3995 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
3996 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
3998 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
3999 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4001 __kmp_reap_thread(root->r.r_uber_thread, 1);
4005 root->r.r_uber_thread = NULL;
4007 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
4025 kmp_root_t *root = __kmp_root[gtid];
4027 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4028 KMP_ASSERT(KMP_UBER_GTID(gtid));
4029 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4030 KMP_ASSERT(root->r.r_active == FALSE);
4035 kmp_info_t *thread = __kmp_threads[gtid];
4036 kmp_team_t *team = thread->th.th_team;
4037 kmp_task_team_t *task_team = thread->th.th_task_team;
4040 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
4043 thread->th.ompt_thread_info.state = ompt_state_undefined;
4045 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4049 __kmp_reset_root(gtid, root);
4052 __kmp_gtid_set_specific(KMP_GTID_DNE);
4053 #ifdef KMP_TDATA_GTID
4054 __kmp_gtid = KMP_GTID_DNE;
4059 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4061 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {
  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p "
               "curtask=%p ptask=%p\n",
               gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
}
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
4110 kmp_info_t *master = team->t.t_threads[0];
4111 KMP_DEBUG_ASSERT(this_thr != NULL);
4112 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4113 KMP_DEBUG_ASSERT(team);
4114 KMP_DEBUG_ASSERT(team->t.t_threads);
4115 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4116 KMP_DEBUG_ASSERT(master);
4117 KMP_DEBUG_ASSERT(master->th.th_root);
4121 TCW_SYNC_PTR(this_thr->th.th_team, team);
4123 this_thr->th.th_info.ds.ds_tid = tid;
4124 this_thr->th.th_set_nproc = 0;
4125 if (__kmp_tasking_mode != tskm_immediate_exec)
4128 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4130 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4132 this_thr->th.th_set_proc_bind = proc_bind_default;
4133 #if KMP_AFFINITY_SUPPORTED
4134 this_thr->th.th_new_place = this_thr->th.th_current_place;
4137 this_thr->th.th_root = master->th.th_root;
4140 this_thr->th.th_team_nproc = team->t.t_nproc;
4141 this_thr->th.th_team_master = master;
4142 this_thr->th.th_team_serialized = team->t.t_serialized;
4143 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4145 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4147 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4148 tid, gtid, this_thr, this_thr->th.th_current_task));
4150 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4153 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4154 tid, gtid, this_thr, this_thr->th.th_current_task));
4159 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4161 this_thr->th.th_local.this_construct = 0;
  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }
4174 if (this_thr != master &&
4175 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4177 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4178 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4180 this_thr->th.th_cg_roots->cg_nthreads++;
4181 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4182 " node %p of thread %p to %d\n",
4183 this_thr, this_thr->th.th_cg_roots,
4184 this_thr->th.th_cg_roots->cg_root,
4185 this_thr->th.th_cg_roots->cg_nthreads));
4186 this_thr->th.th_current_task->td_icvs.thread_limit =
4187 this_thr->th.th_cg_roots->cg_thread_limit;
4192 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4195 sizeof(dispatch_private_info_t) *
4196 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4197 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4198 team->t.t_max_nproc));
4199 KMP_ASSERT(dispatch);
4200 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4201 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4203 dispatch->th_disp_index = 0;
4205 dispatch->th_doacross_buf_idx = 0;
  if (!dispatch->th_disp_buffer) {
    dispatch->th_disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(disp_size);

    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, &dispatch->th_disp_buffer[0],
          &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                        ? 1
                                        : __kmp_dispatch_num_buffers],
          disp_size, "th_%d.th_dispatch.th_disp_buffer "
                     "(team_%d.t_dispatch[%d].th_disp_buffer)",
          gtid, team->t.t_id, gtid);
    }
  } else {
    memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
  }
4225 dispatch->th_dispatch_pr_current = 0;
4226 dispatch->th_dispatch_sh_current = 0;
4228 dispatch->th_deo_fcn = 0;
4229 dispatch->th_dxo_fcn = 0;
4232 this_thr->th.th_next_pool = NULL;
4234 if (!this_thr->th.th_task_state_memo_stack) {
4236 this_thr->th.th_task_state_memo_stack =
4237 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4238 this_thr->th.th_task_state_top = 0;
4239 this_thr->th.th_task_state_stack_sz = 4;
4240 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4242 this_thr->th.th_task_state_memo_stack[i] = 0;
4245 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4246 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
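// __kmp_initialize_info() is (re)run every time a worker is bound to a team,
// whether the thread is brand new or recycled from the pool, so everything it
// touches (team pointers, implicit task, dispatch buffers, task-state stack)
// must tolerate reuse.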
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4263 KMP_DEBUG_ASSERT(root && team);
4264 #if !KMP_NESTED_HOT_TEAMS
4265 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4270 if (__kmp_thread_pool) {
4271 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4273 if (new_thr == __kmp_thread_pool_insert_pt) {
4274 __kmp_thread_pool_insert_pt = NULL;
4276 TCW_4(new_thr->th.th_in_pool, FALSE);
4279 __kmp_thread_pool_nth--;
4281 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4282 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4283 KMP_ASSERT(!new_thr->th.th_team);
4284 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4285 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
4288 __kmp_initialize_info(new_thr, team, new_tid,
4289 new_thr->th.th_info.ds.ds_gtid);
4290 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4292 TCW_4(__kmp_nth, __kmp_nth + 1);
4294 new_thr->th.th_task_state = 0;
4295 new_thr->th.th_task_state_top = 0;
4296 new_thr->th.th_task_state_stack_sz = 4;
4298 #ifdef KMP_ADJUST_BLOCKTIME
4301 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4302 if (__kmp_nth > __kmp_avail_proc) {
4303 __kmp_zero_bt = TRUE;
4312 kmp_balign_t *balign = new_thr->th.th_bar;
4313 for (b = 0; b < bs_last_barrier; ++b)
4314 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4317 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4318 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4325 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4326 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4331 if (!TCR_4(__kmp_init_monitor)) {
4332 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4333 if (!TCR_4(__kmp_init_monitor)) {
4334 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4335 TCW_4(__kmp_init_monitor, 1);
4336 __kmp_create_monitor(&__kmp_monitor);
4337 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4348 while (TCR_4(__kmp_init_monitor) < 2) {
4351 KF_TRACE(10, (
"after monitor thread has started\n"));
4354 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4359 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4360 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4366 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4368 if (__kmp_storage_map) {
4369 __kmp_print_thread_storage_map(new_thr, new_gtid);
4374 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4375 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4376 new_thr->th.th_serial_team = serial_team =
4377 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4384 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4386 KMP_ASSERT(serial_team);
4387 serial_team->t.t_serialized = 0;
4389 serial_team->t.t_threads[0] = new_thr;
4391 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4395 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4398 __kmp_initialize_fast_memory(new_thr);
4402 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4403 __kmp_initialize_bget(new_thr);
4406 __kmp_init_random(new_thr);
4410 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4411 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4414 kmp_balign_t *balign = new_thr->th.th_bar;
4415 for (b = 0; b < bs_last_barrier; ++b) {
4416 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4417 balign[b].bb.team = NULL;
4418 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4419 balign[b].bb.use_oncore_barrier = 0;
4422 new_thr->th.th_spin_here = FALSE;
4423 new_thr->th.th_next_waiting = 0;
  new_thr->th.th_blocking = false;
4428 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4429 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4430 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4431 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4432 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4435 new_thr->th.th_def_allocator = __kmp_def_allocator;
4436 new_thr->th.th_prev_level = 0;
4437 new_thr->th.th_prev_num_threads = 1;
4440 TCW_4(new_thr->th.th_in_pool, FALSE);
4441 new_thr->th.th_active_in_pool = FALSE;
4442 TCW_4(new_thr->th.th_active, TRUE);
4450 if (__kmp_adjust_gtid_mode) {
4451 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4452 if (TCR_4(__kmp_gtid_mode) != 2) {
4453 TCW_4(__kmp_gtid_mode, 2);
4456 if (TCR_4(__kmp_gtid_mode) != 1) {
4457 TCW_4(__kmp_gtid_mode, 1);
4462 #ifdef KMP_ADJUST_BLOCKTIME
4465 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4466 if (__kmp_nth > __kmp_avail_proc) {
4467 __kmp_zero_bt = TRUE;
4474 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4475 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4477 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4479 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy the ICVs to the master thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
/* Initialize the team data structure. This assumes that t_threads and
   t_max_nproc are already set. The arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent is left alone; resetting it would break hot teams */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    if (old_mask != NULL) {
      int status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0)
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4585 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4593 kmp_info_t *master_th = team->t.t_threads[0];
4594 KMP_DEBUG_ASSERT(master_th != NULL);
4595 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4596 int first_place = master_th->th.th_first_place;
4597 int last_place = master_th->th.th_last_place;
4598 int masters_place = master_th->th.th_current_place;
4599 team->t.t_first_place = first_place;
4600 team->t.t_last_place = last_place;
4602 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4603 "bound to place %d partition = [%d,%d]\n",
4604 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4605 team->t.t_id, masters_place, first_place, last_place));
4607 switch (proc_bind) {
4609 case proc_bind_default:
4612 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4615 case proc_bind_master: {
4617 int n_th = team->t.t_nproc;
4618 for (f = 1; f < n_th; f++) {
4619 kmp_info_t *th = team->t.t_threads[f];
4620 KMP_DEBUG_ASSERT(th != NULL);
4621 th->th.th_first_place = first_place;
4622 th->th.th_last_place = last_place;
4623 th->th.th_new_place = masters_place;
4625 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4626 team->t.t_display_affinity != 1) {
4627 team->t.t_display_affinity = 1;
4631 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d "
4632 "partition = [%d,%d]\n",
4633 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4634 f, masters_place, first_place, last_place));
4638 case proc_bind_close: {
4640 int n_th = team->t.t_nproc;
4642 if (first_place <= last_place) {
4643 n_places = last_place - first_place + 1;
4645 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4647 if (n_th <= n_places) {
4648 int place = masters_place;
4649 for (f = 1; f < n_th; f++) {
4650 kmp_info_t *th = team->t.t_threads[f];
4651 KMP_DEBUG_ASSERT(th != NULL);
4653 if (place == last_place) {
4654 place = first_place;
4655 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4660 th->th.th_first_place = first_place;
4661 th->th.th_last_place = last_place;
4662 th->th.th_new_place = place;
4664 if (__kmp_display_affinity && place != th->th.th_current_place &&
4665 team->t.t_display_affinity != 1) {
4666 team->t.t_display_affinity = 1;
4670 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4671 "partition = [%d,%d]\n",
4672 __kmp_gtid_from_thread(team->t.t_threads[f]),
4673 team->t.t_id, f, place, first_place, last_place));
4676 int S, rem, gap, s_count;
4677 S = n_th / n_places;
4679 rem = n_th - (S * n_places);
4680 gap = rem > 0 ? n_places / rem : n_places;
4681 int place = masters_place;
4683 for (f = 0; f < n_th; f++) {
4684 kmp_info_t *th = team->t.t_threads[f];
4685 KMP_DEBUG_ASSERT(th != NULL);
4687 th->th.th_first_place = first_place;
4688 th->th.th_last_place = last_place;
4689 th->th.th_new_place = place;
4691 if (__kmp_display_affinity && place != th->th.th_current_place &&
4692 team->t.t_display_affinity != 1) {
4693 team->t.t_display_affinity = 1;
4698 if ((s_count == S) && rem && (gap_ct == gap)) {
4700 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4702 if (place == last_place) {
4703 place = first_place;
4704 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4712 }
else if (s_count == S) {
4713 if (place == last_place) {
4714 place = first_place;
4715 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4725 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4726 "partition = [%d,%d]\n",
4727 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4728 th->th.th_new_place, first_place, last_place));
4730 KMP_DEBUG_ASSERT(place == masters_place);
4734 case proc_bind_spread: {
4736 int n_th = team->t.t_nproc;
4739 if (first_place <= last_place) {
4740 n_places = last_place - first_place + 1;
4742 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4744 if (n_th <= n_places) {
4747 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4748 int S = n_places / n_th;
4749 int s_count, rem, gap, gap_ct;
4751 place = masters_place;
4752 rem = n_places - n_th * S;
4753 gap = rem ? n_th / rem : 1;
4756 if (update_master_only == 1)
4758 for (f = 0; f < thidx; f++) {
4759 kmp_info_t *th = team->t.t_threads[f];
4760 KMP_DEBUG_ASSERT(th != NULL);
4762 th->th.th_first_place = place;
4763 th->th.th_new_place = place;
4765 if (__kmp_display_affinity && place != th->th.th_current_place &&
4766 team->t.t_display_affinity != 1) {
4767 team->t.t_display_affinity = 1;
4771 while (s_count < S) {
4772 if (place == last_place) {
4773 place = first_place;
4774 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4781 if (rem && (gap_ct == gap)) {
4782 if (place == last_place) {
4783 place = first_place;
4784 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4792 th->th.th_last_place = place;
4795 if (place == last_place) {
4796 place = first_place;
4797 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4804 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4805 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4806 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4807 f, th->th.th_new_place, th->th.th_first_place,
4808 th->th.th_last_place, __kmp_affinity_num_masks));
4814 double current =
static_cast<double>(masters_place);
4816 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4821 if (update_master_only == 1)
4823 for (f = 0; f < thidx; f++) {
4824 first =
static_cast<int>(current);
4825 last =
static_cast<int>(current + spacing) - 1;
4826 KMP_DEBUG_ASSERT(last >= first);
4827 if (first >= n_places) {
4828 if (masters_place) {
4831 if (first == (masters_place + 1)) {
4832 KMP_DEBUG_ASSERT(f == n_th);
4835 if (last == masters_place) {
4836 KMP_DEBUG_ASSERT(f == (n_th - 1));
4840 KMP_DEBUG_ASSERT(f == n_th);
4845 if (last >= n_places) {
4846 last = (n_places - 1);
4851 KMP_DEBUG_ASSERT(0 <= first);
4852 KMP_DEBUG_ASSERT(n_places > first);
4853 KMP_DEBUG_ASSERT(0 <= last);
4854 KMP_DEBUG_ASSERT(n_places > last);
4855 KMP_DEBUG_ASSERT(last_place >= first_place);
4856 th = team->t.t_threads[f];
4857 KMP_DEBUG_ASSERT(th);
4858 th->th.th_first_place = first;
4859 th->th.th_new_place = place;
4860 th->th.th_last_place = last;
4862 if (__kmp_display_affinity && place != th->th.th_current_place &&
4863 team->t.t_display_affinity != 1) {
4864 team->t.t_display_affinity = 1;
4868 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4869 "partition = [%d,%d], spacing = %.4f\n",
4870 __kmp_gtid_from_thread(team->t.t_threads[f]),
4871 team->t.t_id, f, th->th.th_new_place,
4872 th->th.th_first_place, th->th.th_last_place, spacing));
4876 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4878 int S, rem, gap, s_count;
4879 S = n_th / n_places;
4881 rem = n_th - (S * n_places);
4882 gap = rem > 0 ? n_places / rem : n_places;
4883 int place = masters_place;
4886 if (update_master_only == 1)
4888 for (f = 0; f < thidx; f++) {
4889 kmp_info_t *th = team->t.t_threads[f];
4890 KMP_DEBUG_ASSERT(th != NULL);
4892 th->th.th_first_place = place;
4893 th->th.th_last_place = place;
4894 th->th.th_new_place = place;
4896 if (__kmp_display_affinity && place != th->th.th_current_place &&
4897 team->t.t_display_affinity != 1) {
4898 team->t.t_display_affinity = 1;
4903 if ((s_count == S) && rem && (gap_ct == gap)) {
4905 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4907 if (place == last_place) {
4908 place = first_place;
4909 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4917 }
else if (s_count == S) {
4918 if (place == last_place) {
4919 place = first_place;
4920 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4929 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4930 "partition = [%d,%d]\n",
4931 __kmp_gtid_from_thread(team->t.t_threads[f]),
4932 team->t.t_id, f, th->th.th_new_place,
4933 th->th.th_first_place, th->th.th_last_place));
4935 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4943 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4951 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4953 ompt_data_t ompt_parallel_data,
4956 kmp_proc_bind_t new_proc_bind,
4958 kmp_internal_control_t *new_icvs,
4959 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4960 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4963 int use_hot_team = !root->r.r_active;
4966 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4967 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4968 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4971 #if KMP_NESTED_HOT_TEAMS
4972 kmp_hot_team_ptr_t *hot_teams;
4974 team = master->th.th_team;
4975 level = team->t.t_active_level;
4976 if (master->th.th_teams_microtask) {
4977 if (master->th.th_teams_size.nteams > 1 &&
4980 (microtask_t)__kmp_teams_master ||
4981 master->th.th_teams_level <
4987 hot_teams = master->th.th_hot_teams;
4988 if (level < __kmp_hot_teams_max_level && hot_teams &&
4998 if (use_hot_team && new_nproc > 1) {
4999 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5000 #if KMP_NESTED_HOT_TEAMS
5001 team = hot_teams[level].hot_team;
5003 team = root->r.r_hot_team;
5006 if (__kmp_tasking_mode != tskm_immediate_exec) {
5007 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5008 "task_team[1] = %p before reinit\n",
5009 team->t.t_task_team[0], team->t.t_task_team[1]));
5016 if (team->t.t_nproc == new_nproc) {
5017 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5020 if (team->t.t_size_changed == -1) {
5021 team->t.t_size_changed = 1;
5023 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5027 kmp_r_sched_t new_sched = new_icvs->sched;
5029 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5031 __kmp_reinitialize_team(team, new_icvs,
5032 root->r.r_uber_thread->th.th_ident);
5034 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5035 team->t.t_threads[0], team));
5036 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5039 #if KMP_AFFINITY_SUPPORTED
5040 if ((team->t.t_size_changed == 0) &&
5041 (team->t.t_proc_bind == new_proc_bind)) {
5042 if (new_proc_bind == proc_bind_spread) {
5043 __kmp_partition_places(
5046 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5047 "proc_bind = %d, partition = [%d,%d]\n",
5048 team->t.t_id, new_proc_bind, team->t.t_first_place,
5049 team->t.t_last_place));
5051 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5052 __kmp_partition_places(team);
5055 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5058 }
else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // The saved number of threads should match the team's value in this
        // mode; it can be bigger in mode 1, when the hot team keeps reserves.
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          if (__kmp_tasking_mode != tskm_immediate_exec) {
            // Threads leaving the team must unreference the task team.
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
#if KMP_NESTED_HOT_TEAMS
      } else {
        // Keep extra threads in the team but switch them to wait on their own
        // b_go flag.
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }
#endif // KMP_NESTED_HOT_TEAMS
      team->t.t_nproc = new_nproc;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      // Update remaining threads.
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }

      // Restore the current task state of the master thread.
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#ifdef KMP_DEBUG
      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }
#endif

      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
      __kmp_partition_places(team);
#endif
    } else { // team->t.t_nproc < new_nproc
5132 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5133 kmp_affin_mask_t *old_mask;
5134 if (KMP_AFFINITY_CAPABLE()) {
5135 KMP_CPU_ALLOC(old_mask);
5140 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5143 team->t.t_size_changed = 1;
5145 #if KMP_NESTED_HOT_TEAMS
5146 int avail_threads = hot_teams[level].hot_team_nth;
5147 if (new_nproc < avail_threads)
5148 avail_threads = new_nproc;
5149 kmp_info_t **other_threads = team->t.t_threads;
5150 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5154 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5155 for (b = 0; b < bs_last_barrier; ++b) {
5156 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5157 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5159 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5163 if (hot_teams[level].hot_team_nth >= new_nproc) {
5166 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5167 team->t.t_nproc = new_nproc;
5173 hot_teams[level].hot_team_nth = new_nproc;
5174 #endif // KMP_NESTED_HOT_TEAMS
5175 if (team->t.t_max_nproc < new_nproc) {
5177 __kmp_reallocate_team_arrays(team, new_nproc);
5178 __kmp_reinitialize_team(team, new_icvs, NULL);
5181 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5186 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5190 for (f = team->t.t_nproc; f < new_nproc; f++) {
5191 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5192 KMP_DEBUG_ASSERT(new_worker);
5193 team->t.t_threads[f] = new_worker;
5196 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5197 "join=%llu, plain=%llu\n",
5198 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5199 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5200 team->t.t_bar[bs_plain_barrier].b_arrived));
5204 kmp_balign_t *balign = new_worker->th.th_bar;
5205 for (b = 0; b < bs_last_barrier; ++b) {
5206 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5207 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5208 KMP_BARRIER_PARENT_FLAG);
5210 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5216 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5217 if (KMP_AFFINITY_CAPABLE()) {
5219 __kmp_set_system_affinity(old_mask, TRUE);
5220 KMP_CPU_FREE(old_mask);
5223 #if KMP_NESTED_HOT_TEAMS
5225 #endif // KMP_NESTED_HOT_TEAMS
5227 int old_nproc = team->t.t_nproc;
5229 __kmp_initialize_team(team, new_nproc, new_icvs,
5230 root->r.r_uber_thread->th.th_ident);
5233 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5234 for (f = 0; f < team->t.t_nproc; ++f)
5235 __kmp_initialize_info(team->t.t_threads[f], team, f,
5236 __kmp_gtid_from_tid(f, team));
5244 for (f = old_nproc; f < team->t.t_nproc; ++f)
5245 team->t.t_threads[f]->th.th_task_state =
5246 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5249 team->t.t_threads[0]->th.th_task_state;
5250 for (f = old_nproc; f < team->t.t_nproc; ++f)
5251 team->t.t_threads[f]->th.th_task_state = old_state;
5255 for (f = 0; f < team->t.t_nproc; ++f) {
5256 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5257 team->t.t_threads[f]->th.th_team_nproc ==
5263 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5264 #if KMP_AFFINITY_SUPPORTED
5265 __kmp_partition_places(team);
5271 kmp_info_t *master = team->t.t_threads[0];
5272 if (master->th.th_teams_microtask) {
5273 for (f = 1; f < new_nproc; ++f) {
5275 kmp_info_t *thr = team->t.t_threads[f];
5276 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5277 thr->th.th_teams_level = master->th.th_teams_level;
5278 thr->th.th_teams_size = master->th.th_teams_size;
5282 #if KMP_NESTED_HOT_TEAMS
5286 for (f = 1; f < new_nproc; ++f) {
5287 kmp_info_t *thr = team->t.t_threads[f];
5289 kmp_balign_t *balign = thr->th.th_bar;
5290 for (b = 0; b < bs_last_barrier; ++b) {
5291 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5292 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5294 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5299 #endif // KMP_NESTED_HOT_TEAMS
5302 __kmp_alloc_argv_entries(argc, team, TRUE);
5303 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5307 KF_TRACE(10, (
" hot_team = %p\n", team));
5310 if (__kmp_tasking_mode != tskm_immediate_exec) {
5311 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5312 "task_team[1] = %p after reinit\n",
5313 team->t.t_task_team[0], team->t.t_task_team[1]));
5318 __ompt_team_assign_id(team, ompt_parallel_data);
  /* next, try to take a team from the team pool */
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      /* set up the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        int b;
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

      __ompt_team_assign_id(team, ompt_parallel_data);

      return team;
    }

    /* reap the team if it is too small, then loop back and check the next one */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }
  /* nothing available in the pool; make a new team */
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL;
  team->t.t_task_team[1] = NULL;

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
    }
  }

  team->t.t_proc_bind = new_proc_bind;

  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
/* free the team; return it to the team pool and release its threads */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level; // level not increased in teams construct for team of masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level; // level not increased in teams construct for team of workers
      }
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // important for debugging support library
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; // init counter for possible reuse
#endif

  /* if we are a non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for worker threads to reach a reapable state.
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time; check for that.
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the state to safe-to-reap
            break;
          }
#endif
          // First check whether the thread is sleeping.
          kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete the task teams.
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset the pointer to the parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;
5548 for (f = 1; f < team->t.t_nproc; ++f) {
5549 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5550 __kmp_free_thread(team->t.t_threads[f]);
5551 team->t.t_threads[f] = NULL;
5556 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5557 __kmp_team_pool = (
volatile kmp_team_t *)team;
5560 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5561 team->t.t_threads[1]->th.th_cg_roots);
5562 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5564 for (f = 1; f < team->t.t_nproc; ++f) {
5565 kmp_info_t *thr = team->t.t_threads[f];
5566 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5567 thr->th.th_cg_roots->cg_root == thr);
5569 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5570 thr->th.th_cg_roots = tmp->up;
5571 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5572 " up to node %p. cg_nthreads was %d\n",
5573 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5576 if (thr->th.th_cg_roots)
5577 thr->th.th_current_task->td_icvs.thread_limit =
5578 thr->th.th_cg_roots->cg_thread_limit;
/* reap the team: destroy it, reclaim all its resources, and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* free stuff */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
// Free the thread.  Don't reap it, just place it on the pool of available
// threads.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving a thread to the pool, switch it to wait on its own b_go flag
  // and clear its team pointers.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put the thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // worker thread
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* The implicit task may be shared with other threads, so free it here;
     after this point the implicit task may be released. */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If __kmp_thread_pool_insert_pt is already past the new insert point,
  // we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread, keeping the
  // free pool sorted by gtid.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element in the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_thread_pool_nth++;

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting or default if necessary. */
  /* Middle initialization might never have occurred. */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
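/* Worker thread main loop: each worker parks in the fork barrier until the
   master wires it into a team, runs the team's microtask via t_invoke, passes
   through the join barrier, and goes back to waiting until library shutdown
   sets __kmp_global.g.g_done. */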
void *__kmp_launch_thread(kmp_info_t *this_thr) {
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t *(*volatile pteam);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }

  ompt_data_t *thread_data;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
  }

  if (ompt_enabled.enabled) {
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }

    pteam = (kmp_team_t * (*))(&this_thr->th.th_team);

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
      /* join barrier after the parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
// significant bits
#endif
  // Make sure no significant bits are lost
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in thread-local storage, because 0 is
     reserved for the nothing-stored case */

  if (gtid >= 0 && KMP_UBER_GTID(gtid))
    __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }

#endif

/* [Windows] When the atexit handler is called, there may still be more than
   one thread alive. */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
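/* __kmp_reap_thread is the final teardown of a worker: release it from the
   fork barrier if blocktime is finite, join the OS thread, then free all
   per-thread resources (implicit task, fast memory, consistency stack,
   task-state memo stack, affinity mask, serialized team). */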
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // Assumes __kmp_forkjoin_lock is held.
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      ANNOTATE_HAPPENS_BEFORE(thread);
      kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
      __kmp_release_64(&flag);
    }

    // Terminate the OS thread.
    __kmp_reap_worker(thread);

    // If the thread was actively spinning in the thread pool, decrement the
    // global count.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }

    // Decrement the number of worker threads in the pool.
    KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
    --__kmp_thread_pool_nth;
  }

  __kmp_free_implicit_task(thread);

// Free the fast memory for tasking
#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif /* USE_FAST_MEMORY */

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting or default if necessary. */
  /* Middle initialization might never have occurred. */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);
}
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library. */
  __kmp_unregister_library();

  /* Reclaim any dead roots before checking for live ones. */
  __kmp_reclaim_dead_roots();

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // Other alive roots found; it is not safe to reap the worker threads, but
    // the monitor can be reaped.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
#ifdef KMP_DEBUG
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        KMP_ASSERT(!__kmp_root[i]->r.r_active);
      }
    }
#endif

    // Reap the worker threads.
    while (__kmp_thread_pool != NULL) { // Loop through all threads in the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap the teams.
    while (__kmp_team_pool != NULL) { // Loop through all teams in the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

    // Make sure that none of the threads are busy executing the library
    // shutdown code.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking here.
    }

    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));

#if KMP_USE_MONITOR
    // Reap the monitor thread now that all workers are gone.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  }

  TCW_4(__kmp_init_gtid, FALSE);

  __kmp_cleanup();
}
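/* The two public shutdown entry points below (library / thread variants)
   follow the same protocol: bail out early if shutdown already happened or a
   root is still active, then take __kmp_initz_lock and __kmp_forkjoin_lock,
   in that order, before calling __kmp_internal_end(). */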
void __kmp_internal_end_library(int gtid_req) {
  /* If we have already cleaned up, don't try again; __kmp_internal_end() is
     the only place that clears __kmp_serial_init. We'll check again after we
     get the lock. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shut down the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: root still active, abort T#%d\n",
             gtid));
        return;
      } else {
        KA_TRACE(
            10,
            ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler if they
   call exit(); just dump the debug buffer for now */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* This lock enforces mutual exclusion between this read of
     __kmp_threads_capacity and the write in __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();
}
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* If we have already cleaned up, don't try again; __kmp_internal_end() is
     the only place that clears __kmp_serial_init. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread; gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(
            10,
            ("__kmp_internal_end_thread: root still active, abort T#%d\n",
             gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread; let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
           gtid));
      return;
    }
  }
#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused) {
    // Do not shut the dynamic library down at uber-thread exit; the library
    // destructor will do a better job later.
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* This lock enforces mutual exclusion between this read of
     __kmp_threads_capacity and the write in __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */

  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
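/* Library registration: each copy of the runtime publishes an environment
   variable __KMP_REGISTERED_LIB_<pid> holding
   "<flag address>-<flag value>-<library file>". A second copy loaded into the
   same process finds the variable, checks whether the advertised flag address
   is still mapped and still holds the advertised value, and aborts with a
   DuplicateLibrary error unless KMP_DUPLICATE_LIB_OK is set. */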
static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
}
void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union { // For convenient conversion of the time.
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  while (!done) {
    char *value = NULL; // Actual value of the environment variable.

    // Set the environment variable, but do not overwrite it if it exists.
    __kmp_env_set(name, __kmp_registration_str, 0);
    // Check whether the variable was written.
    value = __kmp_env_get(name);
    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
      done = 1; // Ok, environment variable set successfully, exit the loop.
    } else {
      // Write failed: another copy of the OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        long *flag_addr = 0;
        long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // Check whether the environment-encoded address is mapped into the
          // address space; if so, dereference it to see whether it still has
          // the right value.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1;
          } else {
            // The other copy of the library is no longer running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse environment variable -- neighbor status unknown.
        // Assume an incompatible format from a future version of the library,
        // and that the other library is alive.
        file_name = "unknown library";
        // Attention! Falling through to the next case is intentional.
      case 1: { // Neighbor is alive.
        // Check whether duplicates are allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // That's not allowed. Issue a fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // Exit the loop.
      } break;
      case 2: { // Neighbor is dead.
        // Clear the variable and try to register the library again.
        __kmp_env_unset(name);
      } break;
      default: { KMP_DEBUG_ASSERT(0); } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
  }
  KMP_INTERNAL_FREE((void *)name);
}
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, this is our variable. Delete it.
    __kmp_env_unset(name);
  }

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;
}
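/* On x86 targets with KMP_MIC_SUPPORTED the runtime sniffs CPUID leaf 1 to
   distinguish mic2 from mic3 parts; the result only tunes barrier branch
   factors and patterns later in serial initialization. */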
#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment.
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

  __kmp_validate_locks();

  /* Initialize the internal memory allocator. */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable and check
     whether another copy of the library is already registered. */
  __kmp_register_library_startup();

  /* TODO: reinitialization of the library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */
  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

  // Some global variable initialization moved here from kmp_env_initialize()
  __kmp_abort_delay = 0;
  /* various calculations for default team sizes and thread counts */
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // The variables below were moved here from the "KMP_BLOCKTIME" part of
  // __kmp_env_initialize().
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From the "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;
// Barrier initialization defaults.
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat bp_hyper_bar
#define kmp_reduction_barrier_release_pat bp_hyper_bar
#endif // KMP_FAST_REDUCTION_BARRIER
6688 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6689 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6690 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6691 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6692 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6693 #if KMP_FAST_REDUCTION_BARRIER
6694 if (i == bs_reduction_barrier) {
6696 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6697 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6698 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6699 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6701 #endif // KMP_FAST_REDUCTION_BARRIER
6703 #if KMP_FAST_REDUCTION_BARRIER
6704 #undef kmp_reduction_barrier_release_pat
6705 #undef kmp_reduction_barrier_gather_pat
6706 #undef kmp_reduction_barrier_release_bb
6707 #undef kmp_reduction_barrier_gather_bb
6708 #endif // KMP_FAST_REDUCTION_BARRIER
6709 #if KMP_MIC_SUPPORTED
6710 if (__kmp_mic_type == mic2) {
6712 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6713 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6715 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6716 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6718 #if KMP_FAST_REDUCTION_BARRIER
6719 if (__kmp_mic_type == mic2) {
6720 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6721 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6723 #endif // KMP_FAST_REDUCTION_BARRIER
6724 #endif // KMP_MIC_SUPPORTED
// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_env_initialize(NULL);

// Print all messages in the message catalog for testing purposes.
#ifdef KMP_DEBUG
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
#endif

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from the "KMP_ALL_THREADPRIVATE" part of __kmp_env_initialize()
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if the pools were not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable-sized records. __kmp_threads_capacity
     entries are allocated, but the arrays are expandable. */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth == 0); // asserts fail if the library is
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // reinitializing and termination was wrong
  __kmp_all_nth = 0;
  __kmp_nth = 0;
  /* set up the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  __kmp_common_initialize();

#if KMP_OS_UNIX
  /* invoke the child fork handler */
  __kmp_register_atfork();
#endif

#if !KMP_DYNAMIC_LIB
  {
    /* Invoke the exit handler when the program finishes. Only needed for the
       static library; the dynamic library already has _fini / DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  __kmp_install_signals(FALSE);
#endif
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;
  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }
  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
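/* Initialization is staged: serial init (locks, env, thread/root arrays,
   initial root registration) runs once per process, middle init adds affinity
   and the default team size once __kmp_avail_proc is known, and parallel init
   completes the setup the first time a parallel region actually forks. */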
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  // Save the previous value of __kmp_dflt_team_nth so we can avoid some
  // reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the number of
  // cores on the machine.
  __kmp_affinity_initialize();

  // Set the affinity mask for each root thread that is currently registered
  // with the RTL.
  for (i = 0; i < __kmp_threads_capacity; i++) {
    if (TCR_PTR(__kmp_threads[i]) != NULL) {
      __kmp_affinity_set_init_mask(i, TRUE);
    }
  }
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  // A failure here indicates an error in the earlier logic, but it is safe to
  // continue.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Set the nthreads-var for each registered root thread that has not
    // already set it explicitly via omp_set_num_threads().
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO: reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize
     would cause a deadlock; call __kmp_do_middle_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs; worker threads will set theirs to these values
  // at thread startup.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_OS_UNIX
#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
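/* The helpers below bracket every microtask invocation: reset the dispatch
   buffer and (optionally) push/pop consistency-check state around the user
   code that __kmp_invoke_task_func runs. */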
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset doacross dispatch buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
  }
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

  void *dummy;
  void **exit_runtime_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_runtime_p = &(
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_runtime_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                                tid, (int)team->t.t_argc,
                                (void **)team->t.t_argv, exit_runtime_p);
    *exit_runtime_p = NULL;
  }

  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
  }
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
void __kmp_teams_master(int gtid) {
  // This routine is called by all master threads in the teams construct.
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to the thread limit stored when the league masters were forked.
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_threads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch the league of teams now, but don't let workers execute (they hang on
// the fork barrier until the next parallel region).
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size.
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;

  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
  __kmp_teams_master(gtid);
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
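/* The __kmp_push_* routines stash requested ICV overrides on the current
   thread so the next fork picks them up. For teams, num_teams * num_threads
   is clamped to __kmp_teams_max_nth; e.g. with __kmp_teams_max_nth == 256 and
   num_teams == 64, an explicit thread limit of 8 is reduced to 256/64 = 4. */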
/* Set the requested number of threads for the next parallel region
   encountered by this team. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

/* Set the requested number of teams for the teams region and/or the number of
   threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0)
    num_teams = 1; // default number of teams is 1
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set the number of teams (number of threads in the outer "parallel" of the
  // teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  // Remember the number of threads for the inner parallel regions.
  if (num_threads == 0) {
    if (!TCR_4(__kmp_init_middle))
      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
    num_threads = __kmp_avail_proc / num_teams;
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      // adjust num_threads without a warning; it is not a user setting
      num_threads = __kmp_teams_max_nth / num_teams;
    }
  } else {
    // This thread will be the master of the league masters. Store the new
    // thread limit; the old limit is saved in the th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;

    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (!__kmp_reserve_warn) { // user asked for too many threads
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

// Set the proc_bind var to use for the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }

  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we are at
// the outermost level of parallelism; otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the master thread
  }

  // Skip the master thread; it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this root (if at the outer parallel level), and the current thread (which
  // becomes the master) are available for the new team but still contribute
  // to the system load, so account for them.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm detecting new
  // running procs; the real system load at this instant should be at least as
  // large as the number of active omp threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
/* NOTE: this is called with the __kmp_init_lock held */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so there is
  // no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* must be called in the serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common to all team APIs; returns NULL if not in a
   teams construct. */
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> nesting level
  const char *long_name; // from spec e.g., nesting_level
  char field_format; // how the field should be formatted
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Parse one %-field of an affinity format string starting at *ptr, print the
// corresponding value into field_buffer, and advance *ptr past the field.
// Returns the number of characters produced.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      int length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If the implementation has no information for the field type,
    // "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip over the unknown field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
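/* Illustrative only: the modifier parsing above turns a field such as "%0.8L"
   (pad with zeros, '.' requests right-justification, minimum width 8) into the
   snprintf format "%08d", while a bare "%L" becomes "%-d", i.e. left-justified
   with no minimum width. */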
// Returns the number of characters needed to hold the affinity string (not
// counting the null byte). The resulting string is printed to buffer, which
// the caller then owns.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    if (*parse_ptr == '%') {
      // Parse a field and put it in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
#endif // OMP_50_ENABLED
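/* Usage sketch (illustrative): the capture/display routines above implement
   the OpenMP 5.0 affinity-display API; the format string is only an example.

     #include <omp.h>
     int main(void) {
       omp_set_affinity_format("OMP: pid %P tid %n affinity %A");
       #pragma omp parallel
       omp_display_affinity(NULL); // NULL => use the affinity-format-var ICV
       return 0;
     }
*/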
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);

#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
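/* Usage sketch (illustrative): this is the worker behind the kmp_set_blocktime()
   extension declared in omp.h and the KMP_BLOCKTIME environment variable; the
   value 0 below is only an example and makes idle workers sleep immediately.

     #include <omp.h>
     kmp_set_blocktime(0); // applies to regions subsequently run by this thread
*/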
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
#if OMP_40_ENABLED
      || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method (lck != NULL). A
  // better method is chosen from the team size, the number of variables, and
  // what the compiler was able to generate (atomic reduce block and/or a tree
  // reduce function).

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||        \
    KMP_OS_KFREEBSD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || ...

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_KFREEBSD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
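/* Illustrative only: for a construct such as
     #pragma omp parallel for reduction(+ : sum)
   the compiler emits __kmpc_reduce / __kmpc_reduce_nowait calls whose loc flags
   (KMP_IDENT_ATOMIC_REDUCE) and reduce_data / reduce_func arguments feed the
   selection above; setting the runtime-specific KMP_FORCE_REDUCTION environment
   variable (critical | atomic | tree) routes through the forced path instead. */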
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
#if OMP_50_ENABLED
// Soft pause: threads ignore blocktime and just go to sleep; spin-wait code
// checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause: shuts down the runtime completely. Resume happens naturally
// when OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume: clears __kmp_pause_status and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may go to sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// Called via __kmpc_pause_resource. Returns 0 on success, 1 on failure
// (e.g. trying to resume a runtime that is not paused).
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // the runtime is not paused, so nothing to resume
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
#endif // OMP_50_ENABLED
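/* Usage sketch (illustrative): __kmp_pause_resource backs the OpenMP 5.0
   omp_pause_resource / omp_pause_resource_all entry points, e.g.

     #include <omp.h>
     #pragma omp parallel
     { }                                     // start the runtime
     omp_pause_resource_all(omp_pause_soft); // workers release the CPU and sleep
     omp_pause_resource_all(omp_pause_hard); // runtime shuts down completely
     #pragma omp parallel                    // next region restarts it on demand
     { }
*/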