#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"
#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
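
/* Determine the global thread id (gtid) of the calling thread.  Depending on
   __kmp_gtid_mode the id is read from thread-local data (TDATA), from keyed
   thread-specific storage, or recovered by matching the address of a local
   variable against the stack extents recorded for each registered thread. */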
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* The stack grows down: the calling thread is the one whose recorded
       stack extent contains the address of a local variable. */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* The internal algorithm failed; fall back to keyed TLS. */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* Refine the recorded stack bounds for this thread. */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint the storage map for the refined stack bounds. */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* We must be a new uber master sibling thread. */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
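
/* Verify that this thread's stack does not overlap the stack recorded for any
   other registered thread; print the offending extents when storage maps are
   enabled and abort with a StackOverlap diagnostic on a real overlap. */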
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking uber-master threads since they use refinement and
     cannot overlap. */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap);
}
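
/* Print a runtime warning through the locked I/O path; suppressed entirely
   when warnings are disabled (__kmp_generate_warnings == kmp_warnings_off). */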
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill
  // them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  // Let other threads know of abnormal termination.
  __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process
void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // lpReserved == NULL when the DLL is being unloaded via FreeLibrary();
    // in that case shut the library down for this process.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* Use the tid of the next thread in this team. */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
/* The BARRIER for a SINGLE process section is always explicit. */
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* Try to set the team count to the thread count -- success means this
       thread got the single block. */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      /* Only report metadata by the primary thread of an active team at
         level 1. */
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}
void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
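
/* Decide how many threads the forking thread may actually reserve for a new
   team.  The request (set_nthreads) is trimmed in turn by the dynamic
   adjustment mode (load balance, thread limit, or random), by
   KMP_DEVICE_THREAD_LIMIT, by the contention-group limit (OMP_THREAD_LIMIT),
   and finally by the capacity of the threads array.  The result is at least
   1; a result of 1 tells the caller to serialize the region. */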
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // no adjustment necessary
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a one-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect the contention-group limit (OMP_THREAD_LIMIT).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a one-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If this is not for initializing the hidden helper team, take
  // __kmp_hidden_helper_threads_num out of the capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a one-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
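
/* Allocate (or reuse from a hot team) the worker threads for a newly forked
   team: install the primary thread, fork or recycle each worker, align their
   barrier arrived counters with the team, propagate teams-construct state,
   and partition affinity places when appropriate. */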
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* First, set up the primary thread. */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* Make sure we are not the optimized hot team. */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {
    /* Install the primary thread. */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* Now, install the worker threads. */
    for (i = 1; i < team->t.t_nproc; i++) {
      /* Fork or reallocate a new thread and install it in the team. */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* Align team and thread arrived states. */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers that
    // haven't been forked to do real work yet; that happens in the parallel
    // region nested within the teams construct.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team, avoiding unnecessary writes to the team structure's cache line.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get the primary thread's values of the FPU control flags (both X87 and
    // vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: set the hardware registers to the updated values from the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
/* Most of the work for a serialized parallel region is done here. */
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region.
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region.
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* This serial team was already used; we need to allocate a new one. */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* Copy the parent task's ICVs so the serialized region starts from the
       enclosing values. */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* This serialized team is already being used; just add another nested
       level. */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array.
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel
  // regions.
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV.
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // Don't use lw_taskteam after linking; content was swapped.

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
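
/* Helper predicates used by __kmp_fork_call: the first detects a parallel
   region closely nested inside a teams construct, the second detects the fork
   that creates the league of teams itself. */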
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
// Start of a parallel region that is nested inside a teams construct.
// The team is actual (hot); all workers are ready at the fork barrier.
static inline int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // AC: we are in serialized parallel
    __kmpc_serialized_parallel(loc, gtid);
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly; will restore at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

#if OMPD_SUPPORT
    parent_team->t.t_pkfn = microtask;
#endif

#if OMPT_SUPPORT
    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // Don't use lw_taskteam after linking; content was swapped.

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }
#endif

    // AC: need to decrement t_serialized for enquiry functions to work
    // correctly; will restore at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // If the threads allocated to the team are less than the thread limit,
  // update the thread limit here. th_teams_size.nth is specific to this team
  // nested in a teams construct; the team is fully created.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }
#endif

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // AC: can only reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

#if USE_DEBUGGER
  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change num threads
      master_set_numthreads = nth;
    }
  }
#endif

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on the parallel
       clause; this overrides proc-bind-var for this parallel region only. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to the correct value for each implicit
  // task.
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
// Create a serialized parallel region and invoke the microtask on the calling
// thread.
static inline int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
#if OMPT_SUPPORT
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
#endif
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void *args[argc];
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

#if OMPD_SUPPORT
  master_th->th.th_serial_team->t.t_pkfn = microtask;
#endif

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      // AC: revert change made in __kmpc_serialized_parallel() because
      //     initial code in teams should have level = 0
      team->t.t_level--;
      // AC: call special invoker for outer "parallel" of teams construct
      invoker(gtid);
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

#if OMPT_SUPPORT
      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                               ,
                               exit_frame_p
#endif
        );
      }

#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
    }
  } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
// don't use lw_taskteam after linking; content was swapped
#endif

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with a
       clean stack space NOT piled up on top of the previous fork call's
       leftovers; pad the stack a little if needed. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These two lines keep the allocation from being optimized out. */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  task_thread_limit =
      master_th->th.th_current_task->td_icvs.task_thread_limit;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Assign affinity to the root thread if it hasn't happened yet.
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array.
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  master_th->th.th_ident = loc;

  // Parallel closely nested in teams construct:
  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
#if OMPT_SUPPORT
                               ompt_parallel_data, return_address,
#endif
                               ap);
  } // End parallel closely nested in teams construct

#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  /* Determine how many threads the new team will have. */
  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   // TODO: get nproc directly from current task
                   : get__nproc_2(parent_team, master_tid);
    // Use the thread_limit set for the current target task if it exists,
    // else go with the deduced nthreads.
    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                   ? task_thread_limit
                   : nthreads;
    // Check if we need to take the forkjoin lock (no need for serialized
    // parallel out of a teams construct).
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      /* AC: If we execute teams from a parallel region (on host), then teams
         should be created but each can only have 1 thread if nesting is
         disabled. If teams is called from a serial region, then teams and
         their threads should be created regardless of the nesting setting. */
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it will be freed later, after the team of threads has
        // been created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads, reset it now.
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
#if OMPT_SUPPORT
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data,
#endif
                                  ap);
  } // if (nthreads == 1)

  // GEH: only modify the executing flag in the case when not serialized;
  //      serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var for this
    // parallel region.
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have the teams construct take its proc_bind value from
    // KMP_TEAMS_PROC_BIND.
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc bind ICV for the two teams after the teams
      // construct forks were launched.
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at the start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set primary thread's schedule as the new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set primary thread's task team to team's task team. Unless this is a
    // hot team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of primary thread's task_state.
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init the rest of the stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store primary thread's task_state on the stack.
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore primary thread's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else
// only one notification scheme (either "submit" or "forking/joined", not both)
#endif /* USE_ITT_NOTIFY */
        if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
            __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // create a new stack stitching id before entering the fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep the stack stitching id in the serialized parent_team; the
      // current team will be used for the parallel inside the teams
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }
#endif /* USE_ITT_BUILD */

  // AC: skip __kmp_internal_fork at teams construct; let only primary
  // threads execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If this was the beginning of a teams construct, then reset thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team,
                                   ompt_data_t *parallel_data, int flags,
                                   void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context, int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;
  // For GOMP interface with serialized parallel, need the
  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
  // and end-parallel events.
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        // so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams, need to increment
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    //     But there is barrier for external team (league).
    __kmp_internal_join(loc, gtid, team);
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
      // destroy the stack stitching id after join barrier
      __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
      team->t.t_stack_id = NULL;
    }
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
    if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
      // destroy the stack stitching id on exit from the teams construct
      __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
      parent_team->t.t_stack_id = NULL;
    }
  }

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask || /* not in teams construct */
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined")
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1

#if KMP_AFFINITY_SUPPORTED
  // Restore primary thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif // KMP_AFFINITY_SUPPORTED

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of parallel
    // inside the teams construct, so that at the next parallel the same (hot)
    // team works; only adjust nesting levels.
    ompt_data_t ompt_parallel_data = ompt_data_none;
    if (ompt_enabled.enabled) {
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      if (ompt_enabled.ompt_callback_implicit_task) {
        int ompt_team_size = team->t.t_nproc;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      task_info->frame.exit_frame = ompt_data_none;
      task_info->task_data = ompt_data_none;
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
    }

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore the number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region
     from the serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

  master_th->th.th_def_allocator = team->t.t_def_allocator;

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root,
                  team USE_NESTED_HOT_ARG(master_th)); // free worker threads

  /* The following assignments must stay inside the critical region so that
     the team hierarchy remains consistent for other threads. */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember primary thread's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    } else if (team != root->r.r_hot_team) {
      // Reset the task state of primary thread if we are not hot team because
      // in this case all the worker threads will be free and their task state
      // will be reset.
      master_th->th.th_task_state = 0;
    }
    // Copy the task team from the parent team to the primary thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
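
// __kmp_set_num_threads: implements omp_set_num_threads() for the calling
// thread. The value is clamped to [1, __kmp_max_nth] and stored in the nproc
// ICV; if the root's hot team is now larger than needed (and nested hot teams
// are not in use), the surplus hot-team threads are released immediately
// instead of waiting for the next parallel region.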
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;
  int f;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is used.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range:
    // [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]. We allow a zero value.
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, correct it to the upper limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
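
// __kmp_set_schedule: maps a kmp_sched_t kind plus chunk (omp_set_schedule)
// onto the internal sched_type stored in the current task's ICVs. Kinds
// outside the standard and extended ranges produce a warning and fall back to
// the default schedule; auto or a chunk < 1 stores KMP_DEFAULT_CHUNK.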
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, show this via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  if (level == ii)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // AC: As we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided, which may be changed independently.
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
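
// __kmp_alloc_argv_entries: sizes the microtask argument array for a team.
// Small argument counts reuse the inline t_inline_argv storage embedded in the
// team structure; larger counts get a page-allocated array of at least
// KMP_MIN_MALLOC_ARGV_ENTRIES (or 2 * argc + 1) entries.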
/* Allocate (realloc, if needed) the argument array for a team. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : (2 * argc + 1);
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level (corresponds to th_team_serialized)
      (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
      (kmp_int8)__kmp_env_blocktime, // int bt_set (blocktime explicitly set?)
      __kmp_dflt_blocktime, // int blocktime
      __kmp_dflt_team_nth, // int nproc (# threads for next parallel region)
      __kmp_cg_max_nth, // int thread_limit
      __kmp_dflt_max_active_levels, // int max_active_levels
      r_sched, // kmp_r_sched_t sched ({sched,chunk} pair)
      __kmp_nested_proc_bind.bind_types[0], // kmp_proc_bind_t proc_bind
      __kmp_default_device, // int default_device
      NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serialized but that's redundant for the purpose
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
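
// __kmp_initialize_root: sets up a freshly allocated kmp_root_t: the root team
// (serialized, one thread) and the hot team reused by top-level parallel
// regions, both initialized from the current global ICVs and schedule.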
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
#if KMP_AFFINITY_SUPPORTED
  root->r.r_affinity_assigned = FALSE;
#endif

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
                          ompt_data_none, // root parallel id
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
                          ompt_data_none, // root parallel id
                          __kmp_nested_proc_bind.bind_types[0], &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // primary thread is unknown
      );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  // List must terminate with item where both entry and next are NULL.
  // Team is added to the list only once.
  // List is sorted in ascending order by team id.

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
  if (team == NULL) {
    return;
  }

  __kmp_print_structure_team_accum(list, team->t.t_parent);
  __kmp_print_structure_team_accum(list, team->t.t_next_pool);

  // Search list for the team.
  l = list;
  while (l->next != NULL && l->entry != team) {
    l = l->next;
  }
  if (l->next != NULL) {
    return; // Team has been added before, exit.
  }

  // Team is not found. Search list again for insertion point.
  l = list;
  while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
    l = l->next;
  }

  // Insert team.
  {
    kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
        sizeof(kmp_team_list_item_t));
    *item = *l;
    l->entry = team;
    l->next = item;
  }
}
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
void __kmp_print_structure(void) {

  kmp_team_list_t list;

  // Initialize list of teams.
  list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
      sizeof(kmp_team_list_item_t));
  list->entry = NULL;
  list->next = NULL;

  __kmp_printf("\n------------------------------\nGlobal Thread "
               "Table\n------------------------------\n");
  {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      __kmp_printf("%2d", gtid);
      if (__kmp_threads != NULL) {
        __kmp_printf(" %p", __kmp_threads[gtid]);
      }
      if (__kmp_root != NULL) {
        __kmp_printf(" %p", __kmp_root[gtid]);
      }
      __kmp_printf("\n");
    }
  }

  // Print out __kmp_threads array.
  __kmp_printf("\n------------------------------\nThreads\n--------------------"
               "----------\n");
  if (__kmp_threads != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t const *thread = __kmp_threads[gtid];
      if (thread != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, thread);
        __kmp_printf("    Our Root:       %p\n", thread->th.th_root);
        __kmp_print_structure_team("    Our Team:       ", thread->th.th_team);
        __kmp_print_structure_team("    Serial Team:    ",
                                   thread->th.th_serial_team);
        __kmp_printf("    Threads:       %2d\n", thread->th.th_team_nproc);
        __kmp_print_structure_thread("    Primary:        ",
                                     thread->th.th_team_master);
        __kmp_printf("    Serialized?:   %2d\n", thread->th.th_team_serialized);
        __kmp_printf("    Set NProc:     %2d\n", thread->th.th_set_nproc);
        __kmp_printf("    Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
        __kmp_print_structure_thread("    Next in pool:   ",
                                     thread->th.th_next_pool);
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, thread->th.th_team);
        __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
      }
    }
  } else {
    __kmp_printf("Threads array is not allocated.\n");
  }

  // Print out __kmp_root array.
  __kmp_printf("\n------------------------------\nUbers\n----------------------"
               "--------\n");
  if (__kmp_root != NULL) {
    int gtid;
    for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_root_t const *root = __kmp_root[gtid];
      if (root != NULL) {
        __kmp_printf("GTID %2d %p:\n", gtid, root);
        __kmp_print_structure_team("    Root Team:    ", root->r.r_root_team);
        __kmp_print_structure_team("    Hot Team:     ", root->r.r_hot_team);
        __kmp_print_structure_thread("    Uber Thread:  ",
                                     root->r.r_uber_thread);
        __kmp_printf("    Active?:      %2d\n", root->r.r_active);
        __kmp_printf("    In Parallel:  %2d\n",
                     KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
        __kmp_printf("\n");
        __kmp_print_structure_team_accum(list, root->r.r_root_team);
        __kmp_print_structure_team_accum(list, root->r.r_hot_team);
      }
    }
  } else {
    __kmp_printf("Ubers array is not allocated.\n");
  }

  // Print out accumulated list of teams.
  __kmp_printf("\n------------------------------\nTeams\n----------------------"
               "--------\n");
  while (list->next != NULL) {
    kmp_team_p const *team = list->entry;
    int i;
    __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
    __kmp_print_structure_team("    Parent Team:      ", team->t.t_parent);
    __kmp_printf("    Primary TID:      %2d\n", team->t.t_master_tid);
    __kmp_printf("    Max threads:      %2d\n", team->t.t_max_nproc);
    __kmp_printf("    Levels of serial: %2d\n", team->t.t_serialized);
    __kmp_printf("    Number threads:   %2d\n", team->t.t_nproc);
    for (i = 0; i < team->t.t_nproc; ++i) {
      __kmp_printf("    Thread %2d:      ", i);
      __kmp_print_structure_thread("", team->t.t_threads[i]);
    }
    __kmp_print_structure_team("    Next in pool:     ", team->t.t_next_pool);
    __kmp_printf("\n");
    list = list->next;
  }

  // Print out __kmp_thread_pool and __kmp_team_pool.
  __kmp_printf("\n------------------------------\nPools\n----------------------"
               "--------\n");
  __kmp_print_structure_thread("Thread pool:          ",
                               CCAST(kmp_info_t *, __kmp_thread_pool));
  __kmp_print_structure_team("Team pool:            ",
                             CCAST(kmp_team_t *, __kmp_team_pool));
  __kmp_printf("\n");

  // Free team list.
  while (list != NULL) {
    kmp_team_list_item_t *item = list;
    list = list->next;
    KMP_INTERNAL_FREE(item);
  }
}
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
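
// __kmp_get_random / __kmp_init_random: per-thread linear congruential
// generator. The multiplier 'a' is picked from __kmp_primes based on the
// thread id, the state is advanced as x = a * x + 1, and the high 16 bits of
// the previous state are returned as the random value.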
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]
             ->r.r_active) { // AC: reclaim only roots died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
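
// __kmp_expand_threads: grows the __kmp_threads/__kmp_root arrays by doubling
// the capacity (capped at __kmp_sys_max_nth) until the requested headroom
// fits. The old arrays are kept on __kmp_old_threads_list so concurrent
// readers remain valid, and the threadprivate cache is resized if needed.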
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

  // All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
  // resizing __kmp_threads does not need additional protection if foreign
  // threads are present.

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library: reclaim array entries for root threads
     that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put old __kmp_threads array in a list. Any ongoing references to the old
  // list will be valid. This list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
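
// __kmp_register_root: registers the calling thread as a root thread. It
// finds a free gtid slot (respecting the slot reserved for the initial thread
// and the slots reserved for hidden helper threads), allocates and initializes
// the kmp_root_t, the root kmp_info_t, its reserve serial team, barrier data
// and contention-group root, and returns the assigned gtid.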
/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if we are
   the thread that calls from __kmp_do_serial_initialize() */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* see if there are too many threads; slot #0 is reserved for the initial
     thread, so it does not count as free capacity for other roots */
  capacity = __kmp_threads_capacity;
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }

  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is included
  // in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }

  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
    if (__kmp_tp_cached) {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread),
                  KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                  KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
                  __kmp_msg_null);
    }
  }

  // When hidden helper task is enabled, __kmp_threads is organized as follows:
  // 0: initial thread, also a regular OpenMP thread.
  // [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.
  // [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for
  // regular OpenMP threads.
  if (TCR_4(__kmp_init_hidden_helper_threads)) {
    // Find an available thread slot for hidden helper thread.
    for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
                   gtid <= __kmp_hidden_helper_threads_num;
         gtid++)
      ;
    KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
    KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
                 "hidden helper thread: T#%d\n",
                 gtid));
  } else {
    /* find an available thread slot; don't reassign the zero slot since it is
       reserved for the initial thread, and skip hidden helper slots */
    if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
      gtid = 0;
    } else {
      for (gtid = __kmp_hidden_helper_threads_num + 1;
           TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
        ;
    }
    KA_TRACE(
        1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
    KMP_ASSERT(gtid < __kmp_threads_capacity);
  }

  /* update global accounting */
  __kmp_all_nth++;
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for
  // low numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary; middle initialization might not
     have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* setup this new hierarchy */
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
  __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
  __kmp_stats_thread_ptr->startLife();
  KMP_SET_THREAD_STATE(SERIAL_REGION);
#endif
  __kmp_initialize_root(root);

  /* setup new root thread structure */
  if (root->r.r_uber_thread) {
    root_thread = root->r.r_uber_thread;
  } else {
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
    if (__kmp_storage_map) {
      __kmp_print_thread_storage_map(root_thread, gtid);
    }
    root_thread->th.th_info.ds.ds_gtid = gtid;
    root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
    root_thread->th.th_root = root;
    if (__kmp_env_consistency_check) {
      root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
    }
    __kmp_initialize_fast_memory(root_thread);

    KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
    __kmp_initialize_bget(root_thread);

    __kmp_init_random(root_thread); // Initialize random number generator
  }

  /* setup the serial team held in reserve by the root thread */
  if (!root_thread->th.th_serial_team) {
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team = __kmp_allocate_team(
        root, 1, 1,
        ompt_data_none, // root parallel id
        proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(root_thread->th.th_serial_team);
  KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                root_thread->th.th_serial_team));

  /* drop root_thread into place */
  TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);

  root->r.r_root_team->t.t_threads[0] = root_thread;
  root->r.r_hot_team->t.t_threads[0] = root_thread;
  root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
  // AC: the team created in reserve, not for execution (it is unused for now).
  root_thread->th.th_serial_team->t.t_serialized = 0;
  root->r.r_uber_thread = root_thread;

  /* initialize the thread, get it ready to go */
  __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
  TCW_4(__kmp_init_gtid, TRUE);

  /* prepare the primary thread for get_gtid() */
  __kmp_gtid_set_specific(gtid);

  __kmp_itt_thread_name(gtid);

#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_create_worker(gtid, root_thread, __kmp_stksize);
  KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);

  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    int b;
    for (b = 0; b < bs_last_barrier; ++b) {
      root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
      root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
    }
  }
  KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
                   KMP_INIT_BARRIER_STATE);

#if KMP_AFFINITY_SUPPORTED
  root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
  root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif /* KMP_AFFINITY_SUPPORTED */
  root_thread->th.th_def_allocator = __kmp_def_allocator;
  root_thread->th.th_prev_level = 0;
  root_thread->th.th_prev_num_threads = 1;

  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = root_thread;
  tmp->cg_thread_limit = __kmp_cg_max_nth;
  tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  tmp->up = NULL;
  root_thread->th.th_cg_roots = tmp;

  __kmp_root_counter++;

  if (!initial_thread && ompt_enabled.enabled) {

    kmp_info_t *root_thread = ompt_get_thread();

    ompt_set_thread_state(root_thread, ompt_state_overhead);

    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_initial, __ompt_get_thread_data_internal());
    }
    ompt_data_t *task_data;
    ompt_data_t *parallel_data;
    __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                  NULL);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
    }

    ompt_set_thread_state(root_thread, ompt_state_work_serial);
  }

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();

  KMP_MB();
  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  return gtid;
}
#if KMP_NESTED_HOT_TEAMS
// Resources for a nested parallel region allocated by primary threads are
// freed by workers after joining, so this routine only frees hot teams of the
// given thread, not its descendants.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // primary thread is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
// Resets a root thread and clears its root and hot teams.
// Returns the number of __kmp_threads entries directly and indirectly freed.
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before the call to __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level >
      0) { // need to free nested hot teams and their threads if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, we need to make certain that all other
  // threads in the teams that had this root as ancestor have stopped trying to
  // steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();

  ompt_data_t *task_data;
  ompt_data_t *parallel_data;
  __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
                                NULL);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
  }
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
  KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
                 " to %d\n",
                 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
                 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
  if (i == 1) {
    // need to free contention group structure
    KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
                     root->r.r_uber_thread->th.th_cg_roots->cg_root);
    KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
    __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
    root->r.r_uber_thread->th.th_cg_roots = NULL;
  }
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put the root thread into __kmp_thread_pool, so we have to reap it.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* this lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered)) {
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must be already held.
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
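
// __kmp_initialize_info: (re)binds a kmp_info_t to a team and tid. It caches
// the team geometry in the thread, initializes the implicit task, the private
// common table, the contention-group linkage and thread_limit ICV, and the
// per-thread dynamic dispatch buffers and task_state memo stack.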
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
  /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/
     create_worker; this_thr->th.th_serial_team is setup in
     __kmp_allocate_thread */
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  kmp_info_t *master = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master);
  KMP_DEBUG_ASSERT(master->th.th_root);

  KMP_MB();

  TCW_SYNC_PTR(this_thr->th.th_team, team);

  this_thr->th.th_info.ds.ds_tid = tid;
  this_thr->th.th_set_nproc = 0;
  if (__kmp_tasking_mode != tskm_immediate_exec)
    // When tasking is possible, threads are not safe to reap until they are
    // done tasking; this will be set when tasking code is exited in wait
    this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  else // no tasking --> always safe to reap
    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
  this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
  this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
  this_thr->th.th_root = master->th.th_root;

  /* setup the thread's cache of the team structure */
  this_thr->th.th_team_nproc = team->t.t_nproc;
  this_thr->th.th_team_master = master;
  this_thr->th.th_team_serialized = team->t.t_serialized;

  KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  this_thr->th.th_dispatch = &team->t.t_dispatch[tid];

  this_thr->th.th_local.this_construct = 0;

  if (!this_thr->th.th_pri_common) {
    this_thr->th.th_pri_common =
        (struct common_table *)__kmp_allocate(sizeof(struct common_table));
    if (__kmp_storage_map) {
      __kmp_print_storage_map_gtid(
          gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
          sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
    }
    this_thr->th.th_pri_head = NULL;
  }

  if (this_thr != master && // Primary thread's CG root is initialized elsewhere
      this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
    // Make new thread's CG root same as primary thread's
    KMP_DEBUG_ASSERT(master->th.th_cg_roots);
    kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
    if (tmp) {
      // worker changes CG, need to check if old CG should be freed
      int i = tmp->cg_nthreads--;
      KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
                     " on node %p of thread %p to %d\n",
                     this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
      if (i == 1) {
        __kmp_free(tmp); // last thread left CG --> free it
      }
    }
    this_thr->th.th_cg_roots = master->th.th_cg_roots;
    // Increment new thread's CG root's counter to add the new thread
    this_thr->th.th_cg_roots->cg_nthreads++;
    KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
                   " node %p of thread %p to %d\n",
                   this_thr, this_thr->th.th_cg_roots,
                   this_thr->th.th_cg_roots->cg_root,
                   this_thr->th.th_cg_roots->cg_nthreads));
    this_thr->th.th_current_task->td_icvs.thread_limit =
        this_thr->th.th_cg_roots->cg_thread_limit;
  }

  /* Initialize dynamic dispatch */
  {
    volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
    // Use team max_nproc since this will never change for the team.
    size_t disp_size =
        sizeof(dispatch_private_info_t) *
        (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
    KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                  team->t.t_max_nproc));
    KMP_ASSERT(dispatch);
    KMP_DEBUG_ASSERT(team->t.t_dispatch);
    KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);

    dispatch->th_disp_index = 0;
    dispatch->th_doacross_buf_idx = 0;
    if (!dispatch->th_disp_buffer) {
      dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(disp_size);

      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            gtid, &dispatch->th_disp_buffer[0],
            &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
                                          ? 1
                                          : __kmp_dispatch_num_buffers],
            disp_size,
            "th_%d.th_dispatch.th_disp_buffer "
            "(team_%d.t_dispatch[%d].th_disp_buffer)",
            gtid, team->t.t_id, gtid);
      }
    } else {
      memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
    }

    dispatch->th_dispatch_pr_current = 0;
    dispatch->th_dispatch_sh_current = 0;

    dispatch->th_deo_fcn = 0; /* ORDERED     */
    dispatch->th_dxo_fcn = 0; /* END ORDERED */
  }

  this_thr->th.th_next_pool = NULL;

  if (!this_thr->th.th_task_state_memo_stack) {
    size_t i;
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
  }

  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_MB();
}
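
// __kmp_allocate_thread: obtains a worker for a team. It first tries to reuse
// a thread parked in __kmp_thread_pool; otherwise it picks a fresh gtid slot,
// allocates a kmp_info_t plus its reserve serial team, initializes barrier,
// affinity and bookkeeping state, and launches the OS thread via
// __kmp_create_worker.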
/* Allocate a new thread for the requesting team. This is only called from
   within a forkjoin critical section. We will first try to get an available
   thread from the thread pool; if none is available, we will fork a new one
   (the caller should have checked that this is possible). */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif
  KMP_MB();

  /* first, try to get one from the thread pool unless allocating thread is
     the hidden helper thread; the hidden helper team should always allocate
     new OS threads */
  if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
    if (new_thr == __kmp_thread_pool_insert_pt) {
      __kmp_thread_pool_insert_pt = NULL;
    }
    TCW_4(new_thr->th.th_in_pool, FALSE);
    __kmp_suspend_initialize_thread(new_thr);
    __kmp_lock_suspend_mx(new_thr);
    if (new_thr->th.th_active_in_pool == TRUE) {
      KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      new_thr->th.th_active_in_pool = FALSE;
    }
    __kmp_unlock_suspend_mx(new_thr);

    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
    KMP_ASSERT(!new_thr->th.th_team);
    KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);

    /* setup the thread structure */
    __kmp_initialize_info(new_thr, team, new_tid,
                          new_thr->th.th_info.ds.ds_gtid);
    KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

    TCW_4(__kmp_nth, __kmp_nth + 1);

    new_thr->th.th_task_state = 0;
    new_thr->th.th_task_state_top = 0;
    new_thr->th.th_task_state_stack_sz = 4;

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Make sure pool thread has transitioned to waiting on own thread struct
      KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
      // Thread activated in __kmp_allocate_team when increasing team size
    }

#ifdef KMP_ADJUST_BLOCKTIME
    /* Adjust blocktime back to zero if necessary; middle initialization might
       not have occurred yet */
    if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
      if (__kmp_nth > __kmp_avail_proc) {
        __kmp_zero_bt = TRUE;
      }
    }
#endif /* KMP_ADJUST_BLOCKTIME */

    // If thread entered pool via __kmp_free_thread, wait_flag should !=
    // KMP_BARRIER_PARENT_FLAG.
    int b;
    kmp_balign_t *balign = new_thr->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b)
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));

    KMP_MB();
    return new_thr;
  }

  /* no, well fork a new one */
  KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);
  KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);

  /* If this is the first worker thread the RTL is creating, then also launch
     the monitor thread. We try to do this as early as possible. */
  if (!TCR_4(__kmp_init_monitor)) {
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (!TCR_4(__kmp_init_monitor)) {
      KF_TRACE(10, ("before __kmp_create_monitor\n"));
      TCW_4(__kmp_init_monitor, 1);
      __kmp_create_monitor(&__kmp_monitor);
      KF_TRACE(10, ("after __kmp_create_monitor\n"));
      // wait until the monitor has started
      while (TCR_4(__kmp_init_monitor) < 2) {
        KMP_YIELD(TRUE);
      }
      KF_TRACE(10, ("after monitor thread has started\n"));
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  }

  KMP_MB();

  {
    int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
                             ? 1
                             : __kmp_hidden_helper_threads_num + 1;

    for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
         ++new_gtid) {
      KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
    }

    if (TCR_4(__kmp_init_hidden_helper_threads)) {
      KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
    }
  }

  /* allocate space for it */
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));

  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);

#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  // suppress race-condition detection on synchronization flags in debug mode;
  // this helps to analyze library internals eliminating false positives
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
#if KMP_OS_WINDOWS
  __itt_suppress_mark_range(
      __itt_suppress_range, __itt_suppress_threading_errors,
      &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
#else
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            &new_thr->th.th_suspend_init_count,
                            sizeof(new_thr->th.th_suspend_init_count));
#endif
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
                            sizeof(new_thr->th.th_bar[0].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
                            sizeof(new_thr->th.th_bar[1].bb.b_go));
  __itt_suppress_mark_range(__itt_suppress_range,
                            __itt_suppress_threading_errors,
                            CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
                            sizeof(new_thr->th.th_bar[2].bb.b_go));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
  if (__kmp_storage_map) {
    __kmp_print_thread_storage_map(new_thr, new_gtid);
  }

  // add the reserve serialized team, initialized from the team's primary thread
  {
    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
    new_thr->th.th_serial_team = serial_team =
        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
                                          ompt_data_none, // root parallel id
                                          proc_bind_default, &r_icvs,
                                          0 USE_NESTED_HOT_ARG(NULL));
  }
  KMP_ASSERT(serial_team);
  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
  // execution (it is unused for now).
  serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

  __kmp_initialize_fast_memory(new_thr);

  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
  __kmp_initialize_bget(new_thr);

  __kmp_init_random(new_thr); // Initialize random number generator

  /* Initialize these only once when thread is grabbed for a team allocation */
  KA_TRACE(20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
                __kmp_get_gtid(), KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));

  int b;
  kmp_balign_t *balign = new_thr->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
    balign[b].bb.team = NULL;
    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
    balign[b].bb.use_oncore_barrier = 0;
  }

  TCW_PTR(new_thr->th.th_sleep_loc, NULL);
  new_thr->th.th_sleep_loc_type = flag_unset;

  new_thr->th.th_spin_here = FALSE;
  new_thr->th.th_next_waiting = 0;
  new_thr->th.th_blocking = false;

#if KMP_AFFINITY_SUPPORTED
  new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
  new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
  new_thr->th.th_def_allocator = __kmp_def_allocator;
  new_thr->th.th_prev_level = 0;
  new_thr->th.th_prev_num_threads = 1;

  TCW_4(new_thr->th.th_in_pool, FALSE);
  new_thr->th.th_active_in_pool = FALSE;
  TCW_4(new_thr->th.th_active, TRUE);

  /* adjust the global counters */
  __kmp_all_nth++;
  __kmp_nth++;

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for
  // low numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to zero if necessary; middle initialization might
     not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_AFFINITY_SUPPORTED
  // Set the affinity and topology information for new thread
  __kmp_affinity_set_init_mask(new_gtid, FALSE);
#endif

  /* actually fork it and create the new worker thread */
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));
  KMP_MB();
  return new_thr;
}
/* Reinitialize team for reuse. The hot team code calls this at every fork
   barrier, so EPCC barrier tests are extremely sensitive to changes in it,
   especially writes to the team struct, which cause a cache invalidation in
   all threads. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
4737static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4738 kmp_internal_control_t *new_icvs,
4740 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4743 KMP_DEBUG_ASSERT(team);
4744 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4745 KMP_DEBUG_ASSERT(team->t.t_threads);
4748 team->t.t_master_tid = 0;
4750 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4751 team->t.t_nproc = new_nproc;
4754 team->t.t_next_pool = NULL;
4758 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4759 team->t.t_invoke = NULL;
4762 team->t.t_sched.sched = new_icvs->sched.sched;
4764#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4765 team->t.t_fp_control_saved = FALSE;
4766 team->t.t_x87_fpu_control_word = 0;
4767 team->t.t_mxcsr = 0;
4770 team->t.t_construct = 0;
4772 team->t.t_ordered.dt.t_value = 0;
4773 team->t.t_master_active = FALSE;
4776 team->t.t_copypriv_data = NULL;
4779 team->t.t_copyin_counter = 0;
4782 team->t.t_control_stack_top = NULL;
4784 __kmp_reinitialize_team(team, new_icvs, loc);
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
4790#if KMP_AFFINITY_SUPPORTED
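// Helper: record a thread's place partition [first,last] and its new place,
// and flag the team for affinity display when the thread is about to move.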
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
4793 th->th.th_first_place = first;
4794 th->th.th_last_place = last;
4795 th->th.th_new_place = newp;
4796 if (newp != th->th.th_current_place) {
4797 if (__kmp_display_affinity && team->t.t_display_affinity != 1)
4798 team->t.t_display_affinity = 1;
    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
  }
}
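// Assign a place partition to every thread of the team according to the
// team's proc_bind policy; update_master_only restricts the update to the
// primary thread.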
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4811 if (KMP_HIDDEN_HELPER_TEAM(team))
4814 kmp_info_t *master_th = team->t.t_threads[0];
4815 KMP_DEBUG_ASSERT(master_th != NULL);
4816 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4817 int first_place = master_th->th.th_first_place;
4818 int last_place = master_th->th.th_last_place;
4819 int masters_place = master_th->th.th_current_place;
4820 int num_masks = __kmp_affinity.num_masks;
4821 team->t.t_first_place = first_place;
4822 team->t.t_last_place = last_place;
  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));
4829 switch (proc_bind) {
4831 case proc_bind_default:
4834 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4837 case proc_bind_primary: {
4839 int n_th = team->t.t_nproc;
4840 for (f = 1; f < n_th; f++) {
4841 kmp_info_t *th = team->t.t_threads[f];
4842 KMP_DEBUG_ASSERT(th != NULL);
4843 __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
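  // proc_bind_close: place workers on consecutive places starting from the
  // primary thread's place, wrapping around the partition when needed.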
4852 case proc_bind_close: {
4854 int n_th = team->t.t_nproc;
4856 if (first_place <= last_place) {
4857 n_places = last_place - first_place + 1;
4859 n_places = num_masks - first_place + last_place + 1;
4861 if (n_th <= n_places) {
4862 int place = masters_place;
4863 for (f = 1; f < n_th; f++) {
4864 kmp_info_t *th = team->t.t_threads[f];
4865 KMP_DEBUG_ASSERT(th != NULL);
        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
4874 __kmp_set_thread_place(team, th, first_place, last_place, place);
        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
4882 int S, rem, gap, s_count;
4883 S = n_th / n_places;
4885 rem = n_th - (S * n_places);
4886 gap = rem > 0 ? n_places / rem : n_places;
4887 int place = masters_place;
4889 for (f = 0; f < n_th; f++) {
4890 kmp_info_t *th = team->t.t_threads[f];
4891 KMP_DEBUG_ASSERT(th != NULL);
4893 __kmp_set_thread_place(team, th, first_place, last_place, place);
4896 if ((s_count == S) && rem && (gap_ct == gap)) {
4898 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4900 if (place == last_place) {
4901 place = first_place;
4902 }
else if (place == (num_masks - 1)) {
4910 }
else if (s_count == S) {
4911 if (place == last_place) {
4912 place = first_place;
4913 }
else if (place == (num_masks - 1)) {
        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
4928 KMP_DEBUG_ASSERT(place == masters_place);
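  // proc_bind_spread: give each worker its own contiguous sub-partition of
  // places, spreading the team as widely as possible over the partition.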
4932 case proc_bind_spread: {
4934 int n_th = team->t.t_nproc;
4937 if (first_place <= last_place) {
4938 n_places = last_place - first_place + 1;
4940 n_places = num_masks - first_place + last_place + 1;
4942 if (n_th <= n_places) {
4945 if (n_places != num_masks) {
4946 int S = n_places / n_th;
4947 int s_count, rem, gap, gap_ct;
4949 place = masters_place;
4950 rem = n_places - n_th * S;
4951 gap = rem ? n_th / rem : 1;
4954 if (update_master_only == 1)
4956 for (f = 0; f < thidx; f++) {
4957 kmp_info_t *th = team->t.t_threads[f];
4958 KMP_DEBUG_ASSERT(th != NULL);
4960 int fplace = place, nplace = place;
4962 while (s_count < S) {
4963 if (place == last_place) {
4964 place = first_place;
4965 }
else if (place == (num_masks - 1)) {
4972 if (rem && (gap_ct == gap)) {
4973 if (place == last_place) {
4974 place = first_place;
4975 }
else if (place == (num_masks - 1)) {
4983 __kmp_set_thread_place(team, th, fplace, place, nplace);
4986 if (place == last_place) {
4987 place = first_place;
4988 }
else if (place == (num_masks - 1)) {
4995 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4996 "partition = [%d,%d], num_masks: %u\n",
4997 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4998 f, th->th.th_new_place, th->th.th_first_place,
4999 th->th.th_last_place, num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
5012 if (update_master_only == 1)
5014 for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
5017 KMP_DEBUG_ASSERT(last >= first);
5018 if (first >= n_places) {
5019 if (masters_place) {
5022 if (first == (masters_place + 1)) {
5023 KMP_DEBUG_ASSERT(f == n_th);
5026 if (last == masters_place) {
5027 KMP_DEBUG_ASSERT(f == (n_th - 1));
5031 KMP_DEBUG_ASSERT(f == n_th);
5036 if (last >= n_places) {
5037 last = (n_places - 1);
5042 KMP_DEBUG_ASSERT(0 <= first);
5043 KMP_DEBUG_ASSERT(n_places > first);
5044 KMP_DEBUG_ASSERT(0 <= last);
5045 KMP_DEBUG_ASSERT(n_places > last);
5046 KMP_DEBUG_ASSERT(last_place >= first_place);
5047 th = team->t.t_threads[f];
5048 KMP_DEBUG_ASSERT(th);
5049 __kmp_set_thread_place(team, th, first, last, place);
5051 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5052 "partition = [%d,%d], spacing = %.4f\n",
5053 __kmp_gtid_from_thread(team->t.t_threads[f]),
5054 team->t.t_id, f, th->th.th_new_place,
5055 th->th.th_first_place, th->th.th_last_place, spacing));
5059 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5061 int S, rem, gap, s_count;
5062 S = n_th / n_places;
5064 rem = n_th - (S * n_places);
5065 gap = rem > 0 ? n_places / rem : n_places;
5066 int place = masters_place;
5069 if (update_master_only == 1)
5071 for (f = 0; f < thidx; f++) {
5072 kmp_info_t *th = team->t.t_threads[f];
5073 KMP_DEBUG_ASSERT(th != NULL);
5075 __kmp_set_thread_place(team, th, place, place, place);
5078 if ((s_count == S) && rem && (gap_ct == gap)) {
5080 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5082 if (place == last_place) {
5083 place = first_place;
5084 }
else if (place == (num_masks - 1)) {
5092 }
else if (s_count == S) {
5093 if (place == last_place) {
5094 place = first_place;
5095 }
else if (place == (num_masks - 1)) {
        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
5110 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5133 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5136 int use_hot_team = !root->r.r_active;
5138 int do_place_partition = 1;
5140 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
5141 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5142 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5145#if KMP_NESTED_HOT_TEAMS
5146 kmp_hot_team_ptr_t *hot_teams;
5148 team = master->th.th_team;
5149 level = team->t.t_active_level;
5150 if (master->th.th_teams_microtask) {
5151 if (master->th.th_teams_size.nteams > 1 &&
5154 (microtask_t)__kmp_teams_master ||
5155 master->th.th_teams_level <
5162 if ((master->th.th_teams_size.nteams == 1 &&
5163 master->th.th_teams_level >= team->t.t_level) ||
5164 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5165 do_place_partition = 0;
5167 hot_teams = master->th.th_hot_teams;
5168 if (level < __kmp_hot_teams_max_level && hot_teams &&
5169 hot_teams[level].hot_team) {
5177 KMP_DEBUG_ASSERT(new_nproc == 1);
5181 if (use_hot_team && new_nproc > 1) {
5182 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5183#if KMP_NESTED_HOT_TEAMS
5184 team = hot_teams[level].hot_team;
5186 team = root->r.r_hot_team;
5189 if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5196 if (team->t.t_nproc != new_nproc &&
5197 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5199 int old_nthr = team->t.t_nproc;
5200 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5205 if (do_place_partition == 0)
5206 team->t.t_proc_bind = proc_bind_default;
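    // Does the hot team's size still match the request? Reuse it as-is,
    // shrink it, or grow it accordingly.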
5210 if (team->t.t_nproc == new_nproc) {
5211 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5214 if (team->t.t_size_changed == -1) {
5215 team->t.t_size_changed = 1;
5217 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5221 kmp_r_sched_t new_sched = new_icvs->sched;
5223 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5225 __kmp_reinitialize_team(team, new_icvs,
5226 root->r.r_uber_thread->th.th_ident);
5228 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5229 team->t.t_threads[0], team));
5230 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5232#if KMP_AFFINITY_SUPPORTED
5233 if ((team->t.t_size_changed == 0) &&
5234 (team->t.t_proc_bind == new_proc_bind)) {
5235 if (new_proc_bind == proc_bind_spread) {
5236 if (do_place_partition) {
5238 __kmp_partition_places(team, 1);
5241 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5242 "proc_bind = %d, partition = [%d,%d]\n",
5243 team->t.t_id, new_proc_bind, team->t.t_first_place,
5244 team->t.t_last_place));
5246 if (do_place_partition) {
5247 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5248 __kmp_partition_places(team);
5252 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
5260 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5263 __kmp_add_threads_to_team(team, new_nproc);
5265#if KMP_NESTED_HOT_TEAMS
5266 if (__kmp_hot_teams_mode == 0) {
5269 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5270 hot_teams[level].hot_team_nth = new_nproc;
5273 for (f = new_nproc; f < team->t.t_nproc; f++) {
5274 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5275 if (__kmp_tasking_mode != tskm_immediate_exec) {
5278 team->t.t_threads[f]->th.th_task_team = NULL;
5280 __kmp_free_thread(team->t.t_threads[f]);
5281 team->t.t_threads[f] = NULL;
5283#if KMP_NESTED_HOT_TEAMS
5288 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5289 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5290 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5292 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5293 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5295 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5300 team->t.t_nproc = new_nproc;
5302 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5303 __kmp_reinitialize_team(team, new_icvs,
5304 root->r.r_uber_thread->th.th_ident);
5307 for (f = 0; f < new_nproc; ++f) {
5308 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5313 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5314 team->t.t_threads[0], team));
5316 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5319 for (f = 0; f < team->t.t_nproc; f++) {
5320 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5321 team->t.t_threads[f]->th.th_team_nproc ==
5326 if (do_place_partition) {
5327 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5328#if KMP_AFFINITY_SUPPORTED
5329 __kmp_partition_places(team);
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc;
5338 team->t.t_size_changed = 1;
5340#if KMP_NESTED_HOT_TEAMS
5341 int avail_threads = hot_teams[level].hot_team_nth;
5342 if (new_nproc < avail_threads)
5343 avail_threads = new_nproc;
5344 kmp_info_t **other_threads = team->t.t_threads;
5345 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5349 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5350 for (b = 0; b < bs_last_barrier; ++b) {
5351 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5352 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5354 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5358 if (hot_teams[level].hot_team_nth >= new_nproc) {
5361 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5362 team->t.t_nproc = new_nproc;
5366 team->t.t_nproc = hot_teams[level].hot_team_nth;
5367 hot_teams[level].hot_team_nth = new_nproc;
5369 if (team->t.t_max_nproc < new_nproc) {
5371 __kmp_reallocate_team_arrays(team, new_nproc);
5372 __kmp_reinitialize_team(team, new_icvs, NULL);
5375#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5376 KMP_AFFINITY_SUPPORTED
5382 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5386 for (f = team->t.t_nproc; f < new_nproc; f++) {
5387 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5388 KMP_DEBUG_ASSERT(new_worker);
5389 team->t.t_threads[f] = new_worker;
5392 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5393 "join=%llu, plain=%llu\n",
5394 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5395 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5396 team->t.t_bar[bs_plain_barrier].b_arrived));
5400 kmp_balign_t *balign = new_worker->th.th_bar;
5401 for (b = 0; b < bs_last_barrier; ++b) {
5402 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5403 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5404 KMP_BARRIER_PARENT_FLAG);
5406 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5412#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \
5413 KMP_AFFINITY_SUPPORTED
5415 new_temp_affinity.restore();
5417#if KMP_NESTED_HOT_TEAMS
5420 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5423 __kmp_add_threads_to_team(team, new_nproc);
5427 __kmp_initialize_team(team, new_nproc, new_icvs,
5428 root->r.r_uber_thread->th.th_ident);
5431 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5432 for (f = 0; f < team->t.t_nproc; ++f)
5433 __kmp_initialize_info(team->t.t_threads[f], team, f,
5434 __kmp_gtid_from_tid(f, team));
5437 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5438 for (f = old_nproc; f < team->t.t_nproc; ++f)
5439 team->t.t_threads[f]->th.th_task_state = old_state;
5442 for (f = 0; f < team->t.t_nproc; ++f) {
5443 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5444 team->t.t_threads[f]->th.th_team_nproc ==
5449 if (do_place_partition) {
5450 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5451#if KMP_AFFINITY_SUPPORTED
5452 __kmp_partition_places(team);
5457 kmp_info_t *master = team->t.t_threads[0];
5458 if (master->th.th_teams_microtask) {
5459 for (f = 1; f < new_nproc; ++f) {
5461 kmp_info_t *thr = team->t.t_threads[f];
5462 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5463 thr->th.th_teams_level = master->th.th_teams_level;
5464 thr->th.th_teams_size = master->th.th_teams_size;
5467#if KMP_NESTED_HOT_TEAMS
5471 for (f = 1; f < new_nproc; ++f) {
5472 kmp_info_t *thr = team->t.t_threads[f];
5474 kmp_balign_t *balign = thr->th.th_bar;
5475 for (b = 0; b < bs_last_barrier; ++b) {
5476 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5477 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5479 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5487 __kmp_alloc_argv_entries(argc, team, TRUE);
5488 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5492 KF_TRACE(10, (
" hot_team = %p\n", team));
5495 if (__kmp_tasking_mode != tskm_immediate_exec) {
5496 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5497 "task_team[1] = %p after reinit\n",
5498 team->t.t_task_team[0], team->t.t_task_team[1]));
5503 __ompt_team_assign_id(team, ompt_parallel_data);
5513 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5516 if (team->t.t_max_nproc >= max_nproc) {
5518 __kmp_team_pool = team->t.t_next_pool;
5520 if (max_nproc > 1 &&
5521 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5523 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5528 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
5533 team->t.t_task_team[0] = NULL;
5534 team->t.t_task_team[1] = NULL;
5537 __kmp_alloc_argv_entries(argc, team, TRUE);
5538 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5541 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5542 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5545 for (b = 0; b < bs_last_barrier; ++b) {
5546 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5548 team->t.t_bar[b].b_master_arrived = 0;
5549 team->t.t_bar[b].b_team_arrived = 0;
5554 team->t.t_proc_bind = new_proc_bind;
5556 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5560 __ompt_team_assign_id(team, ompt_parallel_data);
5572 team = __kmp_reap_team(team);
5573 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5581 team->t.t_max_nproc = max_nproc;
5582 if (max_nproc > 1 &&
5583 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5585 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5590 __kmp_allocate_team_arrays(team, max_nproc);
  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5598 team->t.t_task_team[0] = NULL;
5600 team->t.t_task_team[1] = NULL;
5603 if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5608 __kmp_alloc_argv_entries(argc, team, FALSE);
5609 team->t.t_argc = argc;
5612 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5613 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5616 for (b = 0; b < bs_last_barrier; ++b) {
5617 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5619 team->t.t_bar[b].b_master_arrived = 0;
5620 team->t.t_bar[b].b_team_arrived = 0;
5625 team->t.t_proc_bind = new_proc_bind;
5628 __ompt_team_assign_id(team, ompt_parallel_data);
5629 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5652 KMP_DEBUG_ASSERT(root);
5653 KMP_DEBUG_ASSERT(team);
5654 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5655 KMP_DEBUG_ASSERT(team->t.t_threads);
5657 int use_hot_team = team == root->r.r_hot_team;
5658#if KMP_NESTED_HOT_TEAMS
5661 level = team->t.t_active_level - 1;
5662 if (master->th.th_teams_microtask) {
5663 if (master->th.th_teams_size.nteams > 1) {
5667 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5668 master->th.th_teams_level == team->t.t_level) {
5674 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5676 if (level < __kmp_hot_teams_max_level) {
5677 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5684 TCW_SYNC_PTR(team->t.t_pkfn,
5687 team->t.t_copyin_counter = 0;
5692 if (!use_hot_team) {
5693 if (__kmp_tasking_mode != tskm_immediate_exec) {
5695 for (f = 1; f < team->t.t_nproc; ++f) {
5696 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5697 kmp_info_t *th = team->t.t_threads[f];
5698 volatile kmp_uint32 *state = &th->th.th_reap_state;
5699 while (*state != KMP_SAFE_TO_REAP) {
5703 if (!__kmp_is_thread_alive(th, &ecode)) {
5704 *state = KMP_SAFE_TO_REAP;
5709 if (th->th.th_sleep_loc)
5710 __kmp_null_resume_wrapper(th);
5717 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5718 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5719 if (task_team != NULL) {
5720 for (f = 0; f < team->t.t_nproc; ++f) {
5721 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5722 team->t.t_threads[f]->th.th_task_team = NULL;
5726 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5727 __kmp_get_gtid(), task_team, team->t.t_id));
5728#if KMP_NESTED_HOT_TEAMS
5729 __kmp_free_task_team(master, task_team);
5731 team->t.t_task_team[tt_idx] = NULL;
5737 team->t.t_parent = NULL;
5738 team->t.t_level = 0;
5739 team->t.t_active_level = 0;
5742 for (f = 1; f < team->t.t_nproc; ++f) {
5743 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5744 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5745 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5748 __kmp_free_thread(team->t.t_threads[f]);
5751 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5754 team->t.b->go_release();
5755 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5756 for (f = 1; f < team->t.t_nproc; ++f) {
5757 if (team->t.b->sleep[f].sleep) {
5758 __kmp_atomic_resume_64(
5759 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5760 (kmp_atomic_flag_64<> *)NULL);
    for (int f = 1; f < team->t.t_nproc; ++f) {
5766 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5772 for (f = 1; f < team->t.t_nproc; ++f) {
5773 team->t.t_threads[f] = NULL;
5776 if (team->t.t_max_nproc > 1 &&
5777 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5778 distributedBarrier::deallocate(team->t.b);
  team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
  __kmp_team_pool = (volatile kmp_team_t *)team;
5787 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5788 team->t.t_threads[1]->th.th_cg_roots);
5789 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5791 for (f = 1; f < team->t.t_nproc; ++f) {
5792 kmp_info_t *thr = team->t.t_threads[f];
5793 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5794 thr->th.th_cg_roots->cg_root == thr);
5796 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5797 thr->th.th_cg_roots = tmp->up;
      KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                     " up to node %p. cg_nthreads was %d\n",
                     thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5801 int i = tmp->cg_nthreads--;
5806 if (thr->th.th_cg_roots)
5807 thr->th.th_current_task->td_icvs.thread_limit =
5808 thr->th.th_cg_roots->cg_thread_limit;
5817kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5818 kmp_team_t *next_pool = team->t.t_next_pool;
5820 KMP_DEBUG_ASSERT(team);
5821 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5822 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5823 KMP_DEBUG_ASSERT(team->t.t_threads);
5824 KMP_DEBUG_ASSERT(team->t.t_argv);
5829 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
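// Return a thread to the free pool, keeping the pool ordered by ascending
// gtid, and unwind the thread's contention-group (cg_roots) chain.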
5863void __kmp_free_thread(kmp_info_t *this_th) {
  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5870 KMP_DEBUG_ASSERT(this_th);
5875 kmp_balign_t *balign = this_th->th.th_bar;
5876 for (b = 0; b < bs_last_barrier; ++b) {
5877 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5878 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5879 balign[b].bb.team = NULL;
5880 balign[b].bb.leaf_kids = 0;
5882 this_th->th.th_task_state = 0;
5883 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5886 TCW_PTR(this_th->th.th_team, NULL);
5887 TCW_PTR(this_th->th.th_root, NULL);
5888 TCW_PTR(this_th->th.th_dispatch, NULL);
5890 while (this_th->th.th_cg_roots) {
5891 this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
5897 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5898 if (tmp->cg_root == this_th) {
5899 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5901 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5902 this_th->th.th_cg_roots = tmp->up;
5905 if (tmp->cg_nthreads == 0) {
5908 this_th->th.th_cg_roots = NULL;
5918 __kmp_free_implicit_task(this_th);
5919 this_th->th.th_current_task = NULL;
5923 gtid = this_th->th.th_info.ds.ds_gtid;
5924 if (__kmp_thread_pool_insert_pt != NULL) {
5925 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5926 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5927 __kmp_thread_pool_insert_pt = NULL;
5936 if (__kmp_thread_pool_insert_pt != NULL) {
5937 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5939 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5941 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5942 scan = &((*scan)->th.th_next_pool))
5947 TCW_PTR(this_th->th.th_next_pool, *scan);
5948 __kmp_thread_pool_insert_pt = *scan = this_th;
5949 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5950 (this_th->th.th_info.ds.ds_gtid <
5951 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5952 TCW_4(this_th->th.th_in_pool, TRUE);
5953 __kmp_suspend_initialize_thread(this_th);
5954 __kmp_lock_suspend_mx(this_th);
5955 if (this_th->th.th_active == TRUE) {
5956 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5957 this_th->th.th_active_in_pool = TRUE;
5961 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5964 __kmp_unlock_suspend_mx(this_th);
5966 TCW_4(__kmp_nth, __kmp_nth - 1);
5968#ifdef KMP_ADJUST_BLOCKTIME
5971 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5972 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5973 if (__kmp_nth <= __kmp_avail_proc) {
5974 __kmp_zero_bt = FALSE;
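// Entry point executed by each worker thread: wait at the fork barrier for
// work, invoke the team's microtask, then pass through the join barrier,
// repeating until library shutdown.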
5984void *__kmp_launch_thread(kmp_info_t *this_thr) {
5985#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
5992 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5999 if (__kmp_env_consistency_check) {
6000 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6004 if (ompd_state & OMPD_ENABLE_BP)
6005 ompd_bp_thread_begin();
  ompt_data_t *thread_data = nullptr;
6010 if (ompt_enabled.enabled) {
6011 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6012 *thread_data = ompt_data_none;
6014 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6015 this_thr->th.ompt_thread_info.wait_id = 0;
6016 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6017 this_thr->th.ompt_thread_info.parallel_flags = 0;
6018 if (ompt_enabled.ompt_callback_thread_begin) {
6019 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6020 ompt_thread_worker, thread_data);
6022 this_thr->th.ompt_thread_info.state = ompt_state_idle;
6027 while (!TCR_4(__kmp_global.g.g_done)) {
6028 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6032 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6035 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6038 if (ompt_enabled.enabled) {
6039 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6043 pteam = &this_thr->th.th_team;
6046 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6048 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6055 updateHWFPControl(*pteam);
6058 if (ompt_enabled.enabled) {
6059 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6063 rc = (*pteam)->t.t_invoke(gtid);
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
6072 if (ompt_enabled.enabled) {
6074 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6076 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6080 __kmp_join_barrier(gtid);
6085 if (ompd_state & OMPD_ENABLE_BP)
6086 ompd_bp_thread_end();
6090 if (ompt_enabled.ompt_callback_thread_end) {
6091 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6095 this_thr->th.th_task_team = NULL;
6097 __kmp_common_destroy_gtid(gtid);
6099 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6102#if OMP_PROFILING_SUPPORT
6103 llvm::timeTraceProfilerFinishThread();
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6119 __kmp_internal_end_thread(gtid);
6122#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
6125 __kmp_internal_end_atexit();
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6157 __kmp_internal_end_library(-1);
6159 __kmp_close_console();
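// Reap a worker: release it from the fork barrier, join the underlying OS
// thread, and free its per-thread resources (fast memory, bget data, affinity
// mask, serial team, consistency-check stack).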
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6168 KMP_DEBUG_ASSERT(thread != NULL);
6170 gtid = thread->th.th_info.ds.ds_gtid;
6173 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6180 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6182 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6186 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6188 __kmp_release_64(&flag);
6193 __kmp_reap_worker(thread);
6205 if (thread->th.th_active_in_pool) {
6206 thread->th.th_active_in_pool = FALSE;
6207 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6208 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6212 __kmp_free_implicit_task(thread);
6216 __kmp_free_fast_memory(thread);
6219 __kmp_suspend_uninitialize_thread(thread);
6221 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6222 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6227#ifdef KMP_ADJUST_BLOCKTIME
6230 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6231 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6232 if (__kmp_nth <= __kmp_avail_proc) {
6233 __kmp_zero_bt = FALSE;
6239 if (__kmp_env_consistency_check) {
6240 if (thread->th.th_cons) {
6241 __kmp_free_cons_stack(thread->th.th_cons);
6242 thread->th.th_cons = NULL;
6246 if (thread->th.th_pri_common != NULL) {
6247 __kmp_free(thread->th.th_pri_common);
6248 thread->th.th_pri_common = NULL;
6251 if (thread->th.th_task_state_memo_stack != NULL) {
6252 __kmp_free(thread->th.th_task_state_memo_stack);
6253 thread->th.th_task_state_memo_stack = NULL;
6257 if (thread->th.th_local.bget_data != NULL) {
6258 __kmp_finalize_bget(thread);
6262#if KMP_AFFINITY_SUPPORTED
6263 if (thread->th.th_affin_mask != NULL) {
6264 KMP_CPU_FREE(thread->th.th_affin_mask);
6265 thread->th.th_affin_mask = NULL;
6269#if KMP_USE_HIER_SCHED
6270 if (thread->th.th_hier_bar_data != NULL) {
6271 __kmp_free(thread->th.th_hier_bar_data);
6272 thread->th.th_hier_bar_data = NULL;
6276 __kmp_reap_team(thread->th.th_serial_team);
6277 thread->th.th_serial_team = NULL;
6284static void __kmp_itthash_clean(kmp_info_t *th) {
6286 if (__kmp_itt_region_domains.count > 0) {
6287 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6288 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6290 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6291 __kmp_thread_free(th, bucket);
6296 if (__kmp_itt_barrier_domains.count > 0) {
6297 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6298 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6300 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6301 __kmp_thread_free(th, bucket);
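// Common shutdown path: once no root is active, reap the monitor thread, the
// pooled worker threads, and the pooled teams, then mark the library done.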
static void __kmp_internal_end(void) {
6313 __kmp_unregister_library();
6320 __kmp_reclaim_dead_roots();
6324 for (i = 0; i < __kmp_threads_capacity; i++)
6326 if (__kmp_root[i]->r.r_active)
6329 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6331 if (i < __kmp_threads_capacity) {
6343 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6344 if (TCR_4(__kmp_init_monitor)) {
6345 __kmp_reap_monitor(&__kmp_monitor);
6346 TCW_4(__kmp_init_monitor, 0);
6348 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6355 for (i = 0; i < __kmp_threads_capacity; i++) {
6356 if (__kmp_root[i]) {
6359 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6368 while (__kmp_thread_pool != NULL) {
6370 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6371 __kmp_thread_pool = thread->th.th_next_pool;
6373 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6374 thread->th.th_next_pool = NULL;
6375 thread->th.th_in_pool = FALSE;
6376 __kmp_reap_thread(thread, 0);
6378 __kmp_thread_pool_insert_pt = NULL;
6381 while (__kmp_team_pool != NULL) {
6383 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6384 __kmp_team_pool = team->t.t_next_pool;
6386 team->t.t_next_pool = NULL;
6387 __kmp_reap_team(team);
6390 __kmp_reap_task_teams();
6397 for (i = 0; i < __kmp_threads_capacity; i++) {
6398 kmp_info_t *thr = __kmp_threads[i];
6399 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6404 for (i = 0; i < __kmp_threads_capacity; ++i) {
6411 TCW_SYNC_4(__kmp_init_common, FALSE);
  KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6421 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6422 if (TCR_4(__kmp_init_monitor)) {
6423 __kmp_reap_monitor(&__kmp_monitor);
6424 TCW_4(__kmp_init_monitor, 0);
6426 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6427 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6430 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }
6457 if (TCR_4(__kmp_init_hidden_helper) &&
6458 !TCR_4(__kmp_hidden_helper_team_done)) {
6459 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6461 __kmp_hidden_helper_main_thread_release();
6463 __kmp_hidden_helper_threads_deinitz_wait();
6469 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(
      10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                  "shutdown\n"));
    // We don't know who we are, but we may still shut down the library.
  } else if (KMP_UBER_GTID(gtid)) {
6486 if (__kmp_root[gtid]->r.r_active) {
6487 __kmp_global.g.g_abort = -1;
6488 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6489 __kmp_unregister_library();
6491 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6495 __kmp_itthash_clean(__kmp_threads[gtid]);
6498 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6499 __kmp_unregister_root_current_thread(gtid);
6506#ifdef DUMP_DEBUG_ON_EXIT
6507 if (__kmp_debug_buf)
6508 __kmp_dump_debug_buffer();
6513 __kmp_unregister_library();
6518 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6521 if (__kmp_global.g.g_abort) {
6522 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6524 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6527 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6528 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6537 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6540 __kmp_internal_end();
6542 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6543 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6545 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6547#ifdef DUMP_DEBUG_ON_EXIT
6548 if (__kmp_debug_buf)
6549 __kmp_dump_debug_buffer();
6553 __kmp_close_console();
6556 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6569 if (__kmp_global.g.g_abort) {
6570 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6574 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6575 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6580 if (TCR_4(__kmp_init_hidden_helper) &&
6581 !TCR_4(__kmp_hidden_helper_team_done)) {
6582 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6584 __kmp_hidden_helper_main_thread_release();
6586 __kmp_hidden_helper_threads_deinitz_wait();
  int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6596 if (gtid == KMP_GTID_SHUTDOWN) {
6597 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6598 "already shutdown\n"));
6600 }
else if (gtid == KMP_GTID_MONITOR) {
6601 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6602 "registered, or system shutdown\n"));
6604 }
else if (gtid == KMP_GTID_DNE) {
6605 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6609 }
else if (KMP_UBER_GTID(gtid)) {
6611 if (__kmp_root[gtid]->r.r_active) {
6612 __kmp_global.g.g_abort = -1;
6613 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6615 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6619 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6621 __kmp_unregister_root_current_thread(gtid);
6625 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6628 __kmp_threads[gtid]->th.th_task_team = NULL;
6632 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6638 if (__kmp_pause_status != kmp_hard_paused)
6642 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6647 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6650 if (__kmp_global.g.g_abort) {
6651 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6653 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6656 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6657 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6668 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6670 for (i = 0; i < __kmp_threads_capacity; ++i) {
6671 if (KMP_UBER_GTID(i)) {
6674 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6675 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6676 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6683 __kmp_internal_end();
6685 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6686 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6688 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6690#ifdef DUMP_DEBUG_ON_EXIT
6691 if (__kmp_debug_buf)
6692 __kmp_dump_debug_buffer();
6699static long __kmp_registration_flag = 0;
6701static char *__kmp_registration_str = NULL;
6704static inline char *__kmp_reg_status_name() {
6710#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6711 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d_%d", (
int)getpid(),
6714 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int)getpid());
#if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
char *temp_reg_status_file_name = nullptr;
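// Record this runtime's registration string in the environment (or in shared
// memory / a /tmp file when KMP_USE_SHM is enabled) so that a second OpenMP
// runtime loaded into the same process can be detected and reported.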
void __kmp_register_library_startup(void) {
6727 char *name = __kmp_reg_status_name();
6733#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6734 __kmp_initialize_system_tick();
6736 __kmp_read_system_time(&time.dtime);
6737 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6749#if defined(KMP_USE_SHM)
  char *shm_name = nullptr;
  char *data1 = nullptr;
  __kmp_shm_available = __kmp_detect_shm();
  if (__kmp_shm_available) {
    shm_name = __kmp_str_format("/%s", name);
6756 int shm_preexist = 0;
6757 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6758 if ((fd1 == -1) && (errno == EEXIST)) {
6761 fd1 = shm_open(shm_name, O_RDWR, 0600);
        KMP_WARNING(FunctionError, "Can't open SHM");
        __kmp_shm_available = false;
6769 if (__kmp_shm_available && shm_preexist == 0) {
6770 if (ftruncate(fd1, SHM_SIZE) == -1) {
6771 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6772 __kmp_shm_available =
false;
6775 if (__kmp_shm_available) {
6776 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6778 if (data1 == MAP_FAILED) {
6779 KMP_WARNING(FunctionError,
"Can't map SHM");
6780 __kmp_shm_available =
false;
6783 if (__kmp_shm_available) {
6784 if (shm_preexist == 0) {
6785 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6788 value = __kmp_str_format(
"%s", data1);
6789 munmap(data1, SHM_SIZE);
6794 if (!__kmp_shm_available)
6795 __kmp_tmp_available = __kmp_detect_tmp();
6796 if (!__kmp_shm_available && __kmp_tmp_available) {
6803 temp_reg_status_file_name = __kmp_str_format(
"/tmp/%s", name);
6804 int tmp_preexist = 0;
6805 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);
6806 if ((fd1 == -1) && (errno == EEXIST)) {
6809 fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);
6811 KMP_WARNING(FunctionError,
"Can't open TEMP");
6812 __kmp_tmp_available =
false;
6817 if (__kmp_tmp_available && tmp_preexist == 0) {
6819 if (ftruncate(fd1, SHM_SIZE) == -1) {
6820 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6821 __kmp_tmp_available =
false;
6824 if (__kmp_tmp_available) {
6825 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6827 if (data1 == MAP_FAILED) {
6828 KMP_WARNING(FunctionError,
"Can't map /tmp");
6829 __kmp_tmp_available =
false;
6832 if (__kmp_tmp_available) {
6833 if (tmp_preexist == 0) {
6834 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6837 value = __kmp_str_format(
"%s", data1);
6838 munmap(data1, SHM_SIZE);
6843 if (!__kmp_shm_available && !__kmp_tmp_available) {
6846 __kmp_env_set(name, __kmp_registration_str, 0);
6848 value = __kmp_env_get(name);
6852 __kmp_env_set(name, __kmp_registration_str, 0);
6854 value = __kmp_env_get(name);
6857 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6864 char *flag_addr_str = NULL;
6865 char *flag_val_str = NULL;
6866 char const *file_name = NULL;
6867 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6868 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6871 unsigned long *flag_addr = 0;
6872 unsigned long flag_val = 0;
6873 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6874 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6875 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6879 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6893 file_name =
"unknown library";
6898 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6899 if (!__kmp_str_match_true(duplicate_ok)) {
6901 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6902 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6904 KMP_INTERNAL_FREE(duplicate_ok);
6905 __kmp_duplicate_library_ok = 1;
6910#if defined(KMP_USE_SHM)
6911 if (__kmp_shm_available) {
6912 shm_unlink(shm_name);
6913 }
else if (__kmp_tmp_available) {
6914 unlink(temp_reg_status_file_name);
6917 __kmp_env_unset(name);
6921 __kmp_env_unset(name);
6925 KMP_DEBUG_ASSERT(0);
6929 KMP_INTERNAL_FREE((
void *)value);
6930#if defined(KMP_USE_SHM)
6932 KMP_INTERNAL_FREE((
void *)shm_name);
6935 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();

#if defined(KMP_USE_SHM)
  char *shm_name = nullptr;
6947 if (__kmp_shm_available) {
6948 shm_name = __kmp_str_format(
"/%s", name);
6949 fd1 = shm_open(shm_name, O_RDONLY, 0600);
6951 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6952 if (data1 != MAP_FAILED) {
6953 value = __kmp_str_format(
"%s", data1);
6954 munmap(data1, SHM_SIZE);
6958 }
else if (__kmp_tmp_available) {
6959 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6961 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6962 if (data1 != MAP_FAILED) {
6963 value = __kmp_str_format(
"%s", data1);
6964 munmap(data1, SHM_SIZE);
6969 value = __kmp_env_get(name);
6972 value = __kmp_env_get(name);
6975 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6976 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6977 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6979#if defined(KMP_USE_SHM)
6980 if (__kmp_shm_available) {
6981 shm_unlink(shm_name);
6982 }
else if (__kmp_tmp_available) {
6983 unlink(temp_reg_status_file_name);
6985 __kmp_env_unset(name);
6988 __kmp_env_unset(name);
6992#if defined(KMP_USE_SHM)
6994 KMP_INTERNAL_FREE(shm_name);
6995 if (temp_reg_status_file_name)
6996 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6999 KMP_INTERNAL_FREE(__kmp_registration_str);
7000 KMP_INTERNAL_FREE(value);
7001 KMP_INTERNAL_FREE(name);
7003 __kmp_registration_flag = 0;
7004 __kmp_registration_str = NULL;
7011#if KMP_MIC_SUPPORTED
7013static void __kmp_check_mic_type() {
7014 kmp_cpuid_t cpuid_state = {0};
7015 kmp_cpuid_t *cs_p = &cpuid_state;
7016 __kmp_x86_cpuid(1, 0, cs_p);
7018 if ((cs_p->eax & 0xff0) == 0xB10) {
7019 __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
7021 __kmp_mic_type = mic3;
7023 __kmp_mic_type = non_mic;
7030static void __kmp_user_level_mwait_init() {
7031 struct kmp_cpuid buf;
7032 __kmp_x86_cpuid(7, 0, &buf);
7033 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
7034 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
7035 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
7040#ifndef AT_INTELPHIUSERMWAIT
7043#define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
7051static void __kmp_user_level_mwait_init() {
7056 if (__kmp_mic_type == mic3) {
7057 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7058 if ((res & 0x1) || __kmp_user_level_mwait) {
7059 __kmp_mwait_enabled = TRUE;
7060 if (__kmp_user_level_mwait) {
7061 KMP_INFORM(EnvMwaitWarn);
7064 __kmp_mwait_enabled = FALSE;
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
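// One-time serial initialization of the runtime: locks, barrier defaults,
// thread/team pools, library registration, and the initial root thread.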
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
7093 __kmp_validate_locks();
7095#if ENABLE_LIBOMPTARGET
7097 __kmp_init_omptarget();
7101 __kmp_init_allocator();
7107 if (__kmp_need_register_serial)
7108 __kmp_register_library_startup();
7111 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
7115 __kmp_global.g.g_abort = 0;
7116 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7119#if KMP_USE_ADAPTIVE_LOCKS
7120#if KMP_DEBUG_ADAPTIVE_LOCKS
7121 __kmp_init_speculative_stats();
7124#if KMP_STATS_ENABLED
7127 __kmp_init_lock(&__kmp_global_lock);
7128 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7129 __kmp_init_lock(&__kmp_debug_lock);
7130 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7131 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7132 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7133 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7134 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7135 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7136 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7137 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7138 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7139 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7140 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7141 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7142 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7143 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7144 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7146 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7148 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7152 __kmp_runtime_initialize();
7154#if KMP_MIC_SUPPORTED
7155 __kmp_check_mic_type();
7162 __kmp_abort_delay = 0;
7166 __kmp_dflt_team_nth_ub = __kmp_xproc;
7167 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7168 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7170 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7171 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7173 __kmp_max_nth = __kmp_sys_max_nth;
7174 __kmp_cg_max_nth = __kmp_sys_max_nth;
7175 __kmp_teams_max_nth = __kmp_xproc;
7176 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7177 __kmp_teams_max_nth = __kmp_sys_max_nth;
7182 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7184 __kmp_monitor_wakeups =
7185 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7186 __kmp_bt_intervals =
7187 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7190 __kmp_library = library_throughput;
7192 __kmp_static = kmp_sch_static_balanced;
7199#if KMP_FAST_REDUCTION_BARRIER
7200#define kmp_reduction_barrier_gather_bb ((int)1)
7201#define kmp_reduction_barrier_release_bb ((int)1)
7202#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7203#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7205 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7206 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7207 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7208 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7209 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7210#if KMP_FAST_REDUCTION_BARRIER
7211 if (i == bs_reduction_barrier) {
7213 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7214 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7215 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7216 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7220#if KMP_FAST_REDUCTION_BARRIER
7221#undef kmp_reduction_barrier_release_pat
7222#undef kmp_reduction_barrier_gather_pat
7223#undef kmp_reduction_barrier_release_bb
7224#undef kmp_reduction_barrier_gather_bb
7226#if KMP_MIC_SUPPORTED
7227 if (__kmp_mic_type == mic2) {
7229 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7230 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7232 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7233 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7235#if KMP_FAST_REDUCTION_BARRIER
7236 if (__kmp_mic_type == mic2) {
7237 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7238 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7245 __kmp_env_checks = TRUE;
7247 __kmp_env_checks = FALSE;
7251 __kmp_foreign_tp = TRUE;
7253 __kmp_global.g.g_dynamic = FALSE;
7254 __kmp_global.g.g_dynamic_mode = dynamic_default;
7256 __kmp_init_nesting_mode();
7258 __kmp_env_initialize(NULL);
7260#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7261 __kmp_user_level_mwait_init();
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
7276 __kmp_threads_capacity =
7277 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7279 __kmp_tp_capacity = __kmp_default_tp_capacity(
7280 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7285 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7286 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7287 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7288 __kmp_thread_pool = NULL;
7289 __kmp_thread_pool_insert_pt = NULL;
7290 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7304 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7306 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7311 gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
7313 KMP_ASSERT(KMP_UBER_GTID(gtid));
7314 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7318 __kmp_common_initialize();
7322 __kmp_register_atfork();
#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
  }
#endif
7339#if KMP_HANDLE_SIGNALS
7345 __kmp_install_signals(FALSE);
7348 __kmp_install_signals(TRUE);
7353 __kmp_init_counter++;
7355 __kmp_init_serial = TRUE;
7357 if (__kmp_version) {
7358 __kmp_print_version_1();
7361 if (__kmp_settings) {
7365 if (__kmp_display_env || __kmp_display_env_verbose) {
7366 __kmp_env_print_2();
  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}

void __kmp_serial_initialize(void) {
7379 if (__kmp_init_serial) {
7382 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7383 if (__kmp_init_serial) {
7384 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7387 __kmp_do_serial_initialize();
7388 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
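// Middle initialization: initialize affinity if supported and derive the
// default team size from the number of available processors.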
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
7401 if (UNLIKELY(!__kmp_need_register_serial)) {
7404 __kmp_register_library_startup();
7409 prev_dflt_team_nth = __kmp_dflt_team_nth;
7411#if KMP_AFFINITY_SUPPORTED
7414 __kmp_affinity_initialize(__kmp_affinity);
7418 KMP_ASSERT(__kmp_xproc > 0);
7419 if (__kmp_avail_proc == 0) {
7420 __kmp_avail_proc = __kmp_xproc;
7426 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7427 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7432 if (__kmp_dflt_team_nth == 0) {
7433#ifdef KMP_DFLT_NTH_CORES
7435 __kmp_dflt_team_nth = __kmp_ncores;
7436 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7437 "__kmp_ncores (%d)\n",
7438 __kmp_dflt_team_nth));
7441 __kmp_dflt_team_nth = __kmp_avail_proc;
7442 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7443 "__kmp_avail_proc(%d)\n",
7444 __kmp_dflt_team_nth));
7448 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7449 __kmp_dflt_team_nth = KMP_MIN_NTH;
7451 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7452 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7455 if (__kmp_nesting_mode > 0)
7456 __kmp_set_nesting_mode_threads();
7460 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7462 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7467 for (i = 0; i < __kmp_threads_capacity; i++) {
7468 kmp_info_t *thread = __kmp_threads[i];
7471 if (thread->th.th_current_task->td_icvs.nproc != 0)
7474 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7479 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7480 __kmp_dflt_team_nth));
7482#ifdef KMP_ADJUST_BLOCKTIME
7484 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7485 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7486 if (__kmp_nth > __kmp_avail_proc) {
7487 __kmp_zero_bt = TRUE;
7493 TCW_SYNC_4(__kmp_init_middle, TRUE);
7495 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
7498void __kmp_middle_initialize(void) {
7499 if (__kmp_init_middle) {
7502 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7503 if (__kmp_init_middle) {
7504 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7507 __kmp_do_middle_initialize();
7508 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
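// Final initialization stage, required before the first parallel region:
// capture the initial x87/MXCSR state on x86, install signal handlers, and
// pick the dynamic adjustment mode, all under __kmp_initz_lock.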
7511void __kmp_parallel_initialize(void) {
7512 int gtid = __kmp_entry_gtid();
7515 if (TCR_4(__kmp_init_parallel))
7517 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7518 if (TCR_4(__kmp_init_parallel)) {
7519 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7524 if (TCR_4(__kmp_global.g.g_done)) {
7527 KA_TRACE(10, ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7528 __kmp_infinite_loop();
7534 if (!__kmp_init_middle) {
7535 __kmp_do_middle_initialize();
7537 __kmp_assign_root_init_mask();
7538 __kmp_resume_if_hard_paused();
7541 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7542 KMP_ASSERT(KMP_UBER_GTID(gtid));
7544#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7547 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7548 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7549 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7553#if KMP_HANDLE_SIGNALS
7555 __kmp_install_signals(TRUE);
7559 __kmp_suspend_initialize();
7561#if defined(USE_LOAD_BALANCE)
7562 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7563 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7566 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7567 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7571 if (__kmp_version) {
7572 __kmp_print_version_2();
7576 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7579 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7581 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
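// One-time setup of the hidden helper team; regular parallel initialization
// must complete first, and the caller waits until the helper threads signal
// that their initialization is finished.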
7584void __kmp_hidden_helper_initialize() {
7585 if (TCR_4(__kmp_init_hidden_helper))
7589 if (!TCR_4(__kmp_init_parallel))
7590 __kmp_parallel_initialize();
7594 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7595 if (TCR_4(__kmp_init_hidden_helper)) {
7596 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7600#if KMP_AFFINITY_SUPPORTED
7604 if (!__kmp_hh_affinity.flags.initialized)
7605 __kmp_affinity_initialize(__kmp_hh_affinity);
7609 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7613 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7616 __kmp_do_initialize_hidden_helper_threads();
7619 __kmp_hidden_helper_threads_initz_wait();
7622 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7624 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
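// Bookkeeping around invocation of an implicit task: reset this thread's
// dispatch state before the microtask runs and, when consistency checking is
// enabled, push/pop the parallel construct on the consistency stack.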
7629void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7631 kmp_disp_t *dispatch;
7636 this_thr->th.th_local.this_construct = 0;
7638 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7640 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7641 KMP_DEBUG_ASSERT(dispatch);
7642 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7646 dispatch->th_disp_index = 0;
7647 dispatch->th_doacross_buf_idx = 0;
7648 if (__kmp_env_consistency_check)
7649 __kmp_push_parallel(gtid, team->t.t_ident);
7654void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7656 if (__kmp_env_consistency_check)
7657 __kmp_pop_parallel(gtid, team->t.t_ident);
7659 __kmp_finish_implicit_task(this_thr);
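// Run the team's microtask (t_pkfn) on this thread, bracketed by ITT
// stack-walking markers, OMPT implicit-task callbacks, and stats timers.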
7662int __kmp_invoke_task_func(int gtid) {
7664 int tid = __kmp_tid_from_gtid(gtid);
7665 kmp_info_t *this_thr = __kmp_threads[gtid];
7666 kmp_team_t *team = this_thr->th.th_team;
7668 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7670 if (__itt_stack_caller_create_ptr) {
7672 if (team->t.t_stack_id != NULL) {
7673 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7675 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7676 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_parent->t.t_stack_id);
7681#if INCLUDE_SSC_MARKS
7682 SSC_MARK_INVOKING();
7687 void **exit_frame_p;
7688 ompt_data_t *my_task_data;
7689 ompt_data_t *my_parallel_data;
7692 if (ompt_enabled.enabled) {
7693 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
7696 exit_frame_p = &dummy;
7700 my_task_data = &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7701 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7702 if (ompt_enabled.ompt_callback_implicit_task) {
7703 ompt_team_size = team->t.t_nproc;
7704 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7705 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7706 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7707 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7711#if KMP_STATS_ENABLED
7713 if (previous_state == stats_state_e::TEAMS_REGION) {
7714 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7716 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7718 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7721 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, tid, (int)team->t.t_argc, (void **)team->t.t_argv
7729 *exit_frame_p = NULL;
7730 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7733#if KMP_STATS_ENABLED
7734 if (previous_state == stats_state_e::TEAMS_REGION) {
7735 KMP_SET_THREAD_STATE(previous_state);
7737 KMP_POP_PARTITIONED_TIMER();
7741 if (__itt_stack_caller_create_ptr) {
7743 if (team->t.t_stack_id != NULL) {
7744 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7746 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7747 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_parent->t.t_stack_id);
7752 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
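// Executed by the primary thread of each team in a teams construct: set up a
// new contention-group root, then fork and join the nested parallel region
// that runs the wrapped teams microtask.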
7757void __kmp_teams_master(int gtid) {
7759 kmp_info_t *thr = __kmp_threads[gtid];
7760 kmp_team_t *team = thr->th.th_team;
7761 ident_t *loc = team->t.t_ident;
7762 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7763 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7764 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7765 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7769 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7772 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7773 tmp->cg_nthreads = 1;
7774 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init cg_nthreads to 1\n", thr, tmp));
7777 tmp->up = thr->th.th_cg_roots;
7778 thr->th.th_cg_roots = tmp;
7782#if INCLUDE_SSC_MARKS
7785 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7786 (microtask_t)thr->th.th_teams_microtask,
7787 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7788#if INCLUDE_SSC_MARKS
7792 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7793 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7796 __kmp_join_call(loc, gtid
7805int __kmp_invoke_teams_master(int gtid) {
7806 kmp_info_t *this_thr = __kmp_threads[gtid];
7807 kmp_team_t *team = this_thr->th.th_team;
7809 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7810 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void *)__kmp_teams_master);
7813 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7815 int tid = __kmp_tid_from_gtid(gtid);
7816 ompt_data_t *task_data =
7817 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7818 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7819 if (ompt_enabled.ompt_callback_implicit_task) {
7820 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7821 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7823 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7826 __kmp_teams_master(gtid);
7828 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7830 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
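// Remember the value of a num_threads clause so the next parallel region
// forked by this thread uses it.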
7839void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7840 kmp_info_t *thr = __kmp_threads[gtid];
7842 if (num_threads > 0)
7843 thr->th.th_set_nproc = num_threads;
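// Compute or validate the per-team thread count for a teams construct,
// falling back to __kmp_teams_thread_limit or __kmp_avail_proc / num_teams,
// and keeping num_teams * num_threads within __kmp_teams_max_nth.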
7846static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams, int num_threads) {
7848 KMP_DEBUG_ASSERT(thr);
7850 if (!TCR_4(__kmp_init_middle))
7851 __kmp_middle_initialize();
7852 __kmp_assign_root_init_mask();
7853 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7854 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7856 if (num_threads == 0) {
7857 if (__kmp_teams_thread_limit > 0) {
7858 num_threads = __kmp_teams_thread_limit;
7860 num_threads = __kmp_avail_proc / num_teams;
7865 if (num_threads > __kmp_dflt_team_nth) {
7866 num_threads = __kmp_dflt_team_nth;
7868 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7869 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7871 if (num_teams * num_threads > __kmp_teams_max_nth) {
7872 num_threads = __kmp_teams_max_nth / num_teams;
7874 if (num_threads == 0) {
7878 if (num_threads < 0) {
7879 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1), __kmp_msg_null);
7885 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7887 if (num_threads > __kmp_dflt_team_nth) {
7888 num_threads = __kmp_dflt_team_nth;
7890 if (num_teams * num_threads > __kmp_teams_max_nth) {
7891 int new_threads = __kmp_teams_max_nth / num_teams;
7892 if (new_threads == 0) {
7895 if (new_threads != num_threads) {
7896 if (!__kmp_reserve_warn) {
7897 __kmp_reserve_warn = 1;
7898 __kmp_msg(kmp_ms_warning,
7899 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7900 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7903 num_threads = new_threads;
7906 thr->th.th_teams_size.nth = num_threads;
7911void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads) {
7913 kmp_info_t *thr = __kmp_threads[gtid];
7914 if (num_teams < 0) {
7917 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1), __kmp_msg_null);
7921 if (num_teams == 0) {
7922 if (__kmp_nteams > 0) {
7923 num_teams = __kmp_nteams;
7928 if (num_teams > __kmp_teams_max_nth) {
7929 if (!__kmp_reserve_warn) {
7930 __kmp_reserve_warn = 1;
7931 __kmp_msg(kmp_ms_warning,
7932 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7933 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7935 num_teams = __kmp_teams_max_nth;
7939 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7941 __kmp_push_thread_limit(thr, num_teams, num_threads);
7946void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb, int num_teams_ub, int num_threads) {
7948 kmp_info_t *thr = __kmp_threads[gtid];
7949 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7950 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7951 KMP_DEBUG_ASSERT(num_threads >= 0);
7953 if (num_teams_lb > num_teams_ub) {
7954 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7955 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7960 if (num_teams_lb == 0 && num_teams_ub > 0)
7961 num_teams_lb = num_teams_ub;
7963 if (num_teams_lb == 0 && num_teams_ub == 0) {
7964 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7965 if (num_teams > __kmp_teams_max_nth) {
7966 if (!__kmp_reserve_warn) {
7967 __kmp_reserve_warn = 1;
7968 __kmp_msg(kmp_ms_warning,
7969 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7970 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7972 num_teams = __kmp_teams_max_nth;
7974 } else if (num_teams_lb == num_teams_ub) {
7975 num_teams = num_teams_ub;
7977 if (num_threads <= 0) {
7978 if (num_teams_ub > __kmp_teams_max_nth) {
7979 num_teams = num_teams_lb;
7981 num_teams = num_teams_ub;
7984 num_teams = (num_threads > __kmp_teams_max_nth)
7986 : __kmp_teams_max_nth / num_threads;
7987 if (num_teams < num_teams_lb) {
7988 num_teams = num_teams_lb;
7989 } else if (num_teams > num_teams_ub) {
7990 num_teams = num_teams_ub;
7996 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7998 __kmp_push_thread_limit(thr, num_teams, num_threads);
8002void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
8003 kmp_info_t *thr = __kmp_threads[gtid];
8004 thr->th.th_set_proc_bind = proc_bind;
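// Primary-thread side of launching a parallel region: reset the team's
// construct counters and dispatch buffers, then release the workers through
// the fork barrier.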
8009void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
8010 kmp_info_t *this_thr = __kmp_threads[gtid];
8016 KMP_DEBUG_ASSERT(team);
8017 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8018 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8021 team->t.t_construct = 0;
8022 team->t.t_ordered.dt.t_value = 0;
8026 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8027 if (team->t.t_max_nproc > 1) {
8029 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
8030 team->t.t_disp_buffer[i].buffer_index = i;
8031 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8034 team->t.t_disp_buffer[0].buffer_index = 0;
8035 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8039 KMP_ASSERT(this_thr->th.th_team == team);
8042 for (f = 0; f < team->t.t_nproc; f++) {
8043 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8044 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8049 __kmp_fork_barrier(gtid, 0);
8052void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
8053 kmp_info_t *this_thr = __kmp_threads[gtid];
8055 KMP_DEBUG_ASSERT(team);
8056 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8057 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8063 if (__kmp_threads[gtid] &&
8064 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8065 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, __kmp_threads[gtid]);
8067 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
8071 __kmp_print_structure();
8073 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8074 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8077 __kmp_join_barrier(gtid);
8079 if (ompt_enabled.enabled &&
8080 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8081 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8082 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8083 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8085 void *codeptr = NULL;
8086 if (KMP_MASTER_TID(ds_tid) &&
8087 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8088 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8089 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8091 if (ompt_enabled.ompt_callback_sync_region_wait) {
8092 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8093 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8096 if (ompt_enabled.ompt_callback_sync_region) {
8097 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8098 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8102 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8103 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8104 ompt_scope_end, NULL, task_data, 0, ds_tid,
8105 ompt_task_implicit);
8111 KMP_ASSERT(this_thr->th.th_team == team);
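// Load-balance mode helpers: estimate how many threads a new team can use
// from the current system load, the thread pool, and the active hot team.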
8116#ifdef USE_LOAD_BALANCE
8120static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8123 kmp_team_t *hot_team;
8125 if (root->r.r_active) {
8128 hot_team = root->r.r_hot_team;
8129 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8130 return hot_team->t.t_nproc - 1;
8135 for (i = 1; i < hot_team->t.t_nproc; i++) {
8136 if (hot_team->t.t_threads[i]->th.th_active) {
8145static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8148 int hot_team_active;
8149 int team_curr_active;
8152 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, set_nproc));
8154 KMP_DEBUG_ASSERT(root);
8155 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8156 ->th.th_current_task->td_icvs.dynamic == TRUE);
8157 KMP_DEBUG_ASSERT(set_nproc > 1);
8159 if (set_nproc == 1) {
8160 KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8169 pool_active = __kmp_thread_pool_active_nth;
8170 hot_team_active = __kmp_active_hot_team_nproc(root);
8171 team_curr_active = pool_active + hot_team_active + 1;
8174 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8175 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", system_active, pool_active, hot_team_active));
8179 if (system_active < 0) {
8183 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8184 KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8187 retval = __kmp_avail_proc - __kmp_nth +
8188 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8189 if (retval > set_nproc) {
8192 if (retval < KMP_MIN_NTH) {
8193 retval = KMP_MIN_NTH;
8196 KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval));
8204 if (system_active < team_curr_active) {
8205 system_active = team_curr_active;
8207 retval = __kmp_avail_proc - system_active + team_curr_active;
8208 if (retval > set_nproc) {
8211 if (retval < KMP_MIN_NTH) {
8212 retval = KMP_MIN_NTH;
8215 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8224void __kmp_cleanup(void) {
8227 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8229 if (TCR_4(__kmp_init_parallel)) {
8230#if KMP_HANDLE_SIGNALS
8231 __kmp_remove_signals();
8233 TCW_4(__kmp_init_parallel, FALSE);
8236 if (TCR_4(__kmp_init_middle)) {
8237#if KMP_AFFINITY_SUPPORTED
8238 __kmp_affinity_uninitialize();
8240 __kmp_cleanup_hierarchy();
8241 TCW_4(__kmp_init_middle, FALSE);
8244 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8246 if (__kmp_init_serial) {
8247 __kmp_runtime_destroy();
8248 __kmp_init_serial = FALSE;
8251 __kmp_cleanup_threadprivate_caches();
8253 for (f = 0; f < __kmp_threads_capacity; f++) {
8254 if (__kmp_root[f] != NULL) {
8255 __kmp_free(__kmp_root[f]);
8256 __kmp_root[f] = NULL;
8259 __kmp_free(__kmp_threads);
8262 __kmp_threads = NULL;
8264 __kmp_threads_capacity = 0;
8267 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8269 kmp_old_threads_list_t *next = ptr->next;
8270 __kmp_free(ptr->threads);
8275#if KMP_USE_DYNAMIC_LOCK
8276 __kmp_cleanup_indirect_user_locks();
8278 __kmp_cleanup_user_locks();
8282 __kmp_free(ompd_env_block);
8283 ompd_env_block = NULL;
8284 ompd_env_block_size = 0;
8288#if KMP_AFFINITY_SUPPORTED
8289 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8290 __kmp_cpuinfo_file = NULL;
8293#if KMP_USE_ADAPTIVE_LOCKS
8294#if KMP_DEBUG_ADAPTIVE_LOCKS
8295 __kmp_print_speculative_stats();
8298 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8299 __kmp_nested_nth.nth = NULL;
8300 __kmp_nested_nth.size = 0;
8301 __kmp_nested_nth.used = 0;
8302 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8303 __kmp_nested_proc_bind.bind_types = NULL;
8304 __kmp_nested_proc_bind.size = 0;
8305 __kmp_nested_proc_bind.used = 0;
8306 if (__kmp_affinity_format) {
8307 KMP_INTERNAL_FREE(__kmp_affinity_format);
8308 __kmp_affinity_format = NULL;
8311 __kmp_i18n_catclose();
8313#if KMP_USE_HIER_SCHED
8314 __kmp_hier_scheds.deallocate();
8317#if KMP_STATS_ENABLED
8321 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8326int __kmp_ignore_mppbeg(void) {
8329 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8330 if (__kmp_str_match_false(env))
8337int __kmp_ignore_mppend(void) {
8340 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8341 if (__kmp_str_match_false(env))
8348void __kmp_internal_begin(void) {
8354 gtid = __kmp_entry_gtid();
8355 root = __kmp_threads[gtid]->th.th_root;
8356 KMP_ASSERT(KMP_UBER_GTID(gtid));
8358 if (root->r.r_begin)
8360 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8361 if (root->r.r_begin) {
8362 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8366 root->r.r_begin = TRUE;
8368 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8373void __kmp_user_set_library(enum library_type arg) {
8380 gtid = __kmp_entry_gtid();
8381 thread = __kmp_threads[gtid];
8383 root = thread->th.th_root;
8385 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial));
8387 if (root->r.r_in_parallel) {
8389 KMP_WARNING(SetLibraryIncorrectCall);
8394 case library_serial:
8395 thread->th.th_set_nproc = 0;
8396 set__nproc(thread, 1);
8398 case library_turnaround:
8399 thread->th.th_set_nproc = 0;
8400 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8401 : __kmp_dflt_team_nth_ub);
8403 case library_throughput:
8404 thread->th.th_set_nproc = 0;
8405 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8406 : __kmp_dflt_team_nth_ub);
8409 KMP_FATAL(UnknownLibraryType, arg);
8412 __kmp_aux_set_library(arg);
8415void __kmp_aux_set_stacksize(size_t arg) {
8416 if (!__kmp_init_serial)
8417 __kmp_serial_initialize();
8420 if (arg & (0x1000 - 1)) {
8421 arg &= ~(0x1000 - 1);
8426 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8429 if (!TCR_4(__kmp_init_parallel)) {
8432 if (value < __kmp_sys_min_stksize)
8433 value = __kmp_sys_min_stksize;
8434 else if (value > KMP_MAX_STKSIZE)
8435 value = KMP_MAX_STKSIZE;
8437 __kmp_stksize = value;
8439 __kmp_env_stksize = TRUE;
8442 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8447void __kmp_aux_set_library(enum library_type arg) {
8448 __kmp_library = arg;
8450 switch (__kmp_library) {
8451 case library_serial: {
8452 KMP_INFORM(LibraryIsSerial);
8454 case library_turnaround:
8455 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8456 __kmp_use_yield = 2;
8458 case library_throughput:
8459 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8460 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8463 KMP_FATAL(UnknownLibraryType, arg);
8469static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8470 kmp_info_t *thr = __kmp_entry_thread();
8471 teams_serialized = 0;
8472 if (thr->th.th_teams_microtask) {
8473 kmp_team_t *team = thr->th.th_team;
8474 int tlevel = thr->th.th_teams_level;
8475 int ii = team->t.t_level;
8476 teams_serialized = team->t.t_serialized;
8477 int level = tlevel + 1;
8478 KMP_DEBUG_ASSERT(ii >= tlevel);
8479 while (ii > level) {
8480 for (teams_serialized = team->t.t_serialized;
8481 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8483 if (team->t.t_serialized && (!teams_serialized)) {
8484 team = team->t.t_parent;
8488 team = team->t.t_parent;
8497int __kmp_aux_get_team_num() {
8499 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8501 if (serialized > 1) {
8504 return team->t.t_master_tid;
8510int __kmp_aux_get_num_teams() {
8512 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8514 if (serialized > 1) {
8517 return team->t.t_parent->t.t_nproc;
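// Affinity-format support (used by omp_capture_affinity / omp_display_affinity):
// table of recognized format fields and the helpers that expand a format
// string into a buffer.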
8556typedef struct kmp_affinity_format_field_t {
 char short_name;
8558 const char *long_name;
 char field_format;
8561} kmp_affinity_format_field_t;
8563static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8564#if KMP_AFFINITY_SUPPORTED
8565 {'A', "thread_affinity", 's'},
8567 {'t', "team_num", 'd'},
8568 {'T', "num_teams", 'd'},
8569 {'L', "nesting_level", 'd'},
8570 {'n', "thread_num", 'd'},
8571 {'N', "num_threads", 'd'},
8572 {'a', "ancestor_tnum", 'd'},
8574 {'P', "process_id", 'd'},
8575 {'i', "native_thread_id", 'd'}};
8578static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, const char **ptr, kmp_str_buf_t *field_buffer) {
8581 int rc, format_index, field_value;
8582 const char *width_left, *width_right;
8583 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8584 static const int FORMAT_SIZE = 20;
8585 char format[FORMAT_SIZE] = {0};
8586 char absolute_short_name = 0;
8588 KMP_DEBUG_ASSERT(gtid >= 0);
8589 KMP_DEBUG_ASSERT(th);
8590 KMP_DEBUG_ASSERT(**ptr == '%');
8591 KMP_DEBUG_ASSERT(field_buffer);
8593 __kmp_str_buf_clear(field_buffer);
8600 __kmp_str_buf_cat(field_buffer, "%", 1);
8611 right_justify = false;
8613 right_justify = true;
8617 width_left = width_right = NULL;
8618 if (**ptr >= '0' && **ptr <= '9') {
8626 format[format_index++] = '%';
8628 format[format_index++] = '-';
8630 format[format_index++] = '0';
8631 if (width_left && width_right) {
8635 while (i < 8 && width_left < width_right) {
8636 format[format_index++] = *width_left;
8644 found_valid_name = false;
8645 parse_long_name = (**ptr == '{');
8646 if (parse_long_name)
8648 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) / sizeof(__kmp_affinity_format_table[0]); ++i) {
8651 char short_name = __kmp_affinity_format_table[i].short_name;
8652 const char *long_name = __kmp_affinity_format_table[i].long_name;
8653 char field_format = __kmp_affinity_format_table[i].field_format;
8654 if (parse_long_name) {
8655 size_t length = KMP_STRLEN(long_name);
8656 if (strncmp(*ptr, long_name, length) == 0) {
8657 found_valid_name = true;
8660 } else if (**ptr == short_name) {
8661 found_valid_name = true;
8664 if (found_valid_name) {
8665 format[format_index++] = field_format;
8666 format[format_index++] = '\0';
8667 absolute_short_name = short_name;
8671 if (parse_long_name) {
8673 absolute_short_name = 0;
8681 switch (absolute_short_name) {
8683 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8686 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8689 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8692 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8695 static const int BUFFER_SIZE = 256;
8696 char buf[BUFFER_SIZE];
8697 __kmp_expand_host_name(buf, BUFFER_SIZE);
8698 rc = __kmp_str_buf_print(field_buffer, format, buf);
8701 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8704 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8707 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8711 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8712 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8714#if KMP_AFFINITY_SUPPORTED
8717 __kmp_str_buf_init(&buf);
8718 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8719 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8720 __kmp_str_buf_free(&buf);
8726 rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8728 if (parse_long_name) {
8737 KMP_ASSERT(format_index <= FORMAT_SIZE);
8747size_t __kmp_aux_capture_affinity(int gtid, const char *format, kmp_str_buf_t *buffer) {
8749 const char *parse_ptr;
8751 const kmp_info_t *th;
8752 kmp_str_buf_t field;
8754 KMP_DEBUG_ASSERT(buffer);
8755 KMP_DEBUG_ASSERT(gtid >= 0);
8757 __kmp_str_buf_init(&field);
8758 __kmp_str_buf_clear(buffer);
8760 th = __kmp_threads[gtid];
8766 if (parse_ptr == NULL || *parse_ptr == '\0') {
8767 parse_ptr = __kmp_affinity_format;
8769 KMP_DEBUG_ASSERT(parse_ptr);
8771 while (*parse_ptr != '\0') {
8773 if (*parse_ptr == '%') {
8775 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8776 __kmp_str_buf_catbuf(buffer, &field);
8780 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8785 __kmp_str_buf_free(&field);
8790void __kmp_aux_display_affinity(int gtid, const char *format) {
8792 __kmp_str_buf_init(&buf);
8793 __kmp_aux_capture_affinity(gtid, format, &buf);
8794 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8795 __kmp_str_buf_free(&buf);
8799void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8800 int blocktime = arg;
8806 __kmp_save_internal_controls(thread);
8809 if (blocktime < KMP_MIN_BLOCKTIME)
8810 blocktime = KMP_MIN_BLOCKTIME;
8811 else if (blocktime > KMP_MAX_BLOCKTIME)
8812 blocktime = KMP_MAX_BLOCKTIME;
8814 set__blocktime_team(thread->th.th_team, tid, blocktime);
8815 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8819 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8821 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8822 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8828 set__bt_set_team(thread->th.th_team, tid, bt_set);
8829 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8831 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n", __kmp_gtid_from_tid(tid, thread->th.th_team), thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups));
8837 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", __kmp_gtid_from_tid(tid, thread->th.th_team), thread->th.th_team->t.t_id, tid, blocktime));
8843void __kmp_aux_set_defaults(char const *str, size_t len) {
8844 if (!__kmp_init_serial) {
8845 __kmp_serial_initialize();
8847 __kmp_env_initialize(str);
8849 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
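// Select the reduction implementation (critical, atomic, or tree reduction
// with a plain or reduction barrier) from the team size, the number and size
// of reduction variables, and per-architecture heuristics;
// __kmp_force_reduction_method can override the choice when the compiler
// generated the required fast method.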
8857PACKED_REDUCTION_METHOD_T
8858__kmp_determine_reduction_method(
8859 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8860 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8861 kmp_critical_name *lck) {
8872 PACKED_REDUCTION_METHOD_T retval;
8876 KMP_DEBUG_ASSERT(lck);
8878#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
 (loc && \
8880 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8881#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8883 retval = critical_reduce_block;
8886 team_size = __kmp_get_team_num_threads(global_tid);
8887 if (team_size == 1) {
8889 retval = empty_reduce_block;
8893 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8895#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8896 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8897 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8899#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8900 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8901 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8903 int teamsize_cutoff = 4;
8905#if KMP_MIC_SUPPORTED
8906 if (__kmp_mic_type != non_mic) {
8907 teamsize_cutoff = 8;
8910 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8911 if (tree_available) {
8912 if (team_size <= teamsize_cutoff) {
8913 if (atomic_available) {
8914 retval = atomic_reduce_block;
8917 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8919 } else if (atomic_available) {
8920 retval = atomic_reduce_block;
8923#error "Unknown or unsupported OS"
8928#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8929 KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
8931#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8932 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8933 KMP_OS_WASI || KMP_OS_AIX
8937 if (atomic_available) {
8938 if (num_vars <= 2) {
8939 retval = atomic_reduce_block;
8945 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8946 if (atomic_available && (num_vars <= 3)) {
8947 retval = atomic_reduce_block;
8948 } else if (tree_available) {
8949 if ((reduce_size > (9 * sizeof(kmp_real64))) && (reduce_size < (2000 * sizeof(kmp_real64)))) {
8951 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8956#error "Unknown or unsupported OS"
8960#error "Unknown or unsupported architecture"
8968 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8971 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8973 int atomic_available, tree_available;
8975 switch ((forced_retval = __kmp_force_reduction_method)) {
8976 case critical_reduce_block:
8980 case atomic_reduce_block:
8981 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8982 if (!atomic_available) {
8983 KMP_WARNING(RedMethodNotSupported, "atomic");
8984 forced_retval = critical_reduce_block;
8988 case tree_reduce_block:
8989 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8990 if (!tree_available) {
8991 KMP_WARNING(RedMethodNotSupported, "tree");
8992 forced_retval = critical_reduce_block;
8994#if KMP_FAST_REDUCTION_BARRIER
8995 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9004 retval = forced_retval;
9007 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
9009#undef FAST_REDUCTION_TREE_METHOD_GENERATED
9010#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
9015kmp_int32 __kmp_get_reduce_method(void) {
9016 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
9021void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
9025void __kmp_hard_pause() {
9026 __kmp_pause_status = kmp_hard_paused;
9027 __kmp_internal_end_thread(-1);
9031void __kmp_resume_if_soft_paused() {
9032 if (__kmp_pause_status == kmp_soft_paused) {
9033 __kmp_pause_status = kmp_not_paused;
9035 for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
9036 kmp_info_t *thread = __kmp_threads[gtid];
9038 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
9040 if (fl.is_sleeping())
9042 else if (__kmp_try_suspend_mx(thread)) {
9043 __kmp_unlock_suspend_mx(thread);
9046 if (fl.is_sleeping()) {
9049 } else if (__kmp_try_suspend_mx(thread)) {
9050 __kmp_unlock_suspend_mx(thread);
9062int __kmp_pause_resource(kmp_pause_status_t level) {
9063 if (level == kmp_not_paused) {
9064 if (__kmp_pause_status == kmp_not_paused) {
9068 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9069 __kmp_pause_status == kmp_hard_paused);
9070 __kmp_pause_status = kmp_not_paused;
9073 } else if (level == kmp_soft_paused) {
9074 if (__kmp_pause_status != kmp_not_paused) {
9081 } else if (level == kmp_hard_paused) {
9082 if (__kmp_pause_status != kmp_not_paused) {
9095void __kmp_omp_display_env(int verbose) {
9096 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9097 if (__kmp_init_serial == 0)
9098 __kmp_do_serial_initialize();
9099 __kmp_display_env_impl(!verbose, verbose);
9100 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
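// Distributed-barrier maintenance for hot-team resizing: release threads
// still parked on the old barrier, wait until they have left it, then resize
// and reset the barrier structure.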
9104void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads, int new_nthreads) {
9106 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9108 kmp_info_t **other_threads = team->t.t_threads;
9112 for (int f = 1; f < old_nthreads; ++f) {
9113 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9115 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9121 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9122 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9126 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9128 team->t.t_threads[f]->th.th_used_in_team.store(2);
9129 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9132 team->t.b->go_release();
9138 int count = old_nthreads - 1;
9140 count = old_nthreads - 1;
9141 for (int f = 1; f < old_nthreads; ++f) {
9142 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9143 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9144 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9145 void *, other_threads[f]->th.th_sleep_loc);
9146 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9149 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9155 team->t.b->update_num_threads(new_nthreads);
9156 team->t.b->go_reset();
9159void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9161 KMP_DEBUG_ASSERT(team);
9167 for (int f = 1; f < new_nthreads; ++f) {
9168 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9169 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9171 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9172 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9173 (kmp_flag_32<false, false> *)NULL);
9179 int count = new_nthreads - 1;
9181 count = new_nthreads - 1;
9182 for (int f = 1; f < new_nthreads; ++f) {
9183 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9191kmp_info_t **__kmp_hidden_helper_threads;
9192kmp_info_t *__kmp_hidden_helper_main_thread;
9193std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9195kmp_int32 __kmp_hidden_helper_threads_num = 8;
9196kmp_int32 __kmp_enable_hidden_helper = TRUE;
9198kmp_int32 __kmp_hidden_helper_threads_num = 0;
9199kmp_int32 __kmp_enable_hidden_helper = FALSE;
9203std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9205void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9210 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9211 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9212 __kmp_hidden_helper_threads_num)
9218 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9219 __kmp_hidden_helper_initz_release();
9220 __kmp_hidden_helper_main_thread_wait();
9222 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9223 __kmp_hidden_helper_worker_thread_signal();
9229void __kmp_hidden_helper_threads_initz_routine() {
9231 const int gtid = __kmp_register_root(TRUE);
9232 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9233 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9234 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9235 __kmp_hidden_helper_threads_num;
9237 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9242 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9244 __kmp_hidden_helper_threads_deinitz_release();
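// Nesting-mode support: size the per-level nthreads table to the machine
// topology depth (KMP_HW_LAST levels); __kmp_set_nesting_mode_threads later
// maps topology ratios onto nested parallelism.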
9264void __kmp_init_nesting_mode() {
9265 int levels = KMP_HW_LAST;
9266 __kmp_nesting_mode_nlevels = levels;
9267 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9268 for (int i = 0; i < levels; ++i)
9269 __kmp_nesting_nth_level[i] = 0;
9270 if (__kmp_nested_nth.size < levels) {
9271 __kmp_nested_nth.nth =
9272 (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9273 __kmp_nested_nth.size = levels;
9278void __kmp_set_nesting_mode_threads() {
9279 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9281 if (__kmp_nesting_mode == 1)
9282 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9283 else if (__kmp_nesting_mode > 1)
9284 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9286 if (__kmp_topology) {
9288 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9289 loc < __kmp_nesting_mode_nlevels;
9290 loc++, hw_level++) {
9291 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9292 if (__kmp_nesting_nth_level[loc] == 1)
9296 if (__kmp_nesting_mode > 1 && loc > 1) {
9297 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9298 int num_cores = __kmp_topology->get_count(core_level);
9299 int upper_levels = 1;
9300 for (int level = 0; level < loc - 1; ++level)
9301 upper_levels *= __kmp_nesting_nth_level[level];
9302 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9303 __kmp_nesting_nth_level[loc - 1] =
9304 num_cores / __kmp_nesting_nth_level[loc - 2];
9306 __kmp_nesting_mode_nlevels = loc;
9307 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9309 if (__kmp_avail_proc >= 4) {
9310 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9311 __kmp_nesting_nth_level[1] = 2;
9312 __kmp_nesting_mode_nlevels = 2;
9314 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9315 __kmp_nesting_mode_nlevels = 1;
9317 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9319 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9320 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9322 set__nproc(thread, __kmp_nesting_nth_level[0]);
9323 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9324 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9325 if (get__max_active_levels(thread) > 1) {
9327 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9329 if (__kmp_nesting_mode == 1)
9330 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
9335#if !KMP_STATS_ENABLED
9336void __kmp_reset_stats() {}
9339int __kmp_omp_debug_struct_info = FALSE;
9340int __kmp_debugging = FALSE;
9342#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9343void __kmp_itt_fini_ittlib() {}
9344void __kmp_itt_init_ittlib() {}