#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
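/* Illustrative note (not part of the original source): compiler-generated
   code typically obtains the global thread number once and forwards it to
   later runtime entry points. A minimal sketch, assuming a hypothetical
   compiler-emitted ident_t `loc`:

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     // gtid can then be passed to e.g. __kmpc_master(&loc, gtid).
*/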
  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  semi2 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return __kmp_entry_thread()->th.th_root->r.r_active;
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
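/* Illustrative sketch (hypothetical names, not from the original source): for
   `#pragma omp parallel num_threads(n)` the compiler is expected to emit a
   push immediately before the fork, roughly:

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_threads(&loc, gtid, n);
     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);

   The pushed value only affects the next parallel region started by this
   thread; `loc` and `outlined_fn` stand in for compiler-emitted objects.
*/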
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
#endif

  va_start(ap, microtask);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
    if (lwt)
      ompt_frame = &(lwt->ompt_task_info.frame);
    else {
      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
    }
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif

#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
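/* Illustrative lowering sketch (hedged; names are hypothetical): a directive
   such as

     #pragma omp parallel
     { body(a); }

   is usually outlined into a microtask and dispatched through
   __kmpc_fork_call, roughly:

     void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a) { body(*a); }
     ...
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &a);

   The varargs after `argc` become the shared-variable pointers that
   __kmp_fork_call / __kmp_invoke_task_func hand to the microtask.
*/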
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));

  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
}

int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
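/* Illustrative sketch (hypothetical names): `#pragma omp teams num_teams(t)
   thread_limit(l)` is expected to lower to a push of the teams sizes followed
   by a fork of the teams master wrapper, e.g.:

     __kmpc_push_num_teams(&loc, gtid, t, l);
     __kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined, &a);

   __kmpc_fork_teams wraps the outlined function in __kmp_teams_master and
   restores the thread's teams state after the join, as shown above.
*/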
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_serialized_parallel(loc, global_tid);

  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
497 if (!TCR_4(__kmp_init_parallel))
498 __kmp_parallel_initialize();
501 __kmp_resume_if_soft_paused();
504 this_thr = __kmp_threads[global_tid];
505 serial_team = this_thr->th.th_serial_team;
508 kmp_task_team_t *task_team = this_thr->th.th_task_team;
511 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
512 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
516 KMP_DEBUG_ASSERT(serial_team);
517 KMP_ASSERT(serial_team->t.t_serialized);
518 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
519 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
520 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
521 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
524 if (ompt_enabled.enabled &&
525 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
526 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
527 if (ompt_enabled.ompt_callback_implicit_task) {
528 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
529 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
530 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
534 ompt_data_t *parent_task_data;
535 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
537 if (ompt_enabled.ompt_callback_parallel_end) {
538 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
539 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
540 ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
542 __ompt_lw_taskteam_unlink(this_thr);
543 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
549 top = serial_team->t.t_control_stack_top;
550 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
551 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
552 serial_team->t.t_control_stack_top = top->next;
557 serial_team->t.t_level--;
560 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
562 dispatch_private_info_t *disp_buffer =
563 serial_team->t.t_dispatch->th_disp_buffer;
564 serial_team->t.t_dispatch->th_disp_buffer =
565 serial_team->t.t_dispatch->th_disp_buffer->next;
566 __kmp_free(disp_buffer);
569 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
572 --serial_team->t.t_serialized;
573 if (serial_team->t.t_serialized == 0) {
577 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
578 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
579 __kmp_clear_x87_fpu_status_word();
580 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
581 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
585 this_thr->th.th_team = serial_team->t.t_parent;
586 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
589 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
590 this_thr->th.th_team_master =
591 serial_team->t.t_parent->t.t_threads[0];
592 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
595 this_thr->th.th_dispatch =
596 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
598 __kmp_pop_current_task_from_thread(this_thr);
600 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
601 this_thr->th.th_current_task->td_flags.executing = 1;
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    this_thr->th.th_task_team =
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
    KA_TRACE(20,
             ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
              "team %p\n",
              global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                  "depth of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_serialized));
620 if (__kmp_env_consistency_check)
621 __kmp_pop_parallel(global_tid, NULL);
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));
644 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
658 if (!__kmp_cpuinfo.initialized) {
659 __kmp_query_cpuid(&__kmp_cpuinfo);
661 if (!__kmp_cpuinfo.sse2) {
666 #elif KMP_COMPILER_MSVC
669 __sync_synchronize();
670 #endif // KMP_COMPILER_ICC
673 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
689 #error Unknown or unsupported architecture
692 #if OMPT_SUPPORT && OMPT_OPTIONAL
693 if (ompt_enabled.ompt_callback_flush) {
694 ompt_callbacks.ompt_callback(ompt_callback_flush)(
695 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
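/* Illustrative note (assumption, not from the original source): `#pragma omp
   flush` is expected to lower to a single call such as __kmpc_flush(&loc).
   The per-architecture #if chain above exists because on x86 the runtime only
   needs an mfence-style fence (skipped when SSE2 is unavailable), while other
   architectures use their own memory-barrier primitives.
*/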
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
712 if (!TCR_4(__kmp_init_parallel))
713 __kmp_parallel_initialize();
716 __kmp_resume_if_soft_paused();
719 if (__kmp_env_consistency_check) {
721 KMP_WARNING(ConstructIdentInvalid);
724 __kmp_check_barrier(global_tid, ct_barrier, loc);
728 ompt_frame_t *ompt_frame;
729 if (ompt_enabled.enabled) {
730 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
731 if (ompt_frame->enter_frame.ptr == NULL)
732 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
733 OMPT_STORE_RETURN_ADDRESS(global_tid);
736 __kmp_threads[global_tid]->th.th_ident = loc;
744 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
745 #if OMPT_SUPPORT && OMPT_OPTIONAL
746 if (ompt_enabled.enabled) {
747 ompt_frame->enter_frame = ompt_data_none;
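/* Illustrative sketch (hypothetical `loc`): an explicit `#pragma omp barrier`
   lowers to

     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));

   which funnels into __kmp_barrier(bs_plain_barrier, ...) exactly as the
   implicit barriers at the end of worksharing constructs do.
*/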
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
764 if (!TCR_4(__kmp_init_parallel))
765 __kmp_parallel_initialize();
768 __kmp_resume_if_soft_paused();
771 if (KMP_MASTER_GTID(global_tid)) {
773 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
777 #if OMPT_SUPPORT && OMPT_OPTIONAL
779 if (ompt_enabled.ompt_callback_master) {
780 kmp_info_t *this_thr = __kmp_threads[global_tid];
781 kmp_team_t *team = this_thr->th.th_team;
783 int tid = __kmp_tid_from_gtid(global_tid);
784 ompt_callbacks.ompt_callback(ompt_callback_master)(
785 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
786 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
787 OMPT_GET_RETURN_ADDRESS(0));
792 if (__kmp_env_consistency_check) {
793 #if KMP_USE_DYNAMIC_LOCK
795 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
797 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
800 __kmp_push_sync(global_tid, ct_master, loc, NULL);
802 __kmp_check_sync(global_tid, ct_master, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
820 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
821 KMP_POP_PARTITIONED_TIMER();
823 #if OMPT_SUPPORT && OMPT_OPTIONAL
824 kmp_info_t *this_thr = __kmp_threads[global_tid];
825 kmp_team_t *team = this_thr->th.th_team;
826 if (ompt_enabled.ompt_callback_master) {
827 int tid = __kmp_tid_from_gtid(global_tid);
828 ompt_callbacks.ompt_callback(ompt_callback_master)(
829 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
830 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
831 OMPT_GET_RETURN_ADDRESS(0));
835 if (__kmp_env_consistency_check) {
837 KMP_WARNING(ThreadIdentInvalid);
839 if (KMP_MASTER_GTID(global_tid))
840 __kmp_pop_sync(global_tid, ct_master, loc);
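/* Illustrative lowering sketch (names hypothetical): `#pragma omp master`
   becomes a guarded region,

     if (__kmpc_master(&loc, gtid)) {
       body();
       __kmpc_end_master(&loc, gtid);
     }

   i.e. only the thread for which __kmpc_master returns 1 executes the body
   and the matching __kmpc_end_master.
*/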
854 KMP_DEBUG_ASSERT(__kmp_init_serial);
  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
858 if (!TCR_4(__kmp_init_parallel))
859 __kmp_parallel_initialize();
862 __kmp_resume_if_soft_paused();
866 __kmp_itt_ordered_prep(gtid);
870 th = __kmp_threads[gtid];
872 #if OMPT_SUPPORT && OMPT_OPTIONAL
876 if (ompt_enabled.enabled) {
877 OMPT_STORE_RETURN_ADDRESS(gtid);
878 team = __kmp_team_from_gtid(gtid);
879 lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
881 th->th.ompt_thread_info.wait_id = lck;
882 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
885 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
886 if (ompt_enabled.ompt_callback_mutex_acquire) {
887 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
888 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
889 (ompt_wait_id_t)lck, codeptr_ra);
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);
899 #if OMPT_SUPPORT && OMPT_OPTIONAL
900 if (ompt_enabled.enabled) {
902 th->th.ompt_thread_info.state = ompt_state_work_parallel;
903 th->th.ompt_thread_info.wait_id = 0;
906 if (ompt_enabled.ompt_callback_mutex_acquired) {
907 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
908 ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
914 __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
932 __kmp_itt_ordered_end(gtid);
936 th = __kmp_threads[gtid];
  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);
943 #if OMPT_SUPPORT && OMPT_OPTIONAL
944 OMPT_STORE_RETURN_ADDRESS(gtid);
945 if (ompt_enabled.ompt_callback_mutex_released) {
946 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
948 (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
949 OMPT_LOAD_RETURN_ADDRESS(gtid));
954 #if KMP_USE_DYNAMIC_LOCK
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
962 kmp_indirect_lock_t **lck;
963 lck = (kmp_indirect_lock_t **)crit;
964 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
965 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
966 KMP_SET_I_LOCK_LOCATION(ilk, loc);
967 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
971 __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
976 __kmp_itt_critical_destroyed(ilk->lock);
982 KMP_DEBUG_ASSERT(*lck != NULL);
986 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
988 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
989 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
990 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
991 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
992 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
994 KMP_FSYNC_PREPARE(l); \
995 KMP_INIT_YIELD(spins); \
996 if (TCR_4(__kmp_nth) > \
997 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1000 KMP_YIELD_SPIN(spins); \
1002 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1004 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1005 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1006 __kmp_spin_backoff(&backoff); \
1007 if (TCR_4(__kmp_nth) > \
1008 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1011 KMP_YIELD_SPIN(spins); \
1015 KMP_FSYNC_ACQUIRED(l); \
1019 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1021 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1022 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1023 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1024 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1025 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1029 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1030 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
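/* The macros above inline the fast path of the test-and-set lock: read the
   poll word relaxed, then try a single acquire-CAS from "free" to
   "gtid + 1"; only on failure fall through to the yielding/backoff spin loop.
   A minimal standalone sketch of the same idea in C11 atomics (illustration
   only, not the runtime's data layout):

     #include <stdatomic.h>
     typedef struct { atomic_int poll; } tas_lock_t;  // 0 == free

     static void tas_acquire(tas_lock_t *l, int gtid) {
       int expected = 0, busy = gtid + 1;
       while (atomic_load_explicit(&l->poll, memory_order_relaxed) != 0 ||
              !atomic_compare_exchange_weak_explicit(
                  &l->poll, &expected, busy, memory_order_acquire,
                  memory_order_relaxed)) {
         expected = 0; // CAS overwrites the expected value; reset and retry
       }
     }
*/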
1034 #include <sys/syscall.h>
1037 #define FUTEX_WAIT 0
1040 #define FUTEX_WAKE 1
1044 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1046 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1047 kmp_int32 gtid_code = (gtid + 1) << 1; \
1049 KMP_FSYNC_PREPARE(ftx); \
1050 kmp_int32 poll_val; \
1051 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1052 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1053 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1054 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1056 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1058 KMP_LOCK_BUSY(1, futex))) { \
1061 poll_val |= KMP_LOCK_BUSY(1, futex); \
1064 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1065 NULL, NULL, 0)) != 0) { \
1070 KMP_FSYNC_ACQUIRED(ftx); \
1074 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1076 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1077 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1078 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1079 KMP_FSYNC_ACQUIRED(ftx); \
1087 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1089 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1091 KMP_FSYNC_RELEASING(ftx); \
1092 kmp_int32 poll_val = \
1093 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1094 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1095 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1096 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1099 KMP_YIELD(TCR_4(__kmp_nth) > \
1100 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1103 #endif // KMP_USE_FUTEX
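/* The futex variant keeps the same free / owner-tagged poll word but parks
   waiters in the kernel: the low bit of the poll value marks "there are
   waiters", the acquire path sets that bit before calling FUTEX_WAIT, and the
   release path issues FUTEX_WAKE only when the bit was set. This is the
   classic futex-based mutex pattern; syscall(__NR_futex, ...) is used because
   glibc provides no futex(2) wrapper.
*/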
1105 #else // KMP_USE_DYNAMIC_LOCK
1107 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1110 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1113 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1120 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1121 __kmp_init_user_lock_with_checks(lck);
1122 __kmp_set_user_lock_location(lck, loc);
1124 __kmp_itt_critical_creating(lck);
1135 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1140 __kmp_itt_critical_destroyed(lck);
1144 __kmp_destroy_user_lock_with_checks(lck);
1145 __kmp_user_lock_free(&idx, gtid, lck);
1146 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1147 KMP_DEBUG_ASSERT(lck != NULL);
1153 #endif // KMP_USE_DYNAMIC_LOCK
1166 kmp_critical_name *crit) {
1167 #if KMP_USE_DYNAMIC_LOCK
1168 #if OMPT_SUPPORT && OMPT_OPTIONAL
1169 OMPT_STORE_RETURN_ADDRESS(global_tid);
1170 #endif // OMPT_SUPPORT
1171 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1174 #if OMPT_SUPPORT && OMPT_OPTIONAL
1175 ompt_state_t prev_state = ompt_state_undefined;
1176 ompt_thread_info_t ti;
1178 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1184 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1185 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
1198 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1201 if (__kmp_env_consistency_check)
1202 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1210 __kmp_itt_critical_acquiring(lck);
1212 #if OMPT_SUPPORT && OMPT_OPTIONAL
1213 OMPT_STORE_RETURN_ADDRESS(gtid);
1214 void *codeptr_ra = NULL;
1215 if (ompt_enabled.enabled) {
1216 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1218 prev_state = ti.state;
1219 ti.wait_id = (ompt_wait_id_t)lck;
1220 ti.state = ompt_state_wait_critical;
1223 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1224 if (ompt_enabled.ompt_callback_mutex_acquire) {
1225 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1226 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1227 (ompt_wait_id_t)crit, codeptr_ra);
1233 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1236 __kmp_itt_critical_acquired(lck);
1238 #if OMPT_SUPPORT && OMPT_OPTIONAL
1239 if (ompt_enabled.enabled) {
1241 ti.state = prev_state;
1245 if (ompt_enabled.ompt_callback_mutex_acquired) {
1246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1247 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
1251 KMP_POP_PARTITIONED_TIMER();
1253 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1255 #endif // KMP_USE_DYNAMIC_LOCK
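/* Illustrative lowering sketch (names hypothetical): `#pragma omp critical
   (name)` brackets the protected region with this entry/exit pair, using a
   compiler-emitted kmp_critical_name as the lock cache:

     static kmp_critical_name crit_name;  // zero-initialized
     __kmpc_critical(&loc, gtid, &crit_name);
     critical_body();
     __kmpc_end_critical(&loc, gtid, &crit_name);

   The first thread through initializes the underlying lock and records it in
   crit_name; later entries reuse it.
*/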
1258 #if KMP_USE_DYNAMIC_LOCK
1261 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1263 #define KMP_TSX_LOCK(seq) lockseq_##seq
1265 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1268 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1269 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1271 #define KMP_CPUINFO_RTM 0
1275 if (hint & kmp_lock_hint_hle)
1276 return KMP_TSX_LOCK(hle);
1277 if (hint & kmp_lock_hint_rtm)
1278 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1279 if (hint & kmp_lock_hint_adaptive)
1280 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1283 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1284 return __kmp_user_lock_seq;
1285 if ((hint & omp_lock_hint_speculative) &&
1286 (hint & omp_lock_hint_nonspeculative))
1287 return __kmp_user_lock_seq;
1290 if (hint & omp_lock_hint_contended)
1291 return lockseq_queuing;
1294 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1298 if (hint & omp_lock_hint_speculative)
1299 return KMP_TSX_LOCK(hle);
1301 return __kmp_user_lock_seq;
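/* Illustrative usage (hypothetical variables): __kmp_map_hint_to_lock backs
   omp_init_lock_with_hint and the hint clause on critical, e.g.

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     ...
     omp_set_lock(&l);

   Contradictory hint combinations (contended|uncontended,
   speculative|nonspeculative) fall back to the default __kmp_user_lock_seq,
   as coded above.
*/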
1304 #if OMPT_SUPPORT && OMPT_OPTIONAL
1305 #if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1309 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1314 return kmp_mutex_impl_queuing;
1317 return kmp_mutex_impl_spin;
1320 return kmp_mutex_impl_speculative;
1323 return kmp_mutex_impl_none;
1325 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1328 switch (ilock->type) {
1330 case locktag_adaptive:
1332 return kmp_mutex_impl_speculative;
1334 case locktag_nested_tas:
1335 return kmp_mutex_impl_spin;
1337 case locktag_nested_futex:
1339 case locktag_ticket:
1340 case locktag_queuing:
1342 case locktag_nested_ticket:
1343 case locktag_nested_queuing:
1344 case locktag_nested_drdpa:
1345 return kmp_mutex_impl_queuing;
1347 return kmp_mutex_impl_none;
1352 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1353 switch (__kmp_user_lock_kind) {
1355 return kmp_mutex_impl_spin;
1362 return kmp_mutex_impl_queuing;
1367 return kmp_mutex_impl_speculative;
1370 return kmp_mutex_impl_none;
1373 #endif // KMP_USE_DYNAMIC_LOCK
1374 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
1392 kmp_user_lock_p lck;
1393 #if OMPT_SUPPORT && OMPT_OPTIONAL
1394 ompt_state_t prev_state = ompt_state_undefined;
1395 ompt_thread_info_t ti;
1397 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1399 codeptr = OMPT_GET_RETURN_ADDRESS(0);
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1404 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1406 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1408 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1409 if (KMP_IS_D_LOCK(lckseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lckseq));
1413 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1419 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1420 lck = (kmp_user_lock_p)lk;
1421 if (__kmp_env_consistency_check) {
1422 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1423 __kmp_map_hint_to_lock(hint));
1426 __kmp_itt_critical_acquiring(lck);
1428 #if OMPT_SUPPORT && OMPT_OPTIONAL
1429 if (ompt_enabled.enabled) {
1430 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1432 prev_state = ti.state;
1433 ti.wait_id = (ompt_wait_id_t)lck;
1434 ti.state = ompt_state_wait_critical;
1437 if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, (unsigned int)hint,
          __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
1444 #if KMP_USE_INLINED_TAS
1445 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1446 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1448 #elif KMP_USE_INLINED_FUTEX
1449 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1450 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1454 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1457 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1459 if (__kmp_env_consistency_check) {
1460 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1461 __kmp_map_hint_to_lock(hint));
1464 __kmp_itt_critical_acquiring(lck);
1466 #if OMPT_SUPPORT && OMPT_OPTIONAL
1467 if (ompt_enabled.enabled) {
1468 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1470 prev_state = ti.state;
1471 ti.wait_id = (ompt_wait_id_t)lck;
1472 ti.state = ompt_state_wait_critical;
1475 if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, (unsigned int)hint,
          __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
1482 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1484 KMP_POP_PARTITIONED_TIMER();
1487 __kmp_itt_critical_acquired(lck);
1489 #if OMPT_SUPPORT && OMPT_OPTIONAL
1490 if (ompt_enabled.enabled) {
1492 ti.state = prev_state;
1496 if (ompt_enabled.ompt_callback_mutex_acquired) {
1497 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1498 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
1503 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1507 #endif // KMP_USE_DYNAMIC_LOCK
1519 kmp_critical_name *crit) {
1520 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1524 #if KMP_USE_DYNAMIC_LOCK
1525 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1526 lck = (kmp_user_lock_p)crit;
1527 KMP_ASSERT(lck != NULL);
1528 if (__kmp_env_consistency_check) {
1529 __kmp_pop_sync(global_tid, ct_critical, loc);
1532 __kmp_itt_critical_releasing(lck);
1534 #if KMP_USE_INLINED_TAS
1535 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1536 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1538 #elif KMP_USE_INLINED_FUTEX
1539 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1540 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1544 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1547 kmp_indirect_lock_t *ilk =
1548 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1549 KMP_ASSERT(ilk != NULL);
1551 if (__kmp_env_consistency_check) {
1552 __kmp_pop_sync(global_tid, ct_critical, loc);
1555 __kmp_itt_critical_releasing(lck);
1557 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1560 #else // KMP_USE_DYNAMIC_LOCK
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
1573 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1576 KMP_ASSERT(lck != NULL);
1578 if (__kmp_env_consistency_check)
1579 __kmp_pop_sync(global_tid, ct_critical, loc);
1582 __kmp_itt_critical_releasing(lck);
1586 __kmp_release_user_lock_with_checks(lck, global_tid);
1588 #endif // KMP_USE_DYNAMIC_LOCK
1590 #if OMPT_SUPPORT && OMPT_OPTIONAL
1593 OMPT_STORE_RETURN_ADDRESS(global_tid);
1594 if (ompt_enabled.ompt_callback_mutex_released) {
1595 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1596 ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1600 KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1618 if (!TCR_4(__kmp_init_parallel))
1619 __kmp_parallel_initialize();
1622 __kmp_resume_if_soft_paused();
1625 if (__kmp_env_consistency_check)
1626 __kmp_check_barrier(global_tid, ct_barrier, loc);
1629 ompt_frame_t *ompt_frame;
1630 if (ompt_enabled.enabled) {
1631 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1632 if (ompt_frame->enter_frame.ptr == NULL)
1633 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1634 OMPT_STORE_RETURN_ADDRESS(global_tid);
1638 __kmp_threads[global_tid]->th.th_ident = loc;
1640 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1641 #if OMPT_SUPPORT && OMPT_OPTIONAL
1642 if (ompt_enabled.enabled) {
1643 ompt_frame->enter_frame = ompt_data_none;
1647 return (status != 0) ? 0 : 1;
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1662 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1680 if (!TCR_4(__kmp_init_parallel))
1681 __kmp_parallel_initialize();
1684 __kmp_resume_if_soft_paused();
1687 if (__kmp_env_consistency_check) {
1689 KMP_WARNING(ConstructIdentInvalid);
1691 __kmp_check_barrier(global_tid, ct_barrier, loc);
1695 ompt_frame_t *ompt_frame;
1696 if (ompt_enabled.enabled) {
1697 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1698 if (ompt_frame->enter_frame.ptr == NULL)
1699 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1700 OMPT_STORE_RETURN_ADDRESS(global_tid);
1704 __kmp_threads[global_tid]->th.th_ident = loc;
1706 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1707 #if OMPT_SUPPORT && OMPT_OPTIONAL
1708 if (ompt_enabled.enabled) {
1709 ompt_frame->enter_frame = ompt_data_none;
1715 if (__kmp_env_consistency_check) {
1719 if (global_tid < 0) {
1720 KMP_WARNING(ThreadIdentInvalid);
1726 __kmp_pop_sync(global_tid, ct_master, loc);
1746 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1751 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1754 #if OMPT_SUPPORT && OMPT_OPTIONAL
1755 kmp_info_t *this_thr = __kmp_threads[global_tid];
1756 kmp_team_t *team = this_thr->th.th_team;
1757 int tid = __kmp_tid_from_gtid(global_tid);
1759 if (ompt_enabled.enabled) {
1761 if (ompt_enabled.ompt_callback_work) {
1762 ompt_callbacks.ompt_callback(ompt_callback_work)(
1763 ompt_work_single_executor, ompt_scope_begin,
1764 &(team->t.ompt_team_info.parallel_data),
1765 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1766 1, OMPT_GET_RETURN_ADDRESS(0));
1769 if (ompt_enabled.ompt_callback_work) {
1770 ompt_callbacks.ompt_callback(ompt_callback_work)(
1771 ompt_work_single_other, ompt_scope_begin,
1772 &(team->t.ompt_team_info.parallel_data),
1773 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1774 1, OMPT_GET_RETURN_ADDRESS(0));
1775 ompt_callbacks.ompt_callback(ompt_callback_work)(
1776 ompt_work_single_other, ompt_scope_end,
1777 &(team->t.ompt_team_info.parallel_data),
1778 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1779 1, OMPT_GET_RETURN_ADDRESS(0));
1798 __kmp_exit_single(global_tid);
1799 KMP_POP_PARTITIONED_TIMER();
1801 #if OMPT_SUPPORT && OMPT_OPTIONAL
1802 kmp_info_t *this_thr = __kmp_threads[global_tid];
1803 kmp_team_t *team = this_thr->th.th_team;
1804 int tid = __kmp_tid_from_gtid(global_tid);
1806 if (ompt_enabled.ompt_callback_work) {
1807 ompt_callbacks.ompt_callback(ompt_callback_work)(
1808 ompt_work_single_executor, ompt_scope_end,
1809 &(team->t.ompt_team_info.parallel_data),
1810 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1811 OMPT_GET_RETURN_ADDRESS(0));
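/* Illustrative lowering sketch (names hypothetical): `#pragma omp single`
   becomes

     if (__kmpc_single(&loc, gtid)) {
       single_body();
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid);  // unless nowait was specified

   Only the thread that wins __kmp_enter_single runs the body; the OMPT
   callbacks above report executor vs. other accordingly.
*/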
1824 KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1827 #if OMPT_SUPPORT && OMPT_OPTIONAL
1828 if (ompt_enabled.ompt_callback_work) {
1829 ompt_work_t ompt_work_type = ompt_work_loop;
1830 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1831 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1835 ompt_work_type = ompt_work_loop;
1837 ompt_work_type = ompt_work_sections;
1839 ompt_work_type = ompt_work_distribute;
1844 KMP_DEBUG_ASSERT(ompt_work_type);
1846 ompt_callbacks.ompt_callback(ompt_callback_work)(
1847 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1848 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1851 if (__kmp_env_consistency_check)
1852 __kmp_pop_workshare(global_tid, ct_pdo, loc);
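/* Illustrative sketch (hypothetical names): a statically scheduled
   worksharing loop is bracketed by an init/fini pair; the fini call above is
   what reports the ompt_work_* scope_end event:

     int lb = 0, ub = N - 1, stride = 1, last = 0;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &stride, 1, 1);
     for (int i = lb; i <= ub; ++i) body(i);
     __kmpc_for_static_fini(&loc, gtid);
*/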
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2084 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2086 if (__kmp_env_consistency_check) {
2088 KMP_WARNING(ConstructIdentInvalid);
2095 *data_ptr = cpy_data;
2098 ompt_frame_t *ompt_frame;
2099 if (ompt_enabled.enabled) {
2100 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2101 if (ompt_frame->enter_frame.ptr == NULL)
2102 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2103 OMPT_STORE_RETURN_ADDRESS(gtid);
2108 __kmp_threads[gtid]->th.th_ident = loc;
2110 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2113 (*cpy_func)(cpy_data, *data_ptr);
2119 if (ompt_enabled.enabled) {
2120 OMPT_STORE_RETURN_ADDRESS(gtid);
2124 __kmp_threads[gtid]->th.th_ident = loc;
2127 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2128 #if OMPT_SUPPORT && OMPT_OPTIONAL
2129 if (ompt_enabled.enabled) {
2130 ompt_frame->enter_frame = ompt_data_none;
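/* Illustrative sketch (names hypothetical): for `#pragma omp single
   copyprivate(x)` every thread calls __kmpc_copyprivate with its own &x plus
   a compiler-emitted copy helper; the thread that executed the single region
   (didit == 1) publishes its pointer, and the two barriers above make the
   broadcast safe:

     void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
     ...
     __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);
*/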
2137 #define INIT_LOCK __kmp_init_user_lock_with_checks
2138 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2139 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2140 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2141 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2142 #define ACQUIRE_NESTED_LOCK_TIMED \
2143 __kmp_acquire_nested_user_lock_with_checks_timed
2144 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2145 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2146 #define TEST_LOCK __kmp_test_user_lock_with_checks
2147 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2148 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2149 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2154 #if KMP_USE_DYNAMIC_LOCK
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }
2218 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2220 #if OMPT_SUPPORT && OMPT_OPTIONAL
2222 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2224 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2225 if (ompt_enabled.ompt_callback_lock_init) {
2226 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2227 ompt_mutex_lock, (omp_lock_hint_t)hint,
2228 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }
2242 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2244 #if OMPT_SUPPORT && OMPT_OPTIONAL
2246 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2248 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2249 if (ompt_enabled.ompt_callback_lock_init) {
2250 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2251 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2252 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2258 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2262 #if KMP_USE_DYNAMIC_LOCK
2264 KMP_DEBUG_ASSERT(__kmp_init_serial);
2265 if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2268 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2270 #if OMPT_SUPPORT && OMPT_OPTIONAL
2272 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2274 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2275 if (ompt_enabled.ompt_callback_lock_init) {
2276 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2277 ompt_mutex_lock, omp_lock_hint_none,
2278 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2283 #else // KMP_USE_DYNAMIC_LOCK
  static char const *const func = "omp_init_lock";
2286 kmp_user_lock_p lck;
2287 KMP_DEBUG_ASSERT(__kmp_init_serial);
2289 if (__kmp_env_consistency_check) {
2290 if (user_lock == NULL) {
2291 KMP_FATAL(LockIsUninitialized, func);
2295 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2299 lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2304 lck = (kmp_user_lock_p)user_lock;
2308 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2311 __kmp_set_user_lock_location(lck, loc);
2313 #if OMPT_SUPPORT && OMPT_OPTIONAL
2315 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2317 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2318 if (ompt_enabled.ompt_callback_lock_init) {
2319 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2320 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2321 (ompt_wait_id_t)user_lock, codeptr);
2326 __kmp_itt_lock_creating(lck);
2329 #endif // KMP_USE_DYNAMIC_LOCK
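/* Illustrative user-level view (assumption about the mapping; variables are
   hypothetical): these entry points back the omp_lock_t API, so a typical
   sequence is

     omp_lock_t l;
     omp_init_lock(&l);      // handled by __kmpc_init_lock
     omp_set_lock(&l);       // handled by __kmpc_set_lock
     omp_unset_lock(&l);     // handled by __kmpc_unset_lock
     omp_destroy_lock(&l);   // handled by __kmpc_destroy_lock

   With KMP_USE_DYNAMIC_LOCK the lock word itself encodes the lock kind
   (direct tag) or points at an indirect lock object.
*/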
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2334 #if KMP_USE_DYNAMIC_LOCK
2336 KMP_DEBUG_ASSERT(__kmp_init_serial);
2337 if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2340 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2342 #if OMPT_SUPPORT && OMPT_OPTIONAL
2344 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2346 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2347 if (ompt_enabled.ompt_callback_lock_init) {
2348 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2349 ompt_mutex_nest_lock, omp_lock_hint_none,
2350 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2355 #else // KMP_USE_DYNAMIC_LOCK
  static char const *const func = "omp_init_nest_lock";
2358 kmp_user_lock_p lck;
2359 KMP_DEBUG_ASSERT(__kmp_init_serial);
2361 if (__kmp_env_consistency_check) {
2362 if (user_lock == NULL) {
2363 KMP_FATAL(LockIsUninitialized, func);
2367 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
2372 lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
2378 lck = (kmp_user_lock_p)user_lock;
2382 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2385 INIT_NESTED_LOCK(lck);
2386 __kmp_set_user_lock_location(lck, loc);
2388 #if OMPT_SUPPORT && OMPT_OPTIONAL
2390 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2392 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2393 if (ompt_enabled.ompt_callback_lock_init) {
2394 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2395 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2396 (ompt_wait_id_t)user_lock, codeptr);
2401 __kmp_itt_lock_creating(lck);
2404 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2408 #if KMP_USE_DYNAMIC_LOCK
2411 kmp_user_lock_p lck;
2412 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2413 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2415 lck = (kmp_user_lock_p)user_lock;
2417 __kmp_itt_lock_destroyed(lck);
2419 #if OMPT_SUPPORT && OMPT_OPTIONAL
2421 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2423 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2424 if (ompt_enabled.ompt_callback_lock_destroy) {
2425 kmp_user_lock_p lck;
2426 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2427 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2429 lck = (kmp_user_lock_p)user_lock;
2431 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2432 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2435 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2437 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2441 lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2446 lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2453 #if OMPT_SUPPORT && OMPT_OPTIONAL
2455 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2457 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2458 if (ompt_enabled.ompt_callback_lock_destroy) {
2459 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2460 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2465 __kmp_itt_lock_destroyed(lck);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2480 __kmp_user_lock_free(user_lock, gtid, lck);
2482 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2487 #if KMP_USE_DYNAMIC_LOCK
2490 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2491 __kmp_itt_lock_destroyed(ilk->lock);
2493 #if OMPT_SUPPORT && OMPT_OPTIONAL
2495 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2497 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2498 if (ompt_enabled.ompt_callback_lock_destroy) {
2499 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2500 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2503 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2505 #else // KMP_USE_DYNAMIC_LOCK
2507 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
2512 lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
2518 lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2525 #if OMPT_SUPPORT && OMPT_OPTIONAL
2527 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2529 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2530 if (ompt_enabled.ompt_callback_lock_destroy) {
2531 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2532 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2537 __kmp_itt_lock_destroyed(lck);
2540 DESTROY_NESTED_LOCK(lck);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
2555 __kmp_user_lock_free(user_lock, gtid, lck);
2557 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2562 #if KMP_USE_DYNAMIC_LOCK
2563 int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2569 #if OMPT_SUPPORT && OMPT_OPTIONAL
2571 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2573 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2574 if (ompt_enabled.ompt_callback_mutex_acquire) {
2575 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2576 ompt_mutex_lock, omp_lock_hint_none,
2577 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2581 #if KMP_USE_INLINED_TAS
2582 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2583 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2585 #elif KMP_USE_INLINED_FUTEX
2586 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2587 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2591 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2594 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2596 #if OMPT_SUPPORT && OMPT_OPTIONAL
2597 if (ompt_enabled.ompt_callback_mutex_acquired) {
2598 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2599 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2603 #else // KMP_USE_DYNAMIC_LOCK
2605 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2609 lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2614 lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2622 __kmp_itt_lock_acquiring(lck);
2624 #if OMPT_SUPPORT && OMPT_OPTIONAL
2626 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2628 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2629 if (ompt_enabled.ompt_callback_mutex_acquire) {
2630 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2631 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2632 (ompt_wait_id_t)lck, codeptr);
2636 ACQUIRE_LOCK(lck, gtid);
2639 __kmp_itt_lock_acquired(lck);
2642 #if OMPT_SUPPORT && OMPT_OPTIONAL
2643 if (ompt_enabled.ompt_callback_mutex_acquired) {
2644 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2645 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2649 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2653 #if KMP_USE_DYNAMIC_LOCK
2656 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2658 #if OMPT_SUPPORT && OMPT_OPTIONAL
2660 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2662 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2663 if (ompt_enabled.enabled) {
2664 if (ompt_enabled.ompt_callback_mutex_acquire) {
2665 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2666 ompt_mutex_nest_lock, omp_lock_hint_none,
2667 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2672 int acquire_status =
2673 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2674 (void) acquire_status;
2676 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2679 #if OMPT_SUPPORT && OMPT_OPTIONAL
2680 if (ompt_enabled.enabled) {
2681 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2682 if (ompt_enabled.ompt_callback_mutex_acquired) {
2684 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2685 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2688 if (ompt_enabled.ompt_callback_nest_lock) {
2690 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2691 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
2697 #else // KMP_USE_DYNAMIC_LOCK
2699 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2718 __kmp_itt_lock_acquiring(lck);
2720 #if OMPT_SUPPORT && OMPT_OPTIONAL
2722 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2724 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2725 if (ompt_enabled.enabled) {
2726 if (ompt_enabled.ompt_callback_mutex_acquire) {
2727 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2728 ompt_mutex_nest_lock, omp_lock_hint_none,
2729 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
2734 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2737 __kmp_itt_lock_acquired(lck);
2740 #if OMPT_SUPPORT && OMPT_OPTIONAL
2741 if (ompt_enabled.enabled) {
2742 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2743 if (ompt_enabled.ompt_callback_mutex_acquired) {
2745 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2746 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2749 if (ompt_enabled.ompt_callback_nest_lock) {
2751 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2752 ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
2758 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2762 #if KMP_USE_DYNAMIC_LOCK
2764 int tag = KMP_EXTRACT_D_TAG(user_lock);
2766 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2768 #if KMP_USE_INLINED_TAS
2769 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2770 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2772 #elif KMP_USE_INLINED_FUTEX
2773 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2774 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2778 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2781 #if OMPT_SUPPORT && OMPT_OPTIONAL
2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2785 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2786 if (ompt_enabled.ompt_callback_mutex_released) {
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2788 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2792 #else // KMP_USE_DYNAMIC_LOCK
2794 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2805 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2807 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2810 #if OMPT_SUPPORT && OMPT_OPTIONAL
2812 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2814 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2815 if (ompt_enabled.ompt_callback_mutex_released) {
2816 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2817 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2823 lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2837 __kmp_itt_lock_releasing(lck);
2840 RELEASE_LOCK(lck, gtid);
2842 #if OMPT_SUPPORT && OMPT_OPTIONAL
2844 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2846 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2847 if (ompt_enabled.ompt_callback_mutex_released) {
2848 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2849 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2853 #endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2858 #if KMP_USE_DYNAMIC_LOCK
2861 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2863 int release_status =
2864 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2865 (void) release_status;
2867 #if OMPT_SUPPORT && OMPT_OPTIONAL
2869 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2871 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2872 if (ompt_enabled.enabled) {
2873 if (release_status == KMP_LOCK_RELEASED) {
2874 if (ompt_enabled.ompt_callback_mutex_released) {
2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2877 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2879 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2881 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2882 ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2887 #else // KMP_USE_DYNAMIC_LOCK
2889 kmp_user_lock_p lck;
2893 if ((__kmp_user_lock_kind == lk_tas) &&
2894 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2895 OMP_NEST_LOCK_T_SIZE)) {
2896 #if KMP_OS_LINUX && \
2897 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2899 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2901 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2904 #if OMPT_SUPPORT && OMPT_OPTIONAL
2905 int release_status = KMP_LOCK_STILL_HELD;
2908 if (--(tl->lk.depth_locked) == 0) {
2909 TCW_4(tl->lk.poll, 0);
2910 #if OMPT_SUPPORT && OMPT_OPTIONAL
2911 release_status = KMP_LOCK_RELEASED;
2916 #if OMPT_SUPPORT && OMPT_OPTIONAL
2918 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2920 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2921 if (ompt_enabled.enabled) {
2922 if (release_status == KMP_LOCK_RELEASED) {
2923 if (ompt_enabled.ompt_callback_mutex_released) {
2925 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2926 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2928 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2930 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2931 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
2938 lck = (kmp_user_lock_p)user_lock;
2942 else if ((__kmp_user_lock_kind == lk_futex) &&
2943 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2944 OMP_NEST_LOCK_T_SIZE)) {
2945 lck = (kmp_user_lock_p)user_lock;
2949 lck = __kmp_lookup_user_lock(user_lock,
"omp_unset_nest_lock");
2953 __kmp_itt_lock_releasing(lck);
2957 release_status = RELEASE_NESTED_LOCK(lck, gtid);
2958 #if OMPT_SUPPORT && OMPT_OPTIONAL
2960 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2962 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2963 if (ompt_enabled.enabled) {
2964 if (release_status == KMP_LOCK_RELEASED) {
2965 if (ompt_enabled.ompt_callback_mutex_released) {
2967 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2968 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2970 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2972 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2973 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
2978 #endif // KMP_USE_DYNAMIC_LOCK
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
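/* Illustrative usage sketch (not part of this file): when a program is built
   with an OpenMP compiler and linked against this runtime, the user-level
   plain-lock API is ultimately serviced by the same lock machinery exercised
   by the set/unset/test entry points above. A minimal program touching the
   init/test/set/unset/destroy paths, assuming a standard -fopenmp toolchain:

   #include <omp.h>
   #include <stdio.h>

   int main(void) {
     omp_lock_t lock;
     int hits = 0;
     omp_init_lock(&lock);
   #pragma omp parallel num_threads(4)
     {
       // omp_test_lock() returns nonzero only if this thread got the lock.
       if (omp_test_lock(&lock)) {
         hits++;                 // protected update
         omp_unset_lock(&lock);  // release
       } else {
         omp_set_lock(&lock);    // blocking acquire
         hits++;
         omp_unset_lock(&lock);
       }
     }
     omp_destroy_lock(&lock);
     printf("hits = %d\n", hits);
     return 0;
   }
*/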
/* try to acquire the nestable lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
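/* Illustrative usage sketch (not part of this file): nestable locks let the
   owning thread re-acquire the same lock, which is exactly what the
   acquire_status/release_status bookkeeping above distinguishes (first
   acquisition vs. re-acquisition, last release vs. partial release). A small
   program using the standard nestable-lock API:

   #include <omp.h>

   static omp_nest_lock_t nlock;
   static int counter = 0;

   static void bump(int n) {
     omp_set_nest_lock(&nlock);   // re-entrant: safe even if already held
     counter += n;
     omp_unset_nest_lock(&nlock);
   }

   int main(void) {
     omp_init_nest_lock(&nlock);
   #pragma omp parallel num_threads(4)
     {
       omp_set_nest_lock(&nlock); // outer acquisition
       bump(1);                   // nested acquisition of the same lock
       omp_unset_nest_lock(&nlock);
     }
     omp_destroy_nest_lock(&nlock);
     return counter;
   }
*/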
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
// ------------------------------------------------------------------------
// create critical block and acquire the lock for the reduction

static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as
  // a serial overhead span (although it's used for an internal purpose only)
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if the lock is already initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow
  // the normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer to the dynamically allocated critical section.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct?
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
/* 2.a.i. Reduce Block without a terminating barrier */
kmp_int32 __kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_team_t *team;
  kmp_info_t *th;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // A reduction clause cannot be a stand-alone directive, but make sure the
  // parallel runtime is initialized anyway.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // The packed reduction method is kept in a thread-specific variable so the
  // matching __kmpc_end_reduce_nowait() call can retrieve it.
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen for the atomic method)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's used for
// an internal purpose)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
/* 2.a.ii. Finish the execution of a reduce nowait */
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required

  } else if (packed_reduction_method == atomic_reduce_block) {

    // the code gen does not emit this call for the atomic reduce block

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
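/* Illustrative lowering sketch (not generated by this file): for a construct
   such as "#pragma omp parallel for reduction(+:sum) nowait", a compiler
   targeting this runtime typically emits a per-thread epilogue of the shape
   below. The 0/1/2 return-value convention comes from __kmpc_reduce_nowait():
   1 means this thread must fold its copy into the shared result and call
   __kmpc_end_reduce_nowait(), 2 means each thread combines atomically, 0 means
   the contribution was already folded in by the reduction barrier. The names
   reduce_data_t, reducer and reduce_epilogue are hypothetical; "loc" is the
   compiler-built ident_t and "gtid" comes from __kmpc_global_thread_num().

   typedef struct { double sum; } reduce_data_t;
   static kmp_critical_name reduce_lock; // zero-initialized by the compiler

   static void reducer(void *lhs, void *rhs) { // combines two private copies
     ((reduce_data_t *)lhs)->sum += ((reduce_data_t *)rhs)->sum;
   }

   static void reduce_epilogue(ident_t *loc, kmp_int32 gtid,
                               double local_sum, double *shared_sum) {
     reduce_data_t my_copy = { local_sum };
     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(my_copy), &my_copy,
                                  reducer, &reduce_lock)) {
     case 1: // this thread holds the combined value: fold into shared result
       *shared_sum += my_copy.sum;
       __kmpc_end_reduce_nowait(loc, gtid, &reduce_lock);
       break;
     case 2: { // atomic method: every thread combines its own copy atomically
   #pragma omp atomic
       *shared_sum += my_copy.sum;
       break;
     }
     default: // 0: contribution already folded in by the reduction barrier
       break;
     }
   }
*/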
/* 2.b.i. Reduce Block with a terminating barrier */
kmp_int32 __kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_team_t *team;
  kmp_info_t *th;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // make sure the parallel runtime is initialized
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
/* 2.b.ii. Finish the execution of a blocking reduce */
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_team_t *team;
  kmp_info_t *th;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// usage: if team size == 1, no synchronization is required
// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */
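/* Illustrative usage sketch (not part of this file): from the user's point of
   view the fast reduction entry points above are reached through an ordinary
   reduction clause. With an OpenMP-enabled C compiler linked against this
   runtime, a loop like the one below typically compiles into per-thread
   partial sums combined via __kmpc_reduce_nowait()/__kmpc_reduce() and the
   matching end calls, with the method (critical, atomic or tree) chosen by
   __kmp_determine_reduction_method().

   #include <stdio.h>

   int main(void) {
     double sum = 0.0;
     const int n = 1000000;
   #pragma omp parallel for reduction(+ : sum)
     for (int i = 0; i < n; ++i) {
       sum += 1.0 / (double)(i + 1);
     }
     printf("harmonic(%d) = %f\n", n, sum);
     return 0;
   }
*/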
kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
} // __kmpc_get_parent_taskid
/* Initialize doacross loop information. The compiler is expected to send
   inclusive bounds, e.g. for (i = 2; i < 9; i += 2) lo = 2, up = 8, st = 2. */
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count, starting with the range of dims[0] which is not
  // kept in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                       __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization is still in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
#if OMP_50_ENABLED
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
#endif // OMP_50_ENABLED
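/* Illustrative usage sketch (not part of this file): __kmpc_pause_resource()
   appears to be the compiler-facing wrapper around the same internal
   __kmp_pause_resource() machinery that the OpenMP 5.0 user API reaches. From
   user code, the standard way to request that the runtime release its
   resources between bursts of parallel work is omp_pause_resource_all(), which
   returns 0 on success:

   #include <omp.h>
   #include <stdio.h>

   int main(void) {
     double x = 0.0;
   #pragma omp parallel for reduction(+ : x)
     for (int i = 0; i < 1000; ++i)
       x += i;

     // Soft pause: worker threads may be released; the runtime re-initializes
     // automatically on the next parallel construct.
     if (omp_pause_resource_all(omp_pause_soft) != 0)
       printf("pause not supported or failed\n");

   #pragma omp parallel for reduction(+ : x)
     for (int i = 0; i < 1000; ++i)
       x += i;
     printf("x = %f\n", x);
     return 0;
   }
*/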