16 #include "kmp_error.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
23 #define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
  kmp_int32 gtid = __kmp_entry_gtid();
  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
  return gtid;
}

kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
  return TCR_4(__kmp_all_nth);
}

kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  semi2 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
219 return __kmp_entry_thread()->th.th_root->r.r_active;
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
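// Illustrative user-level sketch (not runtime code, assumes Clang/icc-style
// lowering): the num_threads and proc_bind clauses below are what typically
// produce calls to __kmpc_push_num_threads()/__kmpc_push_proc_bind() right
// before the fork. Kept under #if 0 so it does not affect this file.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel num_threads(4) proc_bind(close)
  printf("hello from thread %d\n", omp_get_thread_num());
  return 0;
}
#endif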
263 int gtid = __kmp_entry_gtid();
265 #if (KMP_STATS_ENABLED)
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
285 va_start(ap, microtask);
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
294 OMPT_STORE_RETURN_ADDRESS(gtid);
#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask,
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
323 #if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
328 KMP_POP_PARTITIONED_TIMER();
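// Illustrative sketch (not runtime code): a plain parallel region. The
// compiler outlines the block into a microtask and typically lowers the
// directive to a __kmpc_fork_call() like the one defined above, passing the
// address of the shared variable as a microtask argument. Kept under #if 0.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  int counter = 0;
#pragma omp parallel shared(counter)
  {
#pragma omp atomic
    counter++;
  }
  printf("threads that ran: %d\n", counter);
  return 0;
}
#endif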
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}
392 int gtid = __kmp_entry_gtid();
393 kmp_info_t *this_thr = __kmp_threads[gtid];
395 va_start(ap, microtask);
397 #if KMP_STATS_ENABLED
400 if (previous_state == stats_state_e::SERIAL_REGION) {
401 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
403 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;
413 kmp_team_t *parent_team = this_thr->th.th_team;
414 int tid = __kmp_tid_from_gtid(gtid);
415 if (ompt_enabled.enabled) {
416 parent_team->t.t_implicit_task_taskdata[tid]
417 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
419 OMPT_STORE_RETURN_ADDRESS(gtid);
424 if (this_thr->th.th_teams_size.nteams == 0) {
425 __kmp_push_num_teams(loc, gtid, 0, 0);
427 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) __kmp_teams_master,
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
                  kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
443 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445 this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
455 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456 this_thr->th.th_current_task->td_icvs.thread_limit =
457 this_thr->th.th_cg_roots->cg_thread_limit;
459 this_thr->th.th_teams_microtask = NULL;
460 this_thr->th.th_teams_level = 0;
461 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
463 #if KMP_STATS_ENABLED
464 if (previous_state == stats_state_e::SERIAL_REGION) {
465 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466 KMP_SET_THREAD_STATE(previous_state);
468 KMP_POP_PARTITIONED_TIMER();
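// Illustrative sketch (not runtime code, needs a compiler with OpenMP 5.0
// host teams support): a teams construct on the host is typically lowered to
// __kmpc_fork_teams() with __kmp_teams_master as the wrapped task, and the
// num_teams()/thread_limit() clauses reach __kmp_push_num_teams(). #if 0'd.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp teams num_teams(2) thread_limit(4)
  printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  return 0;
}
#endif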
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
495 __kmp_assert_valid_gtid(global_tid);
497 OMPT_STORE_RETURN_ADDRESS(global_tid);
499 __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
523 __kmp_assert_valid_gtid(global_tid);
524 if (!TCR_4(__kmp_init_parallel))
525 __kmp_parallel_initialize();
527 __kmp_resume_if_soft_paused();
529 this_thr = __kmp_threads[global_tid];
530 serial_team = this_thr->th.th_serial_team;
532 kmp_task_team_t *task_team = this_thr->th.th_task_team;
534 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
535 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
538 KMP_DEBUG_ASSERT(serial_team);
539 KMP_ASSERT(serial_team->t.t_serialized);
540 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
541 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
542 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
543 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
546 if (ompt_enabled.enabled &&
547 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
548 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
549 if (ompt_enabled.ompt_callback_implicit_task) {
550 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
551 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
552 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
556 ompt_data_t *parent_task_data;
557 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559 if (ompt_enabled.ompt_callback_parallel_end) {
560 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
561 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
562 ompt_parallel_invoker_program | ompt_parallel_team,
563 OMPT_LOAD_RETURN_ADDRESS(global_tid));
565 __ompt_lw_taskteam_unlink(this_thr);
566 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
572 top = serial_team->t.t_control_stack_top;
573 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
574 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
575 serial_team->t.t_control_stack_top = top->next;
580 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582 dispatch_private_info_t *disp_buffer =
583 serial_team->t.t_dispatch->th_disp_buffer;
584 serial_team->t.t_dispatch->th_disp_buffer =
585 serial_team->t.t_dispatch->th_disp_buffer->next;
586 __kmp_free(disp_buffer);
588 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
590 --serial_team->t.t_serialized;
591 if (serial_team->t.t_serialized == 0) {
595 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
596 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
597 __kmp_clear_x87_fpu_status_word();
598 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
599 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
603 __kmp_pop_current_task_from_thread(this_thr);
605 if (ompd_state & OMPD_ENABLE_BP)
606 ompd_bp_parallel_end();
609 this_thr->th.th_team = serial_team->t.t_parent;
610 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
613 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
614 this_thr->th.th_team_master =
615 serial_team->t.t_parent->t.t_threads[0];
616 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
619 this_thr->th.th_dispatch =
620 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
623 this_thr->th.th_current_task->td_flags.executing = 1;
625 if (__kmp_tasking_mode != tskm_immediate_exec) {
627 this_thr->th.th_task_team =
628 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
    KA_TRACE(20,
             ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
              "team %p\n",
              global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
635 if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                  "depth of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_serialized));
642 serial_team->t.t_level--;
643 if (__kmp_env_consistency_check)
644 __kmp_pop_parallel(global_tid, NULL);
646 if (ompt_enabled.enabled)
647 this_thr->th.ompt_thread_info.state =
648 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
649 : ompt_state_work_parallel);
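// Illustrative sketch (not runtime code): when the if() clause of a parallel
// region evaluates false, compilers typically emit
// __kmpc_serialized_parallel()/__kmpc_end_serialized_parallel() around the
// body instead of forking, which is the path torn down above. Kept under #if 0.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  int n = 1;
#pragma omp parallel if (n > 4)
  printf("threads in region: %d\n", omp_get_num_threads()); // prints 1
  return 0;
}
#endif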
  KC_TRACE(10, ("__kmpc_flush: called\n"));
667 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
681 if (!__kmp_cpuinfo.initialized) {
682 __kmp_query_cpuid(&__kmp_cpuinfo);
684 if (!__kmp_cpuinfo.flags.sse2) {
689 #elif KMP_COMPILER_MSVC
692 __sync_synchronize();
696 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
702 #error Unknown or unsupported architecture
705 #if OMPT_SUPPORT && OMPT_OPTIONAL
706 if (ompt_enabled.ompt_callback_flush) {
707 ompt_callbacks.ompt_callback(ompt_callback_flush)(
708 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
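// Illustrative sketch (not runtime code): a flush directive in user code is
// typically lowered to __kmpc_flush(), which issues the architecture-specific
// memory fence selected above. Kept under #if 0.
#if 0
#include <omp.h>

int ready = 0;

void signal_ready(void) {
  ready = 1;
#pragma omp flush(ready)
}
#endif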
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
724 __kmp_assert_valid_gtid(global_tid);
726 if (!TCR_4(__kmp_init_parallel))
727 __kmp_parallel_initialize();
729 __kmp_resume_if_soft_paused();
731 if (__kmp_env_consistency_check) {
733 KMP_WARNING(ConstructIdentInvalid);
735 __kmp_check_barrier(global_tid, ct_barrier, loc);
739 ompt_frame_t *ompt_frame;
740 if (ompt_enabled.enabled) {
741 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
742 if (ompt_frame->enter_frame.ptr == NULL)
743 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
745 OMPT_STORE_RETURN_ADDRESS(global_tid);
747 __kmp_threads[global_tid]->th.th_ident = loc;
755 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
756 #if OMPT_SUPPORT && OMPT_OPTIONAL
757 if (ompt_enabled.enabled) {
758 ompt_frame->enter_frame = ompt_data_none;
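// Illustrative sketch (not runtime code): an explicit barrier becomes a call
// to __kmpc_barrier() with the plain barrier type, as implemented above.
// Kept under #if 0.
#if 0
#include <omp.h>
#include <stdio.h>

void phased_work(void) {
#pragma omp parallel
  {
    printf("phase 1: thread %d\n", omp_get_thread_num());
#pragma omp barrier
    printf("phase 2: thread %d\n", omp_get_thread_num());
  }
}
#endif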
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
774 __kmp_assert_valid_gtid(global_tid);
776 if (!TCR_4(__kmp_init_parallel))
777 __kmp_parallel_initialize();
779 __kmp_resume_if_soft_paused();
781 if (KMP_MASTER_GTID(global_tid)) {
783 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
787 #if OMPT_SUPPORT && OMPT_OPTIONAL
789 if (ompt_enabled.ompt_callback_masked) {
790 kmp_info_t *this_thr = __kmp_threads[global_tid];
791 kmp_team_t *team = this_thr->th.th_team;
793 int tid = __kmp_tid_from_gtid(global_tid);
794 ompt_callbacks.ompt_callback(ompt_callback_masked)(
795 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
796 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
797 OMPT_GET_RETURN_ADDRESS(0));
802 if (__kmp_env_consistency_check) {
803 #if KMP_USE_DYNAMIC_LOCK
805 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
807 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
810 __kmp_push_sync(global_tid, ct_master, loc, NULL);
812 __kmp_check_sync(global_tid, ct_master, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
829 __kmp_assert_valid_gtid(global_tid);
830 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
831 KMP_POP_PARTITIONED_TIMER();
833 #if OMPT_SUPPORT && OMPT_OPTIONAL
834 kmp_info_t *this_thr = __kmp_threads[global_tid];
835 kmp_team_t *team = this_thr->th.th_team;
836 if (ompt_enabled.ompt_callback_masked) {
837 int tid = __kmp_tid_from_gtid(global_tid);
838 ompt_callbacks.ompt_callback(ompt_callback_masked)(
839 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
840 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
841 OMPT_GET_RETURN_ADDRESS(0));
845 if (__kmp_env_consistency_check) {
846 if (KMP_MASTER_GTID(global_tid))
847 __kmp_pop_sync(global_tid, ct_master, loc);
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
863 __kmp_assert_valid_gtid(global_tid);
865 if (!TCR_4(__kmp_init_parallel))
866 __kmp_parallel_initialize();
868 __kmp_resume_if_soft_paused();
870 tid = __kmp_tid_from_gtid(global_tid);
873 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
877 #if OMPT_SUPPORT && OMPT_OPTIONAL
879 if (ompt_enabled.ompt_callback_masked) {
880 kmp_info_t *this_thr = __kmp_threads[global_tid];
881 kmp_team_t *team = this_thr->th.th_team;
882 ompt_callbacks.ompt_callback(ompt_callback_masked)(
883 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
884 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
885 OMPT_GET_RETURN_ADDRESS(0));
890 if (__kmp_env_consistency_check) {
891 #if KMP_USE_DYNAMIC_LOCK
893 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
895 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
898 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
900 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
917 __kmp_assert_valid_gtid(global_tid);
918 KMP_POP_PARTITIONED_TIMER();
920 #if OMPT_SUPPORT && OMPT_OPTIONAL
921 kmp_info_t *this_thr = __kmp_threads[global_tid];
922 kmp_team_t *team = this_thr->th.th_team;
923 if (ompt_enabled.ompt_callback_masked) {
924 int tid = __kmp_tid_from_gtid(global_tid);
925 ompt_callbacks.ompt_callback(ompt_callback_masked)(
926 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
927 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
928 OMPT_GET_RETURN_ADDRESS(0));
932 if (__kmp_env_consistency_check) {
933 __kmp_pop_sync(global_tid, ct_masked, loc);
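// Illustrative sketch (not runtime code, filter() needs an OpenMP 5.1
// compiler): master/masked regions are guarded by __kmpc_master()/
// __kmpc_masked() calls like the ones above; only the selected thread runs
// the body and there is no implied barrier. Kept under #if 0.
#if 0
#include <omp.h>
#include <stdio.h>

void report_once(void) {
#pragma omp parallel
  {
#pragma omp masked filter(0)
    printf("printed once, by thread 0\n");
  }
}
#endif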
947 KMP_DEBUG_ASSERT(__kmp_init_serial);
  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
950 __kmp_assert_valid_gtid(gtid);
952 if (!TCR_4(__kmp_init_parallel))
953 __kmp_parallel_initialize();
955 __kmp_resume_if_soft_paused();
958 __kmp_itt_ordered_prep(gtid);
962 th = __kmp_threads[gtid];
964 #if OMPT_SUPPORT && OMPT_OPTIONAL
968 OMPT_STORE_RETURN_ADDRESS(gtid);
969 if (ompt_enabled.enabled) {
970 team = __kmp_team_from_gtid(gtid);
971 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
973 th->th.ompt_thread_info.wait_id = lck;
974 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
977 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
978 if (ompt_enabled.ompt_callback_mutex_acquire) {
979 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
980 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);
991 #if OMPT_SUPPORT && OMPT_OPTIONAL
992 if (ompt_enabled.enabled) {
994 th->th.ompt_thread_info.state = ompt_state_work_parallel;
995 th->th.ompt_thread_info.wait_id = 0;
998 if (ompt_enabled.ompt_callback_mutex_acquired) {
999 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1000 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1006 __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1022 __kmp_assert_valid_gtid(gtid);
1025 __kmp_itt_ordered_end(gtid);
1029 th = __kmp_threads[gtid];
  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);
1036 #if OMPT_SUPPORT && OMPT_OPTIONAL
1037 OMPT_STORE_RETURN_ADDRESS(gtid);
1038 if (ompt_enabled.ompt_callback_mutex_released) {
1039 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1041 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1042 ->t.t_ordered.dt.t_value,
1043 OMPT_LOAD_RETURN_ADDRESS(gtid));
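// Illustrative sketch (not runtime code): each ordered region inside a loop
// with the ordered clause is bracketed by __kmpc_ordered()/__kmpc_end_ordered()
// so the regions execute in iteration order. Kept under #if 0.
#if 0
#include <omp.h>
#include <stdio.h>

void in_order(void) {
#pragma omp parallel for ordered
  for (int i = 0; i < 8; ++i) {
#pragma omp ordered
    printf("iteration %d\n", i); // always prints 0..7 in order
  }
}
#endif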
1048 #if KMP_USE_DYNAMIC_LOCK
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1056 kmp_indirect_lock_t **lck;
1057 lck = (kmp_indirect_lock_t **)crit;
1058 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1059 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1060 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1061 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1065 __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1070 __kmp_itt_critical_destroyed(ilk->lock);
1076 KMP_DEBUG_ASSERT(*lck != NULL);
1080 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1082 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1083 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1084 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1085 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1086 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1088 KMP_FSYNC_PREPARE(l); \
1089 KMP_INIT_YIELD(spins); \
1090 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1092 if (TCR_4(__kmp_nth) > \
1093 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1096 KMP_YIELD_SPIN(spins); \
1098 __kmp_spin_backoff(&backoff); \
1100 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1101 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1103 KMP_FSYNC_ACQUIRED(l); \
1107 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1109 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1110 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1111 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1112 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1113 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1117 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1118 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
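// Standalone sketch (not runtime code) of the test-and-test-and-set idea the
// KMP_ACQUIRE_TAS_LOCK/KMP_TEST_TAS_LOCK macros above implement: read the lock
// word relaxed first and only attempt the acquiring compare-exchange when it
// looks free, otherwise keep spinning. Kept under #if 0.
#if 0
#include <atomic>

struct tas_lock {
  std::atomic<int> poll{0}; // 0 == free, otherwise owner gtid + 1

  void acquire(int gtid) {
    int free_val = 0;
    const int busy_val = gtid + 1;
    while (poll.load(std::memory_order_relaxed) != 0 ||
           !poll.compare_exchange_weak(free_val, busy_val,
                                       std::memory_order_acquire,
                                       std::memory_order_relaxed)) {
      free_val = 0; // compare_exchange wrote the observed value back
    }
  }
  bool try_acquire(int gtid) {
    int free_val = 0;
    return poll.load(std::memory_order_relaxed) == 0 &&
           poll.compare_exchange_strong(free_val, gtid + 1,
                                        std::memory_order_acquire);
  }
  void release() { poll.store(0, std::memory_order_release); }
};
#endif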
1122 #include <sys/syscall.h>
1125 #define FUTEX_WAIT 0
1128 #define FUTEX_WAKE 1
1132 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1134 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1135 kmp_int32 gtid_code = (gtid + 1) << 1; \
1137 KMP_FSYNC_PREPARE(ftx); \
1138 kmp_int32 poll_val; \
1139 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1140 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1141 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1142 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1144 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1146 KMP_LOCK_BUSY(1, futex))) { \
1149 poll_val |= KMP_LOCK_BUSY(1, futex); \
1152 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1153 NULL, NULL, 0)) != 0) { \
1158 KMP_FSYNC_ACQUIRED(ftx); \
1162 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1164 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
1167 KMP_FSYNC_ACQUIRED(ftx); \
1175 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1177 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1179 KMP_FSYNC_RELEASING(ftx); \
1180 kmp_int32 poll_val = \
1181 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1182 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1183 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1184 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1187 KMP_YIELD_OVERSUB(); \
1194 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1197 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1200 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1207 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1208 __kmp_init_user_lock_with_checks(lck);
1209 __kmp_set_user_lock_location(lck, loc);
1211 __kmp_itt_critical_creating(lck);
1222 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1227 __kmp_itt_critical_destroyed(lck);
1231 __kmp_destroy_user_lock_with_checks(lck);
1232 __kmp_user_lock_free(&idx, gtid, lck);
1233 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1234 KMP_DEBUG_ASSERT(lck != NULL);
1253 kmp_critical_name *crit) {
1254 #if KMP_USE_DYNAMIC_LOCK
1255 #if OMPT_SUPPORT && OMPT_OPTIONAL
1256 OMPT_STORE_RETURN_ADDRESS(global_tid);
1261 #if OMPT_SUPPORT && OMPT_OPTIONAL
1262 ompt_state_t prev_state = ompt_state_undefined;
1263 ompt_thread_info_t ti;
1265 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1268 __kmp_assert_valid_gtid(global_tid);
1272 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1273 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
1286 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1289 if (__kmp_env_consistency_check)
1290 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1298 __kmp_itt_critical_acquiring(lck);
1300 #if OMPT_SUPPORT && OMPT_OPTIONAL
1301 OMPT_STORE_RETURN_ADDRESS(gtid);
1302 void *codeptr_ra = NULL;
1303 if (ompt_enabled.enabled) {
1304 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1306 prev_state = ti.state;
1307 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1308 ti.state = ompt_state_wait_critical;
1311 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1312 if (ompt_enabled.ompt_callback_mutex_acquire) {
1313 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1314 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1315 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1321 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1324 __kmp_itt_critical_acquired(lck);
1326 #if OMPT_SUPPORT && OMPT_OPTIONAL
1327 if (ompt_enabled.enabled) {
1329 ti.state = prev_state;
1333 if (ompt_enabled.ompt_callback_mutex_acquired) {
1334 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1335 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1339 KMP_POP_PARTITIONED_TIMER();
1341 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1346 #if KMP_USE_DYNAMIC_LOCK
1349 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1351 #define KMP_TSX_LOCK(seq) lockseq_##seq
1353 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1356 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1357 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1359 #define KMP_CPUINFO_RTM 0
1363 if (hint & kmp_lock_hint_hle)
1364 return KMP_TSX_LOCK(hle);
1365 if (hint & kmp_lock_hint_rtm)
1366 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1367 if (hint & kmp_lock_hint_adaptive)
1368 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1371 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1372 return __kmp_user_lock_seq;
1373 if ((hint & omp_lock_hint_speculative) &&
1374 (hint & omp_lock_hint_nonspeculative))
1375 return __kmp_user_lock_seq;
1378 if (hint & omp_lock_hint_contended)
1379 return lockseq_queuing;
1382 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1386 if (hint & omp_lock_hint_speculative)
1387 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1389 return __kmp_user_lock_seq;
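// Illustrative sketch (not runtime code): omp_init_lock_with_hint() reaches
// __kmpc_init_lock_with_hint(), and the hint is translated by
// __kmp_map_hint_to_lock() above (e.g. a speculative hint picks an RTM-based
// lock only when the CPU supports it, otherwise the default). Kept under #if 0.
#if 0
#include <omp.h>

omp_lock_t l;

void init_speculative_lock(void) {
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
}
#endif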
1392 #if OMPT_SUPPORT && OMPT_OPTIONAL
1393 #if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1397 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1402 return kmp_mutex_impl_queuing;
1405 return kmp_mutex_impl_spin;
1408 case locktag_rtm_spin:
1409 return kmp_mutex_impl_speculative;
1412 return kmp_mutex_impl_none;
1414 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1417 switch (ilock->type) {
1419 case locktag_adaptive:
1420 case locktag_rtm_queuing:
1421 return kmp_mutex_impl_speculative;
1423 case locktag_nested_tas:
1424 return kmp_mutex_impl_spin;
1426 case locktag_nested_futex:
1428 case locktag_ticket:
1429 case locktag_queuing:
1431 case locktag_nested_ticket:
1432 case locktag_nested_queuing:
1433 case locktag_nested_drdpa:
1434 return kmp_mutex_impl_queuing;
1436 return kmp_mutex_impl_none;
1441 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1442 switch (__kmp_user_lock_kind) {
1444 return kmp_mutex_impl_spin;
1451 return kmp_mutex_impl_queuing;
1454 case lk_rtm_queuing:
1457 return kmp_mutex_impl_speculative;
1460 return kmp_mutex_impl_none;
1480 kmp_critical_name *crit, uint32_t hint) {
1482 kmp_user_lock_p lck;
1483 #if OMPT_SUPPORT && OMPT_OPTIONAL
1484 ompt_state_t prev_state = ompt_state_undefined;
1485 ompt_thread_info_t ti;
1487 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1489 codeptr = OMPT_GET_RETURN_ADDRESS(0);
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1493 __kmp_assert_valid_gtid(global_tid);
1495 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1497 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1498 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1500 if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lockseq));
1504 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1510 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1511 lck = (kmp_user_lock_p)lk;
1512 if (__kmp_env_consistency_check) {
1513 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1514 __kmp_map_hint_to_lock(hint));
1517 __kmp_itt_critical_acquiring(lck);
1519 #if OMPT_SUPPORT && OMPT_OPTIONAL
1520 if (ompt_enabled.enabled) {
1521 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1523 prev_state = ti.state;
1524 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1525 ti.state = ompt_state_wait_critical;
1528 if (ompt_enabled.ompt_callback_mutex_acquire) {
1529 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, (unsigned int)hint,
          __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1536 #if KMP_USE_INLINED_TAS
1537 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1538 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1540 #elif KMP_USE_INLINED_FUTEX
1541 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1542 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1546 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1549 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1551 if (__kmp_env_consistency_check) {
1552 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1553 __kmp_map_hint_to_lock(hint));
1556 __kmp_itt_critical_acquiring(lck);
1558 #if OMPT_SUPPORT && OMPT_OPTIONAL
1559 if (ompt_enabled.enabled) {
1560 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1562 prev_state = ti.state;
1563 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1564 ti.state = ompt_state_wait_critical;
1567 if (ompt_enabled.ompt_callback_mutex_acquire) {
1568 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, (unsigned int)hint,
          __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1575 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1577 KMP_POP_PARTITIONED_TIMER();
1580 __kmp_itt_critical_acquired(lck);
1582 #if OMPT_SUPPORT && OMPT_OPTIONAL
1583 if (ompt_enabled.enabled) {
1585 ti.state = prev_state;
1589 if (ompt_enabled.ompt_callback_mutex_acquired) {
1590 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1591 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1596 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1612 kmp_critical_name *crit) {
1613 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1617 #if KMP_USE_DYNAMIC_LOCK
1618 int locktag = KMP_EXTRACT_D_TAG(crit);
1620 lck = (kmp_user_lock_p)crit;
1621 KMP_ASSERT(lck != NULL);
1622 if (__kmp_env_consistency_check) {
1623 __kmp_pop_sync(global_tid, ct_critical, loc);
1626 __kmp_itt_critical_releasing(lck);
1628 #if KMP_USE_INLINED_TAS
1629 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1630 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1632 #elif KMP_USE_INLINED_FUTEX
1633 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1634 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1638 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1641 kmp_indirect_lock_t *ilk =
1642 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1643 KMP_ASSERT(ilk != NULL);
1645 if (__kmp_env_consistency_check) {
1646 __kmp_pop_sync(global_tid, ct_critical, loc);
1649 __kmp_itt_critical_releasing(lck);
1651 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
1667 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1670 KMP_ASSERT(lck != NULL);
1672 if (__kmp_env_consistency_check)
1673 __kmp_pop_sync(global_tid, ct_critical, loc);
1676 __kmp_itt_critical_releasing(lck);
1680 __kmp_release_user_lock_with_checks(lck, global_tid);
1684 #if OMPT_SUPPORT && OMPT_OPTIONAL
1687 OMPT_STORE_RETURN_ADDRESS(global_tid);
1688 if (ompt_enabled.ompt_callback_mutex_released) {
1689 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1690 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1691 OMPT_LOAD_RETURN_ADDRESS(0));
1695 KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1711 __kmp_assert_valid_gtid(global_tid);
1713 if (!TCR_4(__kmp_init_parallel))
1714 __kmp_parallel_initialize();
1716 __kmp_resume_if_soft_paused();
1718 if (__kmp_env_consistency_check)
1719 __kmp_check_barrier(global_tid, ct_barrier, loc);
1722 ompt_frame_t *ompt_frame;
1723 if (ompt_enabled.enabled) {
1724 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1725 if (ompt_frame->enter_frame.ptr == NULL)
1726 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1728 OMPT_STORE_RETURN_ADDRESS(global_tid);
1731 __kmp_threads[global_tid]->th.th_ident = loc;
1733 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1734 #if OMPT_SUPPORT && OMPT_OPTIONAL
1735 if (ompt_enabled.enabled) {
1736 ompt_frame->enter_frame = ompt_data_none;
1740 return (status != 0) ? 0 : 1;
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1754 __kmp_assert_valid_gtid(global_tid);
1755 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1771 __kmp_assert_valid_gtid(global_tid);
1773 if (!TCR_4(__kmp_init_parallel))
1774 __kmp_parallel_initialize();
1776 __kmp_resume_if_soft_paused();
1778 if (__kmp_env_consistency_check) {
1780 KMP_WARNING(ConstructIdentInvalid);
1782 __kmp_check_barrier(global_tid, ct_barrier, loc);
1786 ompt_frame_t *ompt_frame;
1787 if (ompt_enabled.enabled) {
1788 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1789 if (ompt_frame->enter_frame.ptr == NULL)
1790 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1792 OMPT_STORE_RETURN_ADDRESS(global_tid);
1795 __kmp_threads[global_tid]->th.th_ident = loc;
1797 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1798 #if OMPT_SUPPORT && OMPT_OPTIONAL
1799 if (ompt_enabled.enabled) {
1800 ompt_frame->enter_frame = ompt_data_none;
1806 if (__kmp_env_consistency_check) {
1812 __kmp_pop_sync(global_tid, ct_master, loc);
1832 __kmp_assert_valid_gtid(global_tid);
1833 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1838 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1841 #if OMPT_SUPPORT && OMPT_OPTIONAL
1842 kmp_info_t *this_thr = __kmp_threads[global_tid];
1843 kmp_team_t *team = this_thr->th.th_team;
1844 int tid = __kmp_tid_from_gtid(global_tid);
1846 if (ompt_enabled.enabled) {
1848 if (ompt_enabled.ompt_callback_work) {
1849 ompt_callbacks.ompt_callback(ompt_callback_work)(
1850 ompt_work_single_executor, ompt_scope_begin,
1851 &(team->t.ompt_team_info.parallel_data),
1852 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1853 1, OMPT_GET_RETURN_ADDRESS(0));
1856 if (ompt_enabled.ompt_callback_work) {
1857 ompt_callbacks.ompt_callback(ompt_callback_work)(
1858 ompt_work_single_other, ompt_scope_begin,
1859 &(team->t.ompt_team_info.parallel_data),
1860 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1861 1, OMPT_GET_RETURN_ADDRESS(0));
1862 ompt_callbacks.ompt_callback(ompt_callback_work)(
1863 ompt_work_single_other, ompt_scope_end,
1864 &(team->t.ompt_team_info.parallel_data),
1865 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1866 1, OMPT_GET_RETURN_ADDRESS(0));
1885 __kmp_assert_valid_gtid(global_tid);
1886 __kmp_exit_single(global_tid);
1887 KMP_POP_PARTITIONED_TIMER();
1889 #if OMPT_SUPPORT && OMPT_OPTIONAL
1890 kmp_info_t *this_thr = __kmp_threads[global_tid];
1891 kmp_team_t *team = this_thr->th.th_team;
1892 int tid = __kmp_tid_from_gtid(global_tid);
1894 if (ompt_enabled.ompt_callback_work) {
1895 ompt_callbacks.ompt_callback(ompt_callback_work)(
1896 ompt_work_single_executor, ompt_scope_end,
1897 &(team->t.ompt_team_info.parallel_data),
1898 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1899 OMPT_GET_RETURN_ADDRESS(0));
1912 KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1915 #if OMPT_SUPPORT && OMPT_OPTIONAL
1916 if (ompt_enabled.ompt_callback_work) {
1917 ompt_work_t ompt_work_type = ompt_work_loop;
1918 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1919 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1923 ompt_work_type = ompt_work_loop;
1925 ompt_work_type = ompt_work_sections;
1927 ompt_work_type = ompt_work_distribute;
1932 KMP_DEBUG_ASSERT(ompt_work_type);
1934 ompt_callbacks.ompt_callback(ompt_callback_work)(
1935 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1936 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1939 if (__kmp_env_consistency_check)
1940 __kmp_pop_workshare(global_tid, ct_pdo, loc);
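// Illustrative sketch (not runtime code): a statically scheduled worksharing
// loop; __kmpc_for_static_fini() above is what the compiler typically emits
// after the loop to close the static schedule (and report the region end to
// OMPT). Kept under #if 0.
#if 0
#include <omp.h>

void scale(double *x, int n, double a) {
#pragma omp parallel for schedule(static)
  for (int i = 0; i < n; ++i)
    x[i] *= a;
}
#endif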
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}
void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
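// Illustrative sketch (not runtime code): the OpenMP 5.0 affinity display API
// backed by the ompc_* entry points above. Kept under #if 0.
#if 0
#include <omp.h>

int main(void) {
  omp_set_affinity_format("T#%0.2n affinity: %A");
#pragma omp parallel
  omp_display_affinity(NULL); // NULL means "use the stored format"
  return 0;
}
#endif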
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;
  }
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2172 __kmp_assert_valid_gtid(gtid);
2176 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2178 if (__kmp_env_consistency_check) {
2180 KMP_WARNING(ConstructIdentInvalid);
2187 *data_ptr = cpy_data;
2190 ompt_frame_t *ompt_frame;
2191 if (ompt_enabled.enabled) {
2192 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2193 if (ompt_frame->enter_frame.ptr == NULL)
2194 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2196 OMPT_STORE_RETURN_ADDRESS(gtid);
2200 __kmp_threads[gtid]->th.th_ident = loc;
2202 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2205 (*cpy_func)(cpy_data, *data_ptr);
2211 OMPT_STORE_RETURN_ADDRESS(gtid);
2214 __kmp_threads[gtid]->th.th_ident = loc;
2217 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2218 #if OMPT_SUPPORT && OMPT_OPTIONAL
2219 if (ompt_enabled.enabled) {
2220 ompt_frame->enter_frame = ompt_data_none;
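// Illustrative sketch (not runtime code): copyprivate on a single construct.
// The compiler emits a __kmpc_copyprivate() call with a callback that copies
// the broadcast value into every thread's private copy, using the barriers
// shown above. Kept under #if 0.
#if 0
#include <omp.h>
#include <stdio.h>

void read_and_broadcast(void) {
  int val;
#pragma omp parallel private(val)
  {
#pragma omp single copyprivate(val)
    val = 42; // e.g. produced by one thread
    printf("thread %d sees %d\n", omp_get_thread_num(), val);
  }
}
#endif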
2228 #define INIT_LOCK __kmp_init_user_lock_with_checks
2229 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2230 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2231 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2232 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2233 #define ACQUIRE_NESTED_LOCK_TIMED \
2234 __kmp_acquire_nested_user_lock_with_checks_timed
2235 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2236 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2237 #define TEST_LOCK __kmp_test_user_lock_with_checks
2238 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2239 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2240 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2245 #if KMP_USE_DYNAMIC_LOCK
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
2250 if (KMP_IS_D_LOCK(seq)) {
2251 KMP_INIT_D_LOCK(lock, seq);
2253 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2256 KMP_INIT_I_LOCK(lock, seq);
2258 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2259 __kmp_itt_lock_creating(ilk->lock, loc);
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
2270 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2271 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2272 seq = __kmp_user_lock_seq;
2276 seq = lockseq_nested_tas;
2280 seq = lockseq_nested_futex;
2283 case lockseq_ticket:
2284 seq = lockseq_nested_ticket;
2286 case lockseq_queuing:
2287 seq = lockseq_nested_queuing;
2290 seq = lockseq_nested_drdpa;
2293 seq = lockseq_nested_queuing;
2295 KMP_INIT_I_LOCK(lock, seq);
2297 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2298 __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }
2310 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2312 #if OMPT_SUPPORT && OMPT_OPTIONAL
2314 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2316 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2317 if (ompt_enabled.ompt_callback_lock_init) {
2318 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2319 ompt_mutex_lock, (omp_lock_hint_t)hint,
2320 __ompt_get_mutex_impl_type(user_lock),
2321 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }
2334 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2336 #if OMPT_SUPPORT && OMPT_OPTIONAL
2338 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2340 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2341 if (ompt_enabled.ompt_callback_lock_init) {
2342 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2343 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2344 __ompt_get_mutex_impl_type(user_lock),
2345 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
2360 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2362 #if OMPT_SUPPORT && OMPT_OPTIONAL
2364 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2366 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2367 if (ompt_enabled.ompt_callback_lock_init) {
2368 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2369 ompt_mutex_lock, omp_lock_hint_none,
2370 __ompt_get_mutex_impl_type(user_lock),
2371 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  static char const *const func = "omp_init_lock";
2378 kmp_user_lock_p lck;
2379 KMP_DEBUG_ASSERT(__kmp_init_serial);
2381 if (__kmp_env_consistency_check) {
2382 if (user_lock == NULL) {
2383 KMP_FATAL(LockIsUninitialized, func);
2387 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
2400 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2403 __kmp_set_user_lock_location(lck, loc);
2405 #if OMPT_SUPPORT && OMPT_OPTIONAL
2407 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2409 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2410 if (ompt_enabled.ompt_callback_lock_init) {
2411 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2412 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2413 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2418 __kmp_itt_lock_creating(lck);
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
2432 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2434 #if OMPT_SUPPORT && OMPT_OPTIONAL
2436 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2438 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2439 if (ompt_enabled.ompt_callback_lock_init) {
2440 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2441 ompt_mutex_nest_lock, omp_lock_hint_none,
2442 __ompt_get_mutex_impl_type(user_lock),
2443 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  static char const *const func = "omp_init_nest_lock";
2450 kmp_user_lock_p lck;
2451 KMP_DEBUG_ASSERT(__kmp_init_serial);
2453 if (__kmp_env_consistency_check) {
2454 if (user_lock == NULL) {
2455 KMP_FATAL(LockIsUninitialized, func);
2459 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
2474 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2477 INIT_NESTED_LOCK(lck);
2478 __kmp_set_user_lock_location(lck, loc);
2480 #if OMPT_SUPPORT && OMPT_OPTIONAL
2482 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2484 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2485 if (ompt_enabled.ompt_callback_lock_init) {
2486 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2487 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2488 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2493 __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2500 #if KMP_USE_DYNAMIC_LOCK
2503 kmp_user_lock_p lck;
2504 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2505 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2507 lck = (kmp_user_lock_p)user_lock;
2509 __kmp_itt_lock_destroyed(lck);
2511 #if OMPT_SUPPORT && OMPT_OPTIONAL
2513 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2515 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2516 if (ompt_enabled.ompt_callback_lock_destroy) {
2517 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2518 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2521 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2523 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2539 #if OMPT_SUPPORT && OMPT_OPTIONAL
2541 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2543 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2544 if (ompt_enabled.ompt_callback_lock_destroy) {
2545 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2546 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2551 __kmp_itt_lock_destroyed(lck);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
2566 __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2573 #if KMP_USE_DYNAMIC_LOCK
2576 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2577 __kmp_itt_lock_destroyed(ilk->lock);
2579 #if OMPT_SUPPORT && OMPT_OPTIONAL
2581 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2583 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2584 if (ompt_enabled.ompt_callback_lock_destroy) {
2585 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2586 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2589 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2593 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2611 #if OMPT_SUPPORT && OMPT_OPTIONAL
2613 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2615 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2616 if (ompt_enabled.ompt_callback_lock_destroy) {
2617 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2618 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2623 __kmp_itt_lock_destroyed(lck);
2626 DESTROY_NESTED_LOCK(lck);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
2641 __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2648 #if KMP_USE_DYNAMIC_LOCK
2649 int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2655 #if OMPT_SUPPORT && OMPT_OPTIONAL
2657 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2659 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2660 if (ompt_enabled.ompt_callback_mutex_acquire) {
2661 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2662 ompt_mutex_lock, omp_lock_hint_none,
2663 __ompt_get_mutex_impl_type(user_lock),
2664 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2667 #if KMP_USE_INLINED_TAS
2668 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2669 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2671 #elif KMP_USE_INLINED_FUTEX
2672 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2673 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2677 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2680 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2682 #if OMPT_SUPPORT && OMPT_OPTIONAL
2683 if (ompt_enabled.ompt_callback_mutex_acquired) {
2684 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2685 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2691 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2708 __kmp_itt_lock_acquiring(lck);
2710 #if OMPT_SUPPORT && OMPT_OPTIONAL
2712 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2714 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2715 if (ompt_enabled.ompt_callback_mutex_acquire) {
2716 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2717 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2718 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2722 ACQUIRE_LOCK(lck, gtid);
2725 __kmp_itt_lock_acquired(lck);
2728 #if OMPT_SUPPORT && OMPT_OPTIONAL
2729 if (ompt_enabled.ompt_callback_mutex_acquired) {
2730 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2731 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
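// Illustrative sketch (not runtime code): the plain OpenMP lock API; in this
// runtime the omp_set_lock()/omp_unset_lock() entry points are typically thin
// wrappers that end up in __kmpc_set_lock()/__kmpc_unset_lock(). Kept under
// #if 0.
#if 0
#include <omp.h>

omp_lock_t guard;
long counter;

void setup(void) { omp_init_lock(&guard); }

void bump(void) {
  omp_set_lock(&guard);
  ++counter;
  omp_unset_lock(&guard);
}
#endif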
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2739 #if KMP_USE_DYNAMIC_LOCK
2742 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2744 #if OMPT_SUPPORT && OMPT_OPTIONAL
2746 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2749 if (ompt_enabled.enabled) {
2750 if (ompt_enabled.ompt_callback_mutex_acquire) {
2751 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2752 ompt_mutex_nest_lock, omp_lock_hint_none,
2753 __ompt_get_mutex_impl_type(user_lock),
2754 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2758 int acquire_status =
2759 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2760 (void)acquire_status;
2762 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2765 #if OMPT_SUPPORT && OMPT_OPTIONAL
2766 if (ompt_enabled.enabled) {
2767 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2768 if (ompt_enabled.ompt_callback_mutex_acquired) {
2770 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2771 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2775 if (ompt_enabled.ompt_callback_nest_lock) {
2777 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2778 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2786 kmp_user_lock_p lck;
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2805 __kmp_itt_lock_acquiring(lck);
2807 #if OMPT_SUPPORT && OMPT_OPTIONAL
2809 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2811 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2812 if (ompt_enabled.enabled) {
2813 if (ompt_enabled.ompt_callback_mutex_acquire) {
2814 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2815 ompt_mutex_nest_lock, omp_lock_hint_none,
2816 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2822 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2825 __kmp_itt_lock_acquired(lck);
2828 #if OMPT_SUPPORT && OMPT_OPTIONAL
2829 if (ompt_enabled.enabled) {
2830 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2831 if (ompt_enabled.ompt_callback_mutex_acquired) {
2833 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2834 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2837 if (ompt_enabled.ompt_callback_nest_lock) {
2839 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2840 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2849 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2850 #if KMP_USE_DYNAMIC_LOCK
2852 int tag = KMP_EXTRACT_D_TAG(user_lock);
2854 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2856 #if KMP_USE_INLINED_TAS
2857 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2858 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2860 #elif KMP_USE_INLINED_FUTEX
2861 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2862 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2866 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2869 #if OMPT_SUPPORT && OMPT_OPTIONAL
2871 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2874 if (ompt_enabled.ompt_callback_mutex_released) {
2875 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2876 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2882 kmp_user_lock_p lck;
2887 if ((__kmp_user_lock_kind == lk_tas) &&
2888 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2889 #if KMP_OS_LINUX && \
2890 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2893 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2895 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2898 #if OMPT_SUPPORT && OMPT_OPTIONAL
2900 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2902 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2903 if (ompt_enabled.ompt_callback_mutex_released) {
2904 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2905 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2911 lck = (kmp_user_lock_p)user_lock;
2915 else if ((__kmp_user_lock_kind == lk_futex) &&
2916 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2917 lck = (kmp_user_lock_p)user_lock;
2921 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2925 __kmp_itt_lock_releasing(lck);
2928 RELEASE_LOCK(lck, gtid);
2930 #if OMPT_SUPPORT && OMPT_OPTIONAL
2932 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2934 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2935 if (ompt_enabled.ompt_callback_mutex_released) {
2936 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2937 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
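The __kmpc_set_lock/__kmpc_unset_lock entry points above are the compiler-facing side of the plain OpenMP lock API. A minimal user-level sketch of that API (illustrative only, not part of this file; assumes an OpenMP-enabled compiler, e.g. -fopenmp):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_lock_t lock; /* opaque lock object managed by the runtime */
  int counter = 0;

  omp_init_lock(&lock);
#pragma omp parallel num_threads(4)
  {
    omp_set_lock(&lock);   /* acquire; blocks until available */
    counter++;             /* protected update */
    omp_unset_lock(&lock); /* release */
  }
  omp_destroy_lock(&lock);

  printf("counter = %d\n", counter); /* expect 4 */
  return 0;
}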
2945 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2946 #if KMP_USE_DYNAMIC_LOCK
2949 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2951 int release_status =
2952 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2953 (void)release_status;
2955 #if OMPT_SUPPORT && OMPT_OPTIONAL
2957 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2959 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2960 if (ompt_enabled.enabled) {
2961 if (release_status == KMP_LOCK_RELEASED) {
2962 if (ompt_enabled.ompt_callback_mutex_released) {
2964 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2965 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2968 }
else if (ompt_enabled.ompt_callback_nest_lock) {
2970 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2971 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2978 kmp_user_lock_p lck;
2982 if ((__kmp_user_lock_kind == lk_tas) &&
2983 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2984 OMP_NEST_LOCK_T_SIZE)) {
2985 #if KMP_OS_LINUX && \
2986 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2988 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2990 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2993 #if OMPT_SUPPORT && OMPT_OPTIONAL
2994 int release_status = KMP_LOCK_STILL_HELD;
2997 if (--(tl->lk.depth_locked) == 0) {
2998 TCW_4(tl->lk.poll, 0);
2999 #if OMPT_SUPPORT && OMPT_OPTIONAL
3000 release_status = KMP_LOCK_RELEASED;
3005 #if OMPT_SUPPORT && OMPT_OPTIONAL
3007 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3009 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3010 if (ompt_enabled.enabled) {
3011 if (release_status == KMP_LOCK_RELEASED) {
3012 if (ompt_enabled.ompt_callback_mutex_released) {
3014 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3015 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3017 }
else if (ompt_enabled.ompt_callback_nest_lock) {
3019 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3020 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3027 lck = (kmp_user_lock_p)user_lock;
3031 else if ((__kmp_user_lock_kind == lk_futex) &&
3032 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3033 OMP_NEST_LOCK_T_SIZE)) {
3034 lck = (kmp_user_lock_p)user_lock;
3038 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3042 __kmp_itt_lock_releasing(lck);
3046 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3047 #if OMPT_SUPPORT && OMPT_OPTIONAL
3049 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3051 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3052 if (ompt_enabled.enabled) {
3053 if (release_status == KMP_LOCK_RELEASED) {
3054 if (ompt_enabled.ompt_callback_mutex_released) {
3056 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3057 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3059 }
else if (ompt_enabled.ompt_callback_nest_lock) {
3061 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3062 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3071 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3074 #if KMP_USE_DYNAMIC_LOCK
3076 int tag = KMP_EXTRACT_D_TAG(user_lock);
3078 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3080 #if OMPT_SUPPORT && OMPT_OPTIONAL
3082 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3084 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3085 if (ompt_enabled.ompt_callback_mutex_acquire) {
3086 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3087 ompt_mutex_lock, omp_lock_hint_none,
3088 __ompt_get_mutex_impl_type(user_lock),
3089 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3092 #if KMP_USE_INLINED_TAS
3093 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3094 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3096 #elif KMP_USE_INLINED_FUTEX
3097 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3098 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3102 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3106 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3108 #if OMPT_SUPPORT && OMPT_OPTIONAL
3109 if (ompt_enabled.ompt_callback_mutex_acquired) {
3110 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3111 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3117 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3124 kmp_user_lock_p lck;
3127 if ((__kmp_user_lock_kind == lk_tas) &&
3128 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3129 lck = (kmp_user_lock_p)user_lock;
3132 else if ((__kmp_user_lock_kind == lk_futex) &&
3133 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3134 lck = (kmp_user_lock_p)user_lock;
3138 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3142 __kmp_itt_lock_acquiring(lck);
3144 #if OMPT_SUPPORT && OMPT_OPTIONAL
3146 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3148 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3149 if (ompt_enabled.ompt_callback_mutex_acquire) {
3150 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3151 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3152 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3156 rc = TEST_LOCK(lck, gtid);
3159 __kmp_itt_lock_acquired(lck);
3161 __kmp_itt_lock_cancelled(lck);
3164 #if OMPT_SUPPORT && OMPT_OPTIONAL
3165 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3166 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3167 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3171 return (rc ? FTN_TRUE : FTN_FALSE);
3179 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3180 #if KMP_USE_DYNAMIC_LOCK
3183 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3185 #if OMPT_SUPPORT && OMPT_OPTIONAL
3187 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3189 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3190 if (ompt_enabled.ompt_callback_mutex_acquire) {
3191 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3192 ompt_mutex_nest_lock, omp_lock_hint_none,
3193 __ompt_get_mutex_impl_type(user_lock),
3194 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3197 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3200 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3202 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3205 #if OMPT_SUPPORT && OMPT_OPTIONAL
3206 if (ompt_enabled.enabled && rc) {
3208 if (ompt_enabled.ompt_callback_mutex_acquired) {
3210 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3211 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3215 if (ompt_enabled.ompt_callback_nest_lock) {
3217 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3218 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3227 kmp_user_lock_p lck;
3230 if ((__kmp_user_lock_kind == lk_tas) &&
3231 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3232 OMP_NEST_LOCK_T_SIZE)) {
3233 lck = (kmp_user_lock_p)user_lock;
3236 else if ((__kmp_user_lock_kind == lk_futex) &&
3237 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3238 OMP_NEST_LOCK_T_SIZE)) {
3239 lck = (kmp_user_lock_p)user_lock;
3243 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3247 __kmp_itt_lock_acquiring(lck);
3250 #if OMPT_SUPPORT && OMPT_OPTIONAL
3252 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3254 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3255 if (ompt_enabled.enabled &&
3256 ompt_enabled.ompt_callback_mutex_acquire) {
3257 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3258 ompt_mutex_nest_lock, omp_lock_hint_none,
3259 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3264 rc = TEST_NESTED_LOCK(lck, gtid);
3267 __kmp_itt_lock_acquired(lck);
3269 __kmp_itt_lock_cancelled(lck);
3272 #if OMPT_SUPPORT && OMPT_OPTIONAL
3273 if (ompt_enabled.enabled && rc) {
3275 if (ompt_enabled.ompt_callback_mutex_acquired) {
3277 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3278 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3281 if (ompt_enabled.ompt_callback_nest_lock) {
3283 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3284 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
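The nestable-lock entry points (__kmpc_set_nest_lock, __kmpc_unset_nest_lock, __kmpc_test_nest_lock) track an acquire/release depth per owner, which is why the code above distinguishes KMP_LOCK_ACQUIRED_FIRST and KMP_LOCK_RELEASED from the still-held cases. A small user-level sketch of the corresponding omp_*_nest_lock API (illustrative only; omp_test_nest_lock returns the new nesting depth on success and 0 on failure):

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_nest_lock_t nlock;
  omp_init_nest_lock(&nlock);

  omp_set_nest_lock(&nlock);              /* depth 1 */
  int depth = omp_test_nest_lock(&nlock); /* same owner, succeeds: depth 2 */
  printf("nesting depth after test = %d\n", depth);

  omp_unset_nest_lock(&nlock); /* depth 1, lock still held */
  omp_unset_nest_lock(&nlock); /* depth 0, lock released */
  omp_destroy_nest_lock(&nlock);
  return 0;
}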
3303 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3304 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3306 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3307 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3313 static __forceinline void
3314 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3315 kmp_critical_name *crit) {
3321 kmp_user_lock_p lck;
3323 #if KMP_USE_DYNAMIC_LOCK
3325 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3328 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3329 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3330 KMP_GET_D_TAG(__kmp_user_lock_seq));
3332 __kmp_init_indirect_csptr(crit, loc, global_tid,
3333 KMP_GET_I_TAG(__kmp_user_lock_seq));
3339 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3340 lck = (kmp_user_lock_p)lk;
3341 KMP_DEBUG_ASSERT(lck != NULL);
3342 if (__kmp_env_consistency_check) {
3343 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3345 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3347 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3349 KMP_DEBUG_ASSERT(lck != NULL);
3350 if (__kmp_env_consistency_check) {
3351 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3353 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3361 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3362 lck = (kmp_user_lock_p)crit;
3364 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3366 KMP_DEBUG_ASSERT(lck != NULL);
3368 if (__kmp_env_consistency_check)
3369 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3371 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3377 static __forceinline void
3378 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3379 kmp_critical_name *crit) {
3381 kmp_user_lock_p lck;
3383 #if KMP_USE_DYNAMIC_LOCK
3385 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3386 lck = (kmp_user_lock_p)crit;
3387 if (__kmp_env_consistency_check)
3388 __kmp_pop_sync(global_tid, ct_critical, loc);
3389 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3391 kmp_indirect_lock_t *ilk =
3392 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3393 if (__kmp_env_consistency_check)
3394 __kmp_pop_sync(global_tid, ct_critical, loc);
3395 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3403 if (__kmp_base_user_lock_size > 32) {
3404 lck = *((kmp_user_lock_p *)crit);
3405 KMP_ASSERT(lck != NULL);
3407 lck = (kmp_user_lock_p)crit;
3410 if (__kmp_env_consistency_check)
3411 __kmp_pop_sync(global_tid, ct_critical, loc);
3413 __kmp_release_user_lock_with_checks(lck, global_tid);
3418 static __forceinline int
3419 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, int *task_state) {
3424 if (th->th.th_teams_microtask) {
3425 *team_p = team = th->th.th_team;
3426 if (team->t.t_level == th->th.th_teams_level) {
3428 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
3430 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3431 th->th.th_team = team->t.t_parent;
3432 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3433 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3434 *task_state = th->th.th_task_state;
3435 th->th.th_task_state = 0;
3443 static __forceinline void
3444 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3446 th->th.th_info.ds.ds_tid = 0;
3447 th->th.th_team = team;
3448 th->th.th_team_nproc = team->t.t_nproc;
3449 th->th.th_task_team = team->t.t_task_team[task_state];
3450 __kmp_type_convert(task_state, &(th->th.th_task_state));
3471 size_t reduce_size, void *reduce_data,
3472 void (*reduce_func)(void *lhs_data, void *rhs_data),
3473 kmp_critical_name *lck) {
3477 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3480 int teams_swapped = 0, task_state;
3481 KA_TRACE(10, (
"__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3482 __kmp_assert_valid_gtid(global_tid);
3490 if (!TCR_4(__kmp_init_parallel))
3491 __kmp_parallel_initialize();
3493 __kmp_resume_if_soft_paused();
3496 #if KMP_USE_DYNAMIC_LOCK
3497 if (__kmp_env_consistency_check)
3498 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3500 if (__kmp_env_consistency_check)
3501 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3504 th = __kmp_thread_from_gtid(global_tid);
3505 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3523 packed_reduction_method = __kmp_determine_reduction_method(
3524 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3525 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3527 OMPT_REDUCTION_DECL(th, global_tid);
3528 if (packed_reduction_method == critical_reduce_block) {
3530 OMPT_REDUCTION_BEGIN;
3532 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3535 }
else if (packed_reduction_method == empty_reduce_block) {
3537 OMPT_REDUCTION_BEGIN;
3543 }
else if (packed_reduction_method == atomic_reduce_block) {
3553 if (__kmp_env_consistency_check)
3554 __kmp_pop_sync(global_tid, ct_reduce, loc);
3556 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3557 tree_reduce_block)) {
3577 ompt_frame_t *ompt_frame;
3578 if (ompt_enabled.enabled) {
3579 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3580 if (ompt_frame->enter_frame.ptr == NULL)
3581 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3583 OMPT_STORE_RETURN_ADDRESS(global_tid);
3586 __kmp_threads[global_tid]->th.th_ident = loc;
3589 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3590 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3591 retval = (retval != 0) ? (0) : (1);
3592 #if OMPT_SUPPORT && OMPT_OPTIONAL
3593 if (ompt_enabled.enabled) {
3594 ompt_frame->enter_frame = ompt_data_none;
3600 if (__kmp_env_consistency_check) {
3602 __kmp_pop_sync(global_tid, ct_reduce, loc);
3611 if (teams_swapped) {
3612 __kmp_restore_swapped_teams(th, team, task_state);
3616 KA_TRACE(10, ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3617 global_tid, packed_reduction_method, retval));
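__kmpc_reduce_nowait is what the compiler emits for a reduction on a worksharing construct with nowait: roughly, a return value of 1 tells the calling thread to combine the partial results itself (via the selected critical/tree method) and then call __kmpc_end_reduce_nowait, 2 selects the atomic path, and 0 means there is nothing further to do. A user-level example that is lowered onto this pair (illustrative only; assumes an OpenMP-enabled compiler):

#include <stdio.h>

int main(void) {
  long sum = 0;
#pragma omp parallel
  {
#pragma omp for reduction(+ : sum) nowait /* nowait reduction */
    for (int i = 1; i <= 1000; ++i)
      sum += i;
  }
  printf("sum = %ld\n", sum); /* 500500 */
  return 0;
}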
3631 kmp_critical_name *lck) {
3633 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3635 KA_TRACE(10, (
"__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3636 __kmp_assert_valid_gtid(global_tid);
3638 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3640 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3642 if (packed_reduction_method == critical_reduce_block) {
3644 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3647 }
else if (packed_reduction_method == empty_reduce_block) {
3654 }
else if (packed_reduction_method == atomic_reduce_block) {
3661 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3662 tree_reduce_block)) {
3673 if (__kmp_env_consistency_check)
3674 __kmp_pop_sync(global_tid, ct_reduce, loc);
3676 KA_TRACE(10, (
"__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3677 global_tid, packed_reduction_method));
3700 size_t reduce_size, void *reduce_data,
3701 void (*reduce_func)(void *lhs_data, void *rhs_data),
3702 kmp_critical_name *lck) {
3705 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3708 int teams_swapped = 0, task_state;
3710 KA_TRACE(10, (
"__kmpc_reduce() enter: called T#%d\n", global_tid));
3711 __kmp_assert_valid_gtid(global_tid);
3719 if (!TCR_4(__kmp_init_parallel))
3720 __kmp_parallel_initialize();
3722 __kmp_resume_if_soft_paused();
3725 #if KMP_USE_DYNAMIC_LOCK
3726 if (__kmp_env_consistency_check)
3727 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3729 if (__kmp_env_consistency_check)
3730 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3733 th = __kmp_thread_from_gtid(global_tid);
3734 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3736 packed_reduction_method = __kmp_determine_reduction_method(
3737 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3738 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3740 OMPT_REDUCTION_DECL(th, global_tid);
3742 if (packed_reduction_method == critical_reduce_block) {
3744 OMPT_REDUCTION_BEGIN;
3745 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3748 }
else if (packed_reduction_method == empty_reduce_block) {
3750 OMPT_REDUCTION_BEGIN;
3755 }
else if (packed_reduction_method == atomic_reduce_block) {
3759 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3760 tree_reduce_block)) {
3766 ompt_frame_t *ompt_frame;
3767 if (ompt_enabled.enabled) {
3768 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3769 if (ompt_frame->enter_frame.ptr == NULL)
3770 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3772 OMPT_STORE_RETURN_ADDRESS(global_tid);
3775 __kmp_threads[global_tid]->th.th_ident = loc;
3779 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3780 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3781 retval = (retval != 0) ? (0) : (1);
3782 #if OMPT_SUPPORT && OMPT_OPTIONAL
3783 if (ompt_enabled.enabled) {
3784 ompt_frame->enter_frame = ompt_data_none;
3790 if (__kmp_env_consistency_check) {
3792 __kmp_pop_sync(global_tid, ct_reduce, loc);
3801 if (teams_swapped) {
3802 __kmp_restore_swapped_teams(th, team, task_state);
3806 KA_TRACE(10, ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3807 global_tid, packed_reduction_method, retval));
3822 kmp_critical_name *lck) {
3824 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3827 int teams_swapped = 0, task_state;
3829 KA_TRACE(10, (
"__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3830 __kmp_assert_valid_gtid(global_tid);
3832 th = __kmp_thread_from_gtid(global_tid);
3833 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3835 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3839 OMPT_REDUCTION_DECL(th, global_tid);
3841 if (packed_reduction_method == critical_reduce_block) {
3842 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3848 ompt_frame_t *ompt_frame;
3849 if (ompt_enabled.enabled) {
3850 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3851 if (ompt_frame->enter_frame.ptr == NULL)
3852 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3854 OMPT_STORE_RETURN_ADDRESS(global_tid);
3857 __kmp_threads[global_tid]->th.th_ident = loc;
3859 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3860 #if OMPT_SUPPORT && OMPT_OPTIONAL
3861 if (ompt_enabled.enabled) {
3862 ompt_frame->enter_frame = ompt_data_none;
3866 }
else if (packed_reduction_method == empty_reduce_block) {
3874 ompt_frame_t *ompt_frame;
3875 if (ompt_enabled.enabled) {
3876 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3877 if (ompt_frame->enter_frame.ptr == NULL)
3878 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3880 OMPT_STORE_RETURN_ADDRESS(global_tid);
3883 __kmp_threads[global_tid]->th.th_ident = loc;
3885 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3886 #if OMPT_SUPPORT && OMPT_OPTIONAL
3887 if (ompt_enabled.enabled) {
3888 ompt_frame->enter_frame = ompt_data_none;
3892 }
else if (packed_reduction_method == atomic_reduce_block) {
3895 ompt_frame_t *ompt_frame;
3896 if (ompt_enabled.enabled) {
3897 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3898 if (ompt_frame->enter_frame.ptr == NULL)
3899 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3901 OMPT_STORE_RETURN_ADDRESS(global_tid);
3905 __kmp_threads[global_tid]->th.th_ident = loc;
3907 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3908 #if OMPT_SUPPORT && OMPT_OPTIONAL
3909 if (ompt_enabled.enabled) {
3910 ompt_frame->enter_frame = ompt_data_none;
3914 }
else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3915 tree_reduce_block)) {
3918 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3926 if (teams_swapped) {
3927 __kmp_restore_swapped_teams(th, team, task_state);
3930 if (__kmp_env_consistency_check)
3931 __kmp_pop_sync(global_tid, ct_reduce, loc);
3933 KA_TRACE(10, (
"__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3934 global_tid, packed_reduction_method));
3939 #undef __KMP_GET_REDUCTION_METHOD
3940 #undef __KMP_SET_REDUCTION_METHOD
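The reduce_func callback passed to __kmpc_reduce/__kmpc_reduce_nowait combines one thread's block of privatized reduction variables (rhs_data) into another's (lhs_data). A standalone sketch of that callback shape; the struct layout and names below are hypothetical, chosen only to illustrate the void(*)(void *, void *) signature, since the real layout is whatever the compiler packs into reduce_data:

#include <stdio.h>

typedef struct {   /* hypothetical privatized reduction variables */
  long sum;
  double max;
} red_data_t;

static void my_reduce_func(void *lhs_data, void *rhs_data) {
  red_data_t *lhs = (red_data_t *)lhs_data;
  red_data_t *rhs = (red_data_t *)rhs_data;
  lhs->sum += rhs->sum;                         /* '+' reduction */
  if (rhs->max > lhs->max) lhs->max = rhs->max; /* 'max' reduction */
}

int main(void) {
  red_data_t a = {10, 1.5}, b = {32, 7.25};
  my_reduce_func(&a, &b);
  printf("sum=%ld max=%g\n", a.sum, a.max); /* sum=42 max=7.25 */
  return 0;
}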
3944 kmp_uint64 __kmpc_get_taskid() {
3949 gtid = __kmp_get_gtid();
3953 thread = __kmp_thread_from_gtid(gtid);
3954 return thread->th.th_current_task->td_task_id;
3958 kmp_uint64 __kmpc_get_parent_taskid() {
3962 kmp_taskdata_t *parent_task;
3964 gtid = __kmp_get_gtid();
3968 thread = __kmp_thread_from_gtid(gtid);
3969 parent_task = thread->th.th_current_task->td_parent;
3970 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3986 const struct kmp_dim *dims) {
3987 __kmp_assert_valid_gtid(gtid);
3989 kmp_int64 last, trace_count;
3990 kmp_info_t *th = __kmp_threads[gtid];
3991 kmp_team_t *team = th->th.th_team;
3993 kmp_disp_t *pr_buf = th->th.th_dispatch;
3994 dispatch_shared_info_t *sh_buf;
3998 KA_TRACE(20, ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3999 gtid, num_dims, !team->t.t_serialized));
4000 KMP_DEBUG_ASSERT(dims != NULL);
4001 KMP_DEBUG_ASSERT(num_dims > 0);
4003 if (team->t.t_serialized) {
4004 KA_TRACE(20, (
"__kmpc_doacross_init() exit: serialized team\n"));
4007 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4008 idx = pr_buf->th_doacross_buf_idx++;
4010 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4013 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4014 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4015 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4016 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4017 pr_buf->th_doacross_info[0] =
4018 (kmp_int64)num_dims;
4021 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4022 pr_buf->th_doacross_info[2] = dims[0].lo;
4023 pr_buf->th_doacross_info[3] = dims[0].up;
4024 pr_buf->th_doacross_info[4] = dims[0].st;
4026 for (j = 1; j < num_dims; ++j) {
4029 if (dims[j].st == 1) {
4031 range_length = dims[j].up - dims[j].lo + 1;
4033 if (dims[j].st > 0) {
4034 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4035 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4037 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4039 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4042 pr_buf->th_doacross_info[last++] = range_length;
4043 pr_buf->th_doacross_info[last++] = dims[j].lo;
4044 pr_buf->th_doacross_info[last++] = dims[j].up;
4045 pr_buf->th_doacross_info[last++] = dims[j].st;
4050 if (dims[0].st == 1) {
4051 trace_count = dims[0].up - dims[0].lo + 1;
4052 }
else if (dims[0].st > 0) {
4053 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4054 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4056 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4057 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4059 for (j = 1; j < num_dims; ++j) {
4060 trace_count *= pr_buf->th_doacross_info[4 * j + 1];
4062 KMP_DEBUG_ASSERT(trace_count > 0);
4066 if (idx != sh_buf->doacross_buf_idx) {
4068 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4075 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4076 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4078 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4079 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4081 if (flags == NULL) {
4084 (size_t)trace_count / 8 + 8;
4085 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4087 sh_buf->doacross_flags = flags;
4088 }
else if (flags == (kmp_uint32 *)1) {
4091 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4093 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4100 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
4101 pr_buf->th_doacross_flags =
4102 sh_buf->doacross_flags;
4104 KA_TRACE(20, (
"__kmpc_doacross_init() exit: T#%d\n", gtid));
4107 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4108 __kmp_assert_valid_gtid(gtid);
4112 kmp_int64 iter_number;
4113 kmp_info_t *th = __kmp_threads[gtid];
4114 kmp_team_t *team = th->th.th_team;
4116 kmp_int64 lo, up, st;
4118 KA_TRACE(20, (
"__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4119 if (team->t.t_serialized) {
4120 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: serialized team\n"));
4125 pr_buf = th->th.th_dispatch;
4126 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4127 num_dims = (size_t)pr_buf->th_doacross_info[0];
4128 lo = pr_buf->th_doacross_info[2];
4129 up = pr_buf->th_doacross_info[3];
4130 st = pr_buf->th_doacross_info[4];
4131 #if OMPT_SUPPORT && OMPT_OPTIONAL
4132 ompt_dependence_t deps[num_dims];
4135 if (vec[0] < lo || vec[0] > up) {
4136 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4137 "bounds [%lld,%lld]\n",
4138 gtid, vec[0], lo, up));
4141 iter_number = vec[0] - lo;
4142 }
else if (st > 0) {
4143 if (vec[0] < lo || vec[0] > up) {
4144 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4145 "bounds [%lld,%lld]\n",
4146 gtid, vec[0], lo, up));
4149 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4151 if (vec[0] > lo || vec[0] < up) {
4152 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4153 "bounds [%lld,%lld]\n",
4154 gtid, vec[0], lo, up));
4157 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4159 #if OMPT_SUPPORT && OMPT_OPTIONAL
4160 deps[0].variable.value = iter_number;
4161 deps[0].dependence_type = ompt_dependence_type_sink;
4163 for (i = 1; i < num_dims; ++i) {
4166 ln = pr_buf->th_doacross_info[j + 1];
4167 lo = pr_buf->th_doacross_info[j + 2];
4168 up = pr_buf->th_doacross_info[j + 3];
4169 st = pr_buf->th_doacross_info[j + 4];
4171 if (vec[i] < lo || vec[i] > up) {
4172 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4173 "bounds [%lld,%lld]\n",
4174 gtid, vec[i], lo, up));
4178 }
else if (st > 0) {
4179 if (vec[i] < lo || vec[i] > up) {
4180 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4181 "bounds [%lld,%lld]\n",
4182 gtid, vec[i], lo, up));
4185 iter = (kmp_uint64)(vec[i] - lo) / st;
4187 if (vec[i] > lo || vec[i] < up) {
4188 KA_TRACE(20, (
"__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4189 "bounds [%lld,%lld]\n",
4190 gtid, vec[i], lo, up));
4193 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4195 iter_number = iter + ln * iter_number;
4196 #if OMPT_SUPPORT && OMPT_OPTIONAL
4197 deps[i].variable.value = iter;
4198 deps[i].dependence_type = ompt_dependence_type_sink;
4201 shft = iter_number % 32;
4204 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4208 #if OMPT_SUPPORT && OMPT_OPTIONAL
4209 if (ompt_enabled.ompt_callback_dependences) {
4210 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4211 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4215 (
"__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4216 gtid, (iter_number << 5) + shft));
4219 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4220 __kmp_assert_valid_gtid(gtid);
4224 kmp_int64 iter_number;
4225 kmp_info_t *th = __kmp_threads[gtid];
4226 kmp_team_t *team = th->th.th_team;
4230 KA_TRACE(20, (
"__kmpc_doacross_post() enter: called T#%d\n", gtid));
4231 if (team->t.t_serialized) {
4232 KA_TRACE(20, (
"__kmpc_doacross_post() exit: serialized team\n"));
4238 pr_buf = th->th.th_dispatch;
4239 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4240 num_dims = (size_t)pr_buf->th_doacross_info[0];
4241 lo = pr_buf->th_doacross_info[2];
4242 st = pr_buf->th_doacross_info[4];
4243 #if OMPT_SUPPORT && OMPT_OPTIONAL
4244 ompt_dependence_t deps[num_dims];
4247 iter_number = vec[0] - lo;
4248 }
else if (st > 0) {
4249 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4251 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4253 #if OMPT_SUPPORT && OMPT_OPTIONAL
4254 deps[0].variable.value = iter_number;
4255 deps[0].dependence_type = ompt_dependence_type_source;
4257 for (i = 1; i < num_dims; ++i) {
4260 ln = pr_buf->th_doacross_info[j + 1];
4261 lo = pr_buf->th_doacross_info[j + 2];
4262 st = pr_buf->th_doacross_info[j + 4];
4265 }
else if (st > 0) {
4266 iter = (kmp_uint64)(vec[i] - lo) / st;
4268 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4270 iter_number = iter + ln * iter_number;
4271 #if OMPT_SUPPORT && OMPT_OPTIONAL
4272 deps[i].variable.value = iter;
4273 deps[i].dependence_type = ompt_dependence_type_source;
4276 #if OMPT_SUPPORT && OMPT_OPTIONAL
4277 if (ompt_enabled.ompt_callback_dependences) {
4278 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4279 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4282 shft = iter_number % 32;
4286 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4287 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4288 KA_TRACE(20, (
"__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4289 (iter_number << 5) + shft));
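Both __kmpc_doacross_wait and __kmpc_doacross_post address one bit per linearized iteration in the shared doacross_flags array: bit (iter % 32) of 32-bit word (iter >> 5), which is also why the traces print (iter_number << 5) + shft. A standalone sketch of that mapping; the local array and helper names are illustrative only, and the runtime itself uses an atomic OR and a spin-wait on the real shared buffer:

#include <stdint.h>
#include <stdio.h>

static void post_iteration(uint32_t *flags, uint64_t iter) {
  uint32_t shft = (uint32_t)(iter % 32); /* bit within the 32-bit word */
  uint64_t word = iter >> 5;             /* word index in the flag array */
  flags[word] |= (uint32_t)1 << shft;
}

static int iteration_posted(const uint32_t *flags, uint64_t iter) {
  uint32_t shft = (uint32_t)(iter % 32);
  uint64_t word = iter >> 5;
  return (flags[word] >> shft) & 1u;
}

int main(void) {
  uint32_t flags[4] = {0}; /* room for 128 iterations */
  post_iteration(flags, 37);
  printf("iter 37 posted? %d, iter 38 posted? %d\n",
         iteration_posted(flags, 37), iteration_posted(flags, 38)); /* 1, 0 */
  return 0;
}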
4292 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4293 __kmp_assert_valid_gtid(gtid);
4295 kmp_info_t *th = __kmp_threads[gtid];
4296 kmp_team_t *team = th->th.th_team;
4297 kmp_disp_t *pr_buf = th->th.th_dispatch;
4299 KA_TRACE(20, (
"__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4300 if (team->t.t_serialized) {
4301 KA_TRACE(20, (
"__kmpc_doacross_fini() exit: serialized team %p\n", team));
4305 num_done = KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4306 if (num_done == th->th.th_team_nproc) {
4308 int idx = pr_buf->th_doacross_buf_idx - 1;
4309 dispatch_shared_info_t *sh_buf =
4310 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4311 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4312 (kmp_int64)&sh_buf->doacross_num_done);
4313 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4314 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4315 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4316 sh_buf->doacross_flags = NULL;
4317 sh_buf->doacross_num_done = 0;
4318 sh_buf->doacross_buf_idx +=
4319 __kmp_dispatch_num_buffers;
4322 pr_buf->th_doacross_flags = NULL;
4323 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4324 pr_buf->th_doacross_info = NULL;
4325 KA_TRACE(20, (
"__kmpc_doacross_fini() exit: T#%d\n", gtid));
4329 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4330 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4333 void *omp_aligned_alloc(size_t align, size_t size,
4334 omp_allocator_handle_t allocator) {
4335 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4338 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4339 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4342 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4343 omp_allocator_handle_t allocator) {
4344 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4347 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4348 omp_allocator_handle_t free_allocator) {
4349 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, free_allocator);
4353 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4354 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
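omp_alloc, omp_aligned_alloc, omp_calloc, omp_realloc and omp_free above are the OpenMP 5.x memory-allocator API, thin wrappers over the internal __kmp_alloc family. A minimal usage sketch with one of the predefined allocator handles (illustrative only):

#include <omp.h>
#include <stdio.h>

int main(void) {
  /* allocate from the default memory space via a predefined allocator */
  double *buf = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
  if (buf) {
    buf[0] = 3.14;
    printf("buf[0] = %g\n", buf[0]);
    omp_free(buf, omp_default_mem_alloc);
  }
  return 0;
}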
4358 int __kmpc_get_target_offload(void) {
4359 if (!__kmp_init_serial) {
4360 __kmp_serial_initialize();
4362 return __kmp_target_offload;
4365 int __kmpc_pause_resource(kmp_pause_status_t level) {
4366 if (!__kmp_init_serial) {
4369 return __kmp_pause_resource(level);
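__kmpc_pause_resource parallels the user-facing omp_pause_resource/omp_pause_resource_all routines: a soft pause asks the runtime to release reusable resources (for example idle worker threads) and to re-initialize transparently on the next parallel construct. A minimal sketch (illustrative only; assumes OpenMP 5.0 support):

#include <omp.h>

int main(void) {
#pragma omp parallel
  { /* warm up the runtime so there is something to pause */ }

  omp_pause_resource_all(omp_pause_soft); /* soft pause on all devices */

#pragma omp parallel
  { /* the runtime re-initializes after the soft pause */ }
  return 0;
}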
4372 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4373 if (!__kmp_init_serial)
4374 __kmp_serial_initialize();
4376 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4379 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4380 ompt_callbacks.ompt_callback(ompt_callback_error)(
4381 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4382 OMPT_GET_RETURN_ADDRESS(0));
4388 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4390 src_loc = __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4391 __kmp_str_loc_free(&str_loc);
4393 src_loc = __kmp_str_format("unknown");
4396 if (severity == severity_warning)
4397 KMP_WARNING(UserDirectedWarning, src_loc, message);
4399 KMP_FATAL(UserDirectedError, src_loc, message);
4401 __kmp_str_free(&src_loc);
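__kmpc_error implements the OpenMP 5.1 error directive: severity(warning) routes to KMP_WARNING and execution continues, while severity(fatal) routes to KMP_FATAL and aborts. An illustrative source-level use (assumes a compiler with OpenMP 5.1 error-directive support):

#include <stdio.h>

int main(void) {
  printf("before the directive\n");
#pragma omp error at(execution) severity(warning) message("non-fatal user diagnostic")
  printf("after the directive (still running)\n");
  return 0;
}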
4405 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4407 #if OMPT_SUPPORT && OMPT_OPTIONAL
4408 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4409 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4410 int tid = __kmp_tid_from_gtid(gtid);
4411 ompt_callbacks.ompt_callback(ompt_callback_work)(
4412 ompt_work_scope, ompt_scope_begin,
4413 &(team->t.ompt_team_info.parallel_data),
4414 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4415 OMPT_GET_RETURN_ADDRESS(0));
4421 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4423 #if OMPT_SUPPORT && OMPT_OPTIONAL
4424 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4425 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4426 int tid = __kmp_tid_from_gtid(gtid);
4427 ompt_callbacks.ompt_callback(ompt_callback_work)(
4428 ompt_work_scope, ompt_scope_end,
4429 &(team->t.ompt_team_info.parallel_data),
4430 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4431 OMPT_GET_RETURN_ADDRESS(0));
4436 #ifdef KMP_USE_VERSION_SYMBOLS
4445 #ifdef omp_set_affinity_format
4446 #undef omp_set_affinity_format
4448 #ifdef omp_get_affinity_format
4449 #undef omp_get_affinity_format
4451 #ifdef omp_display_affinity
4452 #undef omp_display_affinity
4454 #ifdef omp_capture_affinity
4455 #undef omp_capture_affinity
4457 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4459 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4461 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4463 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,