#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
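/*
  Illustrative sketch (not part of this file): a compiler may bracket a
  translation unit with these entry points.  The calls are cheap because the
  runtime normally ignores them, gated by __kmp_ignore_mppbeg() and
  __kmp_ignore_mppend() above.  The ident_t initializer below is schematic.

    static ident_t loc_tu = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};

    int main(void) {
      __kmpc_begin(&loc_tu, 0); // optional library bootstrap
      // ... user code, parallel regions ...
      __kmpc_end(&loc_tu);      // optional library shutdown
      return 0;
    }
*/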
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
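/*
  A minimal sketch of the location-string layout this filter relies on.  The
  ident_t psource field is assumed to look like ";file;routine;line;col;;",
  so walking the ';' separators recovers the pieces compared against
  __kmp_par_range_filename, __kmp_par_range_routine and the
  __kmp_par_range_lb/__kmp_par_range_ub line range above.  The string below is
  hypothetical:

    #include <stdio.h>

    const char *psource = ";demo.c;foo;42;7;;";
    char file[64], func[64];
    int line = 0;
    if (sscanf(psource, ";%63[^;];%63[^;];%d;", file, func, &line) == 3) {
      // file == "demo.c", func == "foo", line == 42
    }
*/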
  return __kmp_entry_thread()->th.th_root->r.r_active;

void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
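/*
  Illustrative compiler-generated call sequence (a sketch, not emitted by this
  file) for a directive such as

      #pragma omp parallel num_threads(4) proc_bind(close)

  the clause values are pushed onto the encountering thread right before the
  fork; "outlined_fn" and "shared" are placeholder names:

    kmp_int32 gtid = __kmpc_global_thread_num(&loc);
    __kmpc_push_num_threads(&loc, gtid, 4);
    __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
    __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &shared);
*/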
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
#endif

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
    if (lwt)
      ompt_frame = &(lwt->ompt_task_info.frame);
    else {
      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
    }
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);

#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                  kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  __kmp_join_call(loc, gtid);

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid);

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
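/*
  Sketch of the outlined-function shape the fork path ultimately invokes via
  __kmp_invoke_task_func; the first two parameters are supplied by the
  runtime and the remaining ones mirror the varargs passed to
  __kmpc_fork_call.  Names are illustrative:

    void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid, void *shared) {
      // body of the parallel region; *gtid identifies the thread globally,
      // *btid is its index within the team
    }

  A teams construct goes through the same machinery but forks
  __kmp_teams_master instead:

    __kmpc_push_num_teams(&loc, gtid, 2, 4);
    __kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined, &shared);
*/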
  __kmp_assert_valid_gtid(global_tid);
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
}
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
  }

  serial_team->t.t_level--;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  dispatch_private_info_t *disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer;
  serial_team->t.t_dispatch->th_disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer->next;
  __kmp_free(disp_buffer);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master = serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
}
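/*
  Pairing sketch: when a parallel region is executed by a single thread (for
  example an if(0) clause or exhausted nesting), the compiler brackets the
  region with the serialized entry points instead of a real fork.  Names are
  illustrative:

    __kmpc_serialized_parallel(&loc, gtid);
    kmp_int32 zero = 0;
    outlined_fn(&gtid, &zero, &shared);   // runs on the encountering thread
    __kmpc_end_serialized_parallel(&loc, gtid);
*/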
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
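/*
  Mapping sketch: a bare

      #pragma omp flush

  lowers to a single call of this entry point, which issues the
  architecture-appropriate memory fence selected above:

    __kmpc_flush(&loc);
*/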
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
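/*
  Mapping sketch: an explicit

      #pragma omp barrier

  lowers to a single call of this entry point on every thread of the team:

    __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
*/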
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_master) {
    kmp_info_t *this_thr = __kmp_threads[global_tid];
    kmp_team_t *team = this_thr->th.th_team;

    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    __kmp_push_sync(global_tid, ct_master, loc, NULL);
    __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
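/*
  Pairing sketch for the master construct; only the thread for which
  __kmpc_master() returns 1 executes the guarded block and the matching
  __kmpc_end_master():

    if (__kmpc_master(&loc, gtid)) {
      // master-only code
      __kmpc_end_master(&loc, gtid);
    }
*/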
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
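/*
  Pairing sketch for an ordered region inside a loop chunk handed out by the
  dispatcher; the deo/dxo hooks above enforce the ordering:

    __kmpc_ordered(&loc, gtid);
    // body of #pragma omp ordered
    __kmpc_end_ordered(&loc, gtid);
*/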
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
    __kmp_itt_critical_destroyed(ilk->lock);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path test-and-set lock operations.
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                   \
               !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free,          \
                                               tas_busy));                     \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1

// Fast-path futex lock operations.
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                           \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);

    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
    if (status == 0) {

      __kmp_itt_critical_destroyed(lck);

      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}
#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
    case locktag_futex:
      return kmp_mutex_impl_queuing;
    case locktag_tas:
      return kmp_mutex_impl_spin;
    case locktag_hle:
      return kmp_mutex_impl_speculative;
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
  case locktag_nested_futex:
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
  case lk_futex:
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }

  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
}
#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else {
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_master, loc);
  }
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
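/*
  Pairing sketch for the single construct; __kmpc_single() returns 1 only for
  the thread that executes the block, and a separate __kmpc_barrier follows
  unless the nowait clause was given:

    if (__kmpc_single(&loc, gtid)) {
      // single-only code
      __kmpc_end_single(&loc, gtid);
    }
    __kmpc_barrier(&loc, gtid);   // omitted for nowait
*/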
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      }
    }
    KMP_DEBUG_ASSERT(ompt_work_type);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  if (didit)
    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
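/*
  Usage sketch for copyprivate: the thread that executed the single region
  publishes its private value and every other thread copies it in via the
  supplied copy routine.  "broadcast_int" and "x" are illustrative:

    static void broadcast_int(void *dst, void *src) { *(int *)dst = *(int *)src; }

    int x = 0;
    kmp_int32 didit = __kmpc_single(&loc, gtid);
    if (didit) {
      x = 42;                      // value produced inside the single region
      __kmpc_end_single(&loc, gtid);
    }
    __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, broadcast_int, didit);
*/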
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}
#endif // KMP_USE_DYNAMIC_LOCK

void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);
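/*
  Usage sketch: compilers that lower the omp_*_lock API onto these entry
  points pass the address of the user's lock variable as the void **
  argument.  The storage below stands in for an omp_lock_t:

    void *lock_storage = NULL;

    __kmpc_init_lock(&loc, gtid, &lock_storage);
    __kmpc_set_lock(&loc, gtid, &lock_storage);
    // ... critical work ...
    __kmpc_unset_lock(&loc, gtid, &lock_storage);
    __kmpc_destroy_lock(&loc, gtid, &lock_storage);
*/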
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  int acquire_status;
  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
/* release the lock */
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
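/* Illustrative example (not part of the runtime): user code that reaches the
   lock entry points above. With a typical OpenMP compiler, omp_set_lock() and
   omp_unset_lock() calls end up in __kmpc_set_lock()/__kmpc_unset_lock() (or
   in the inlined TAS/futex fast paths); the exact lowering is up to the
   compiler. */
#if 0 // example only, excluded from the build
#include <omp.h>
#include <stdio.h>

void example_simple_lock(int n) {
  omp_lock_t lock;
  int counter = 0;
  omp_init_lock(&lock);
#pragma omp parallel for
  for (int i = 0; i < n; i++) {
    omp_set_lock(&lock);   // blocking acquire
    counter++;             // protected update
    omp_unset_lock(&lock); // release
  }
  omp_destroy_lock(&lock);
  printf("counter = %d\n", counter);
}
#endif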
/* release the nestable lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
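/* Illustrative example (not part of the runtime): a nestable lock may be
   re-acquired by its owning thread; each omp_set_nest_lock() must be matched
   by an omp_unset_nest_lock(). With a typical compiler these calls arrive at
   __kmpc_set_nest_lock()/__kmpc_unset_nest_lock(); the release_status checks
   above distinguish the release that actually frees the lock from a release
   that only decrements the nesting depth. */
#if 0 // example only, excluded from the build
#include <omp.h>

static omp_nest_lock_t nlock;

static void update(int depth) {
  omp_set_nest_lock(&nlock); // nested re-acquisition by the owner is legal
  if (depth > 0)
    update(depth - 1);       // recursive call re-enters the lock
  omp_unset_nest_lock(&nlock); // freed only when the count drops to zero
}

void example_nest_lock(void) {
  omp_init_nest_lock(&nlock);
#pragma omp parallel
  update(3);
  omp_destroy_nest_lock(&nlock);
}
#endif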
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
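/* Illustrative example (not part of the runtime): omp_test_lock() attempts to
   acquire a lock without blocking and returns nonzero on success, which is
   what the FTN_TRUE/FTN_FALSE result of __kmpc_test_lock() reports back to the
   caller. */
#if 0 // example only, excluded from the build
#include <omp.h>

void example_test_lock(omp_lock_t *l, int *shared, int *local) {
  if (omp_test_lock(l)) { // non-blocking attempt
    (*shared)++;          // got the lock: update shared state
    omp_unset_lock(l);
  } else {
    (*local)++;           // lock busy: do useful work instead of waiting
  }
}
#endif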
/* try to acquire the nestable lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
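/* Illustrative example (not part of the runtime): omp_test_nest_lock() returns
   the new nesting count on success and 0 on failure, which matches the rc
   handling above (rc == 1 is a first acquisition, so the mutex_acquired
   callback fires; rc > 1 is a nested re-acquisition reported via the
   nest_lock callback). */
#if 0 // example only, excluded from the build
#include <omp.h>

int example_test_nest_lock(omp_nest_lock_t *nl) {
  int depth = omp_test_nest_lock(nl); // 0 if busy, otherwise nesting count
  if (depth > 0) {
    /* ... critical work ... */
    omp_unset_nest_lock(nl);
  }
  return depth;
}
#endif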
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // The fast reduction code is only emitted by Intel compilers with 32-byte
  // critical sections; otherwise switch to the slower pointer-based version.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the critical section object is too small to hold the lock in place,
  // it holds a pointer instead.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;
  // Check if we are inside the teams construct
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // this is reduction at teams construct
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // check that tid == 0
      // Let's swap teams temporarily for the reduction
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore master thread's partition.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // Reduction clause can not be used as a stand-alone directive;
  // serial initialization happens inside __kmp_parallel_initialize() if needed.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // packed_reduction_method value will be reused by the __kmp_end_reduce*
  // functions, so it is kept in a thread-specific variable
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the master should do this pop here
    // (none of them will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));
  return retval;
}
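/* Illustrative example (not part of the runtime): a reduction on a
   worksharing loop with nowait is the kind of user code a typical compiler
   lowers onto __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait(), passing the
   private partial results as reduce_data and a combiner as reduce_func. The
   exact lowering is compiler-specific; this only shows the source construct. */
#if 0 // example only, excluded from the build
#include <omp.h>

double example_reduce_nowait(const double *a, int n) {
  double sum = 0.0;
#pragma omp parallel
  {
#pragma omp for reduction(+ : sum) nowait
    for (int i = 0; i < n; i++)
      sum += a[i];
    // threads continue without a barrier here; the combination of the private
    // copies is still synchronized by the method chosen in
    // __kmp_determine_reduction_method()
  }
  return sum;
}
#endif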
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10,
           ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call for the atomic_reduce_block case)

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here; the tree reduction finished in the barrier

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // Reduction clause can not be used as a stand-alone directive.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // this barrier should be visible to a customer and to the threading
    // profile tool (it is a terminating barrier on constructs if NOWAIT is
    // not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the master should do this pop here
    // (none of them will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
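/* Sketch of compiler-generated code (assumed shape, not emitted by this file):
   the return value of __kmpc_reduce()/__kmpc_reduce_nowait() tells the
   generated code how to combine the private copies. 1 means "this thread
   combines into the shared result and then calls __kmpc_end_reduce*()",
   2 means "use atomics", 0 means "nothing to do on this thread". All names in
   this sketch (shared_sum, crit_name, my_reduce_func) are illustrative. */
#if 0 // pseudo-lowering sketch only, excluded from the build
static double shared_sum;
static kmp_critical_name crit_name;

static void my_reduce_func(void *lhs, void *rhs) { // combiner, reduce_func
  *(double *)lhs += *(double *)rhs;
}

static void reduce_lowering_sketch(ident_t *loc, kmp_int32 gtid,
                                   double priv_sum) {
  switch (__kmpc_reduce(loc, gtid, 1, sizeof(double), &priv_sum,
                        my_reduce_func, &crit_name)) {
  case 1: // critical/tree method: this thread folds its copy into the result
    shared_sum += priv_sum;
    __kmpc_end_reduce(loc, gtid, &crit_name);
    break;
  case 2: // atomic method: every thread updates the result atomically
#pragma omp atomic
    shared_sum += priv_sum;
    break;
  default: // 0: nothing to do for this thread
    break;
  }
}
#endif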
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it is a terminating barrier on constructs if NOWAIT not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

    // usage: if team size == 1, no synchronization is required
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // increment index of the shared buffer
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0] which is
  // not kept in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is still occupied by another loop
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
  // After the CAS the first thread gets NULL back, others get 1 while the
  // allocation is in progress, or the allocated pointer afterwards.
#if KMP_32_BIT_ARCH
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
    // initialization is still in progress, need to wait
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check pointer
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save a private copy to avoid touching the
  // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
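/* Worked illustration of the flag indexing used above (reader aid, not
   runtime code): the runtime keeps one completion bit per linearized
   iteration in th_doacross_flags, packed 32 per element. For iteration
   number k, the word index is k >> 5 (k / 32) and the bit mask is
   1 << (k % 32); e.g. k = 70 lands in word 2, bit 6. __kmpc_doacross_post()
   sets that bit and __kmpc_doacross_wait() spins until it is set. */
#if 0 // illustration only, excluded from the build
static inline int doacross_bit_is_set(const kmp_uint32 *flags, kmp_int64 k) {
  kmp_uint32 bit = 1u << (k % 32);   // bit position inside the 32-bit word
  return (flags[k >> 5] & bit) != 0; // k >> 5 selects the word
}
#endif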
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate the sequential iteration number (same as in "wait", but without
  // the out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
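/* Illustrative example (not part of the runtime): the entry points above back
   the OpenMP 5.0 memory allocation API. A minimal use with a predefined
   allocator handle: */
#if 0 // example only, excluded from the build
#include <omp.h>

void example_alloc(size_t n) {
  // omp_default_mem_alloc is a predefined allocator handle
  double *p = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
  if (p) {
    p[0] = 1.0; /* ... use the memory ... */
    omp_free(p, omp_default_mem_alloc);
  }
}
#endif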
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
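/* Illustrative example (not part of the runtime): __kmpc_pause_resource()
   backs the OpenMP 5.0 pause API. omp_pause_resource_all() asks the runtime
   to release resources until the next parallel region; the soft level keeps
   enough state for a cheap restart, while the hard level releases as much as
   possible. */
#if 0 // example only, excluded from the build
#include <omp.h>

void example_pause(void) {
#pragma omp parallel
  { /* ... first parallel phase ... */ }

  // A long serial phase follows; let the runtime idle its worker threads.
  omp_pause_resource_all(omp_pause_soft);

  /* ... long serial work ... */

#pragma omp parallel
  { /* ... the runtime re-initializes transparently ... */ }
}
#endif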