LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "kmp_utils.h"
22#include "ompt-specific.h"
23
24#define MAX_MESSAGE 512
25
26// flags will be used in future, e.g. to implement openmp_strict library
27// restrictions
28
37void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38 // By default __kmpc_begin() is no-op.
39 char *env;
40 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
41 __kmp_str_match_true(env)) {
42 __kmp_middle_initialize();
43 __kmp_assign_root_init_mask();
44 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45 } else if (__kmp_ignore_mppbeg() == FALSE) {
46 // By default __kmp_ignore_mppbeg() returns TRUE.
47 __kmp_internal_begin();
48 KC_TRACE(10, ("__kmpc_begin: called\n"));
49 }
50}
51
60void __kmpc_end(ident_t *loc) {
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64 // returns FALSE and __kmpc_end() will unregister this root (it can cause
65 // library shut down).
66 if (__kmp_ignore_mppend() == FALSE) {
67 KC_TRACE(10, ("__kmpc_end: called\n"));
68 KA_TRACE(30, ("__kmpc_end\n"));
69
70 __kmp_internal_end_thread(-1);
71 }
72#if KMP_OS_WINDOWS && OMPT_SUPPORT
73 // Normal exit process on Windows does not allow worker threads of the final
74 // parallel region to finish reporting their events, so shutting down the
75 // library here fixes the issue at least for the cases where __kmpc_end() is
76 // placed properly.
77 if (ompt_enabled.enabled)
78 __kmp_internal_end_library(__kmp_gtid_get_specific());
79#endif
80}
81
100kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105 return gtid;
106}
107
122kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123 KC_TRACE(10,
124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126 return TCR_4(__kmp_all_nth);
127}
128
135kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137 return __kmp_tid_from_gtid(__kmp_entry_gtid());
138}
139
145kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148 return __kmp_entry_thread()->th.th_team->t.t_nproc;
149}
150
157kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158#ifndef KMP_DEBUG
159
160 return TRUE;
161
162#else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
175 semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
179 semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
195 semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2) &&
198 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
202 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210#endif /* KMP_DEBUG */
211}
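// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// __kmpc_ok_to_fork (debug builds only) walks the semicolon-separated
// ident_t::psource string to decide whether a parallel region falls inside the
// user-requested "par range". Judging from the parsing above, a psource value
// looks roughly like ";path/to/file.c;routine_name;42;7;;" (file, routine, then
// a line number after the third ';'). The helper below is a hypothetical,
// stand-alone restatement of that walk over a plain C string.
#include <stdio.h>
#include <string.h>

static void split_psource(const char *psource) {
  const char *semi2 = strchr(psource, ';');      // end of the leading field
  semi2 = semi2 ? strchr(semi2 + 1, ';') : NULL; // end of the file name
  if (!semi2)
    return;
  const char *semi3 = strchr(semi2 + 1, ';');    // end of the routine name
  int line_no = 0;
  if (semi3 && sscanf(semi3 + 1, "%d", &line_no) == 1)
    printf("routine=%.*s line=%d\n", (int)(semi3 - semi2 - 1), semi2 + 1,
           line_no);
}
// Usage: split_psource(";src/foo.c;foo;42;7;;") prints "routine=foo line=42".
// -----------------------------------------------------------------------------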
212
219kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220 return __kmp_entry_thread()->th.th_root->r.r_active;
221}
222
232void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233 kmp_int32 num_threads) {
234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235 global_tid, num_threads));
236 __kmp_assert_valid_gtid(global_tid);
237 __kmp_push_num_threads(loc, global_tid, num_threads);
238}
239
240void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
241 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
242 /* the num_threads are automatically popped */
243}
244
245void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
246 kmp_int32 proc_bind) {
247 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
248 proc_bind));
249 __kmp_assert_valid_gtid(global_tid);
250 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
251}
252
263void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
264 int gtid = __kmp_entry_gtid();
265
266#if (KMP_STATS_ENABLED)
267 // If we were in a serial region, then stop the serial timer, record
268 // the event, and start parallel region timer
269 stats_state_e previous_state = KMP_GET_THREAD_STATE();
270 if (previous_state == stats_state_e::SERIAL_REGION) {
271 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
272 } else {
273 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
274 }
275 int inParallel = __kmpc_in_parallel(loc);
276 if (inParallel) {
277 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
278 } else {
279 KMP_COUNT_BLOCK(OMP_PARALLEL);
280 }
281#endif
282
283 // maybe to save thr_state is enough here
284 {
285 va_list ap;
286 va_start(ap, microtask);
287
288#if OMPT_SUPPORT
289 ompt_frame_t *ompt_frame;
290 if (ompt_enabled.enabled) {
291 kmp_info_t *master_th = __kmp_threads[gtid];
292 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
293 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
294 }
295 OMPT_STORE_RETURN_ADDRESS(gtid);
296#endif
297
298#if INCLUDE_SSC_MARKS
299 SSC_MARK_FORKING();
300#endif
301 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
302 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
303 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
304 kmp_va_addr_of(ap));
305#if INCLUDE_SSC_MARKS
306 SSC_MARK_JOINING();
307#endif
308 __kmp_join_call(loc, gtid
309#if OMPT_SUPPORT
310 ,
311 fork_context_intel
312#endif
313 );
314
315 va_end(ap);
316
317#if OMPT_SUPPORT
318 if (ompt_enabled.enabled) {
319 ompt_frame->enter_frame = ompt_data_none;
320 }
321#endif
322 }
323
324#if KMP_STATS_ENABLED
325 if (previous_state == stats_state_e::SERIAL_REGION) {
326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
327 KMP_SET_THREAD_STATE(previous_state);
328 } else {
329 KMP_POP_PARTITIONED_TIMER();
330 }
331#endif // KMP_STATS_ENABLED
332}
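// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// Roughly how a compiler lowers "#pragma omp parallel" onto __kmpc_fork_call:
// the region body becomes an outlined "microtask" whose first two parameters
// receive the global and bound thread ids, followed by one pointer per shared
// variable (argc counts those trailing pointers). The names outlined_body,
// loc_parallel and shared_x are hypothetical; only the entry points and types
// (ident_t, kmpc_micro, KMP_IDENT_KMPC) come from this runtime's interface,
// assumed visible here via kmp.h.
#include <stdio.h>

static ident_t loc_parallel = {0, KMP_IDENT_KMPC, 0, 0,
                               ";example.c;lowered_parallel;10;1;;"};

static void outlined_body(kmp_int32 *gtid, kmp_int32 *bound_tid,
                          int *shared_x) {
  printf("T#%d sees shared_x=%d\n", *gtid, *shared_x);
}

void lowered_parallel(void) {
  int shared_x = 42;
  // argc == 1: one shared variable is passed by address after the microtask.
  __kmpc_fork_call(&loc_parallel, 1, (kmpc_micro)outlined_body, &shared_x);
}
// -----------------------------------------------------------------------------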
333
344void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
345 kmp_int32 cond, void *args) {
346 int gtid = __kmp_entry_gtid();
347 if (cond) {
348 if (args)
349 __kmpc_fork_call(loc, argc, microtask, args);
350 else
351 __kmpc_fork_call(loc, argc, microtask);
352 } else {
353 __kmpc_serialized_parallel(loc, gtid);
354
355#if OMPT_SUPPORT
356 void *exit_frame_ptr;
357#endif
358
359 if (args)
360 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
361 /*npr=*/0,
362 /*argc=*/1, &args
363#if OMPT_SUPPORT
364 ,
365 &exit_frame_ptr
366#endif
367 );
368 else
369 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
370 /*npr=*/0,
371 /*argc=*/0,
372 /*args=*/nullptr
373#if OMPT_SUPPORT
374 ,
375 &exit_frame_ptr
376#endif
377 );
378
379 __kmpc_end_serialized_parallel(loc, gtid);
380 }
381}
382
394void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
395 kmp_int32 num_teams, kmp_int32 num_threads) {
396 KA_TRACE(20,
397 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
398 global_tid, num_teams, num_threads));
399 __kmp_assert_valid_gtid(global_tid);
400 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
401}
402
413void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
414 kmp_int32 thread_limit) {
415 __kmp_assert_valid_gtid(global_tid);
416 kmp_info_t *thread = __kmp_threads[global_tid];
417 if (thread_limit > 0)
418 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
419}
420
437void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
438 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
439 kmp_int32 num_threads) {
440 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
441 " num_teams_ub=%d num_threads=%d\n",
442 global_tid, num_teams_lb, num_teams_ub, num_threads));
443 __kmp_assert_valid_gtid(global_tid);
444 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
445 num_threads);
446}
447
458void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
459 ...) {
460 int gtid = __kmp_entry_gtid();
461 kmp_info_t *this_thr = __kmp_threads[gtid];
462 va_list ap;
463 va_start(ap, microtask);
464
465#if KMP_STATS_ENABLED
466 KMP_COUNT_BLOCK(OMP_TEAMS);
467 stats_state_e previous_state = KMP_GET_THREAD_STATE();
468 if (previous_state == stats_state_e::SERIAL_REGION) {
469 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
470 } else {
471 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
472 }
473#endif
474
475 // remember teams entry point and nesting level
476 this_thr->th.th_teams_microtask = microtask;
477 this_thr->th.th_teams_level =
478 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
479
480#if OMPT_SUPPORT
481 kmp_team_t *parent_team = this_thr->th.th_team;
482 int tid = __kmp_tid_from_gtid(gtid);
483 if (ompt_enabled.enabled) {
484 parent_team->t.t_implicit_task_taskdata[tid]
485 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
486 }
487 OMPT_STORE_RETURN_ADDRESS(gtid);
488#endif
489
490 // check if __kmpc_push_num_teams called, set default number of teams
491 // otherwise
492 if (this_thr->th.th_teams_size.nteams == 0) {
493 __kmp_push_num_teams(loc, gtid, 0, 0);
494 }
495 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
496 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
497 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
498
499 __kmp_fork_call(
500 loc, gtid, fork_context_intel, argc,
501 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
502 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
503 __kmp_join_call(loc, gtid
504#if OMPT_SUPPORT
505 ,
506 fork_context_intel
507#endif
508 );
509
510 // Pop current CG root off list
511 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
512 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
513 this_thr->th.th_cg_roots = tmp->up;
514 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
515 " to node %p. cg_nthreads was %d\n",
516 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
517 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
518 int i = tmp->cg_nthreads--;
519 if (i == 1) { // check if we are the last thread in CG (not always the case)
520 __kmp_free(tmp);
521 }
522 // Restore current task's thread_limit from CG root
523 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
524 this_thr->th.th_current_task->td_icvs.thread_limit =
525 this_thr->th.th_cg_roots->cg_thread_limit;
526
527 this_thr->th.th_teams_microtask = NULL;
528 this_thr->th.th_teams_level = 0;
529 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
530 va_end(ap);
531#if KMP_STATS_ENABLED
532 if (previous_state == stats_state_e::SERIAL_REGION) {
533 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
534 KMP_SET_THREAD_STATE(previous_state);
535 } else {
536 KMP_POP_PARTITIONED_TIMER();
537 }
538#endif // KMP_STATS_ENABLED
539}
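// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// "#pragma omp teams num_teams(4) thread_limit(8)" is lowered much like a
// parallel region: the compiler first records the clause values with
// __kmpc_push_num_teams and then forks the teams microtask. The names below
// are hypothetical; the interface declarations (kmp.h) are assumed in scope.
static ident_t loc_teams = {0, KMP_IDENT_KMPC, 0, 0,
                            ";example.c;lowered_teams;20;1;;"};

static void teams_body(kmp_int32 *gtid, kmp_int32 *bound_tid, int *shared_n) {
  (void)bound_tid;
  *shared_n += 1; // team-level work would go here, once per team master
}

void lowered_teams(void) {
  int n = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_teams);
  __kmpc_push_num_teams(&loc_teams, gtid, /*num_teams=*/4, /*num_threads=*/8);
  __kmpc_fork_teams(&loc_teams, 1, (kmpc_micro)teams_body, &n);
}
// -----------------------------------------------------------------------------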
540
541// I don't think this function should ever have been exported.
542// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
543// openmp code ever called it, but it's been exported from the RTL for so
544// long that I'm afraid to remove the definition.
545int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
546
559void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
560 // The implementation is now in kmp_runtime.cpp so that it can share static
561 // functions with kmp_fork_call since the tasks to be done are similar in
562 // each case.
563 __kmp_assert_valid_gtid(global_tid);
564#if OMPT_SUPPORT
565 OMPT_STORE_RETURN_ADDRESS(global_tid);
566#endif
567 __kmp_serialized_parallel(loc, global_tid);
568}
569
577void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
578 kmp_internal_control_t *top;
579 kmp_info_t *this_thr;
580 kmp_team_t *serial_team;
581
582 KC_TRACE(10,
583 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
584
585 /* skip all this code for autopar serialized loops since it results in
586 unacceptable overhead */
587 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
588 return;
589
590 // Not autopar code
591 __kmp_assert_valid_gtid(global_tid);
592 if (!TCR_4(__kmp_init_parallel))
593 __kmp_parallel_initialize();
594
595 __kmp_resume_if_soft_paused();
596
597 this_thr = __kmp_threads[global_tid];
598 serial_team = this_thr->th.th_serial_team;
599
600 kmp_task_team_t *task_team = this_thr->th.th_task_team;
601 // we need to wait for the proxy tasks before finishing the thread
602 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
603 task_team->tt.tt_hidden_helper_task_encountered))
604 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
605
606 KMP_MB();
607 KMP_DEBUG_ASSERT(serial_team);
608 KMP_ASSERT(serial_team->t.t_serialized);
609 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
610 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
611 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
612 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
613
614#if OMPT_SUPPORT
615 if (ompt_enabled.enabled &&
616 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
617 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
618 if (ompt_enabled.ompt_callback_implicit_task) {
619 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
620 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
621 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
622 }
623
624 // clear the task id only after unlinking the task
625 ompt_data_t *parent_task_data;
626 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
627
628 if (ompt_enabled.ompt_callback_parallel_end) {
629 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
630 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
631 ompt_parallel_invoker_program | ompt_parallel_team,
632 OMPT_LOAD_RETURN_ADDRESS(global_tid));
633 }
634 __ompt_lw_taskteam_unlink(this_thr);
635 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
636 }
637#endif
638
639 /* If necessary, pop the internal control stack values and replace the team
640 * values */
641 top = serial_team->t.t_control_stack_top;
642 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
643 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
644 serial_team->t.t_control_stack_top = top->next;
645 __kmp_free(top);
646 }
647
648 /* pop dispatch buffers stack */
649 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
650 {
651 dispatch_private_info_t *disp_buffer =
652 serial_team->t.t_dispatch->th_disp_buffer;
653 serial_team->t.t_dispatch->th_disp_buffer =
654 serial_team->t.t_dispatch->th_disp_buffer->next;
655 __kmp_free(disp_buffer);
656 }
657 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
658
659 --serial_team->t.t_serialized;
660 if (serial_team->t.t_serialized == 0) {
661
662 /* return to the parallel section */
663
664#if KMP_ARCH_X86 || KMP_ARCH_X86_64
665 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
666 __kmp_clear_x87_fpu_status_word();
667 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
668 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
669 }
670#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
671
672 __kmp_pop_current_task_from_thread(this_thr);
673#if OMPD_SUPPORT
674 if (ompd_state & OMPD_ENABLE_BP)
675 ompd_bp_parallel_end();
676#endif
677
678 this_thr->th.th_team = serial_team->t.t_parent;
679 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
680
681 /* restore values cached in the thread */
682 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
683 this_thr->th.th_team_master =
684 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
685 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
686
687 /* TODO the below shouldn't need to be adjusted for serialized teams */
688 this_thr->th.th_dispatch =
689 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
690
691 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
692 this_thr->th.th_current_task->td_flags.executing = 1;
693
694 if (__kmp_tasking_mode != tskm_immediate_exec) {
695 // Copy the task team from the new child / old parent team to the thread.
696 this_thr->th.th_task_team =
697 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
698 KA_TRACE(20,
699 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
700 "team %p\n",
701 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
702 }
703#if KMP_AFFINITY_SUPPORTED
704 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
705 __kmp_reset_root_init_mask(global_tid);
706 }
707#endif
708 } else {
709 if (__kmp_tasking_mode != tskm_immediate_exec) {
710 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
711 "depth of serial team %p to %d\n",
712 global_tid, serial_team, serial_team->t.t_serialized));
713 }
714 }
715
716 serial_team->t.t_level--;
717 if (__kmp_env_consistency_check)
718 __kmp_pop_parallel(global_tid, NULL);
719#if OMPT_SUPPORT
720 if (ompt_enabled.enabled)
721 this_thr->th.ompt_thread_info.state =
722 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
723 : ompt_state_work_parallel);
724#endif
725}
726
735void __kmpc_flush(ident_t *loc) {
736 KC_TRACE(10, ("__kmpc_flush: called\n"));
737
738 /* need explicit __mf() here since use volatile instead in library */
739 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
740
741#if OMPT_SUPPORT && OMPT_OPTIONAL
742 if (ompt_enabled.ompt_callback_flush) {
743 ompt_callbacks.ompt_callback(ompt_callback_flush)(
744 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
745 }
746#endif
747}
748
749/* -------------------------------------------------------------------------- */
757void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
758 KMP_COUNT_BLOCK(OMP_BARRIER);
759 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
760 __kmp_assert_valid_gtid(global_tid);
761
762 if (!TCR_4(__kmp_init_parallel))
763 __kmp_parallel_initialize();
764
765 __kmp_resume_if_soft_paused();
766
767 if (__kmp_env_consistency_check) {
768 if (loc == 0) {
769 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
770 }
771 __kmp_check_barrier(global_tid, ct_barrier, loc);
772 }
773
774#if OMPT_SUPPORT
775 ompt_frame_t *ompt_frame;
776 if (ompt_enabled.enabled) {
777 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
778 if (ompt_frame->enter_frame.ptr == NULL)
779 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
780 }
781 OMPT_STORE_RETURN_ADDRESS(global_tid);
782#endif
783 __kmp_threads[global_tid]->th.th_ident = loc;
784 // TODO: explicit barrier_wait_id:
785 // this function is called when 'barrier' directive is present or
786 // implicit barrier at the end of a worksharing construct.
787 // 1) better to add a per-thread barrier counter to a thread data structure
788 // 2) set to 0 when a new team is created
789 // 4) no sync is required
790
791 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
792#if OMPT_SUPPORT && OMPT_OPTIONAL
793 if (ompt_enabled.enabled) {
794 ompt_frame->enter_frame = ompt_data_none;
795 }
796#endif
797}
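// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// A bare "#pragma omp barrier" inside a parallel region lowers to a single
// call; the thread id is usually a value the compiler already has at hand.
// Hypothetical wrapper, assuming the kmp.h declarations are visible:
void lowered_barrier(ident_t *loc, kmp_int32 gtid) {
  __kmpc_barrier(loc, gtid);
}
// -----------------------------------------------------------------------------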
798
799/* The BARRIER for a MASTER section is always explicit */
806kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
807 int status = 0;
808
809 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
810 __kmp_assert_valid_gtid(global_tid);
811
812 if (!TCR_4(__kmp_init_parallel))
813 __kmp_parallel_initialize();
814
815 __kmp_resume_if_soft_paused();
816
817 if (KMP_MASTER_GTID(global_tid)) {
818 KMP_COUNT_BLOCK(OMP_MASTER);
819 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
820 status = 1;
821 }
822
823#if OMPT_SUPPORT && OMPT_OPTIONAL
824 if (status) {
825 if (ompt_enabled.ompt_callback_masked) {
826 kmp_info_t *this_thr = __kmp_threads[global_tid];
827 kmp_team_t *team = this_thr->th.th_team;
828
829 int tid = __kmp_tid_from_gtid(global_tid);
830 ompt_callbacks.ompt_callback(ompt_callback_masked)(
831 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
832 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
833 OMPT_GET_RETURN_ADDRESS(0));
834 }
835 }
836#endif
837
838 if (__kmp_env_consistency_check) {
839#if KMP_USE_DYNAMIC_LOCK
840 if (status)
841 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
842 else
843 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
844#else
845 if (status)
846 __kmp_push_sync(global_tid, ct_master, loc, NULL);
847 else
848 __kmp_check_sync(global_tid, ct_master, loc, NULL);
849#endif
850 }
851
852 return status;
853}
854
863void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
864 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
865 __kmp_assert_valid_gtid(global_tid);
866 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
867 KMP_POP_PARTITIONED_TIMER();
868
869#if OMPT_SUPPORT && OMPT_OPTIONAL
870 kmp_info_t *this_thr = __kmp_threads[global_tid];
871 kmp_team_t *team = this_thr->th.th_team;
872 if (ompt_enabled.ompt_callback_masked) {
873 int tid = __kmp_tid_from_gtid(global_tid);
874 ompt_callbacks.ompt_callback(ompt_callback_masked)(
875 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
876 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
877 OMPT_GET_RETURN_ADDRESS(0));
878 }
879#endif
880
881 if (__kmp_env_consistency_check) {
882 if (KMP_MASTER_GTID(global_tid))
883 __kmp_pop_sync(global_tid, ct_master, loc);
884 }
885}
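// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// "#pragma omp master" lowers to a guarded region: only the thread for which
// __kmpc_master returns 1 runs the body, and only that thread calls
// __kmpc_end_master. There is no implied barrier. Hypothetical wrapper:
void lowered_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    // ... master-only body ...
    __kmpc_end_master(loc, gtid);
  }
}
// -----------------------------------------------------------------------------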
886
895kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
896 int status = 0;
897 int tid;
898 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
899 __kmp_assert_valid_gtid(global_tid);
900
901 if (!TCR_4(__kmp_init_parallel))
902 __kmp_parallel_initialize();
903
904 __kmp_resume_if_soft_paused();
905
906 tid = __kmp_tid_from_gtid(global_tid);
907 if (tid == filter) {
908 KMP_COUNT_BLOCK(OMP_MASKED);
909 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
910 status = 1;
911 }
912
913#if OMPT_SUPPORT && OMPT_OPTIONAL
914 if (status) {
915 if (ompt_enabled.ompt_callback_masked) {
916 kmp_info_t *this_thr = __kmp_threads[global_tid];
917 kmp_team_t *team = this_thr->th.th_team;
918 ompt_callbacks.ompt_callback(ompt_callback_masked)(
919 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
920 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
921 OMPT_GET_RETURN_ADDRESS(0));
922 }
923 }
924#endif
925
926 if (__kmp_env_consistency_check) {
927#if KMP_USE_DYNAMIC_LOCK
928 if (status)
929 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
930 else
931 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
932#else
933 if (status)
934 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
935 else
936 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
937#endif
938 }
939
940 return status;
941}
942
951void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
952 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
953 __kmp_assert_valid_gtid(global_tid);
954 KMP_POP_PARTITIONED_TIMER();
955
956#if OMPT_SUPPORT && OMPT_OPTIONAL
957 kmp_info_t *this_thr = __kmp_threads[global_tid];
958 kmp_team_t *team = this_thr->th.th_team;
959 if (ompt_enabled.ompt_callback_masked) {
960 int tid = __kmp_tid_from_gtid(global_tid);
961 ompt_callbacks.ompt_callback(ompt_callback_masked)(
962 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
963 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
964 OMPT_GET_RETURN_ADDRESS(0));
965 }
966#endif
967
968 if (__kmp_env_consistency_check) {
969 __kmp_pop_sync(global_tid, ct_masked, loc);
970 }
971}
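// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// "#pragma omp masked filter(expr)" follows the same pattern as master, with
// the evaluated filter thread number passed through; the body runs only on the
// thread whose team-local id equals the filter. Hypothetical wrapper:
void lowered_masked(ident_t *loc, kmp_int32 gtid, kmp_int32 filter) {
  if (__kmpc_masked(loc, gtid, filter)) {
    // ... body executed by the selected thread only ...
    __kmpc_end_masked(loc, gtid);
  }
}
// -----------------------------------------------------------------------------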
972
980void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
981 int cid = 0;
982 kmp_info_t *th;
983 KMP_DEBUG_ASSERT(__kmp_init_serial);
984
985 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
986 __kmp_assert_valid_gtid(gtid);
987
988 if (!TCR_4(__kmp_init_parallel))
989 __kmp_parallel_initialize();
990
991 __kmp_resume_if_soft_paused();
992
993#if USE_ITT_BUILD
994 __kmp_itt_ordered_prep(gtid);
995// TODO: ordered_wait_id
996#endif /* USE_ITT_BUILD */
997
998 th = __kmp_threads[gtid];
999
1000#if OMPT_SUPPORT && OMPT_OPTIONAL
1001 kmp_team_t *team;
1002 ompt_wait_id_t lck;
1003 void *codeptr_ra;
1004 OMPT_STORE_RETURN_ADDRESS(gtid);
1005 if (ompt_enabled.enabled) {
1006 team = __kmp_team_from_gtid(gtid);
1007 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1008 /* OMPT state update */
1009 th->th.ompt_thread_info.wait_id = lck;
1010 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1011
1012 /* OMPT event callback */
1013 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1014 if (ompt_enabled.ompt_callback_mutex_acquire) {
1015 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1016 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1017 codeptr_ra);
1018 }
1019 }
1020#endif
1021
1022 if (th->th.th_dispatch->th_deo_fcn != 0)
1023 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1024 else
1025 __kmp_parallel_deo(&gtid, &cid, loc);
1026
1027#if OMPT_SUPPORT && OMPT_OPTIONAL
1028 if (ompt_enabled.enabled) {
1029 /* OMPT state update */
1030 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1031 th->th.ompt_thread_info.wait_id = 0;
1032
1033 /* OMPT event callback */
1034 if (ompt_enabled.ompt_callback_mutex_acquired) {
1035 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1036 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1037 }
1038 }
1039#endif
1040
1041#if USE_ITT_BUILD
1042 __kmp_itt_ordered_start(gtid);
1043#endif /* USE_ITT_BUILD */
1044}
1045
1053void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1054 int cid = 0;
1055 kmp_info_t *th;
1056
1057 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1058 __kmp_assert_valid_gtid(gtid);
1059
1060#if USE_ITT_BUILD
1061 __kmp_itt_ordered_end(gtid);
1062// TODO: ordered_wait_id
1063#endif /* USE_ITT_BUILD */
1064
1065 th = __kmp_threads[gtid];
1066
1067 if (th->th.th_dispatch->th_dxo_fcn != 0)
1068 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1069 else
1070 __kmp_parallel_dxo(&gtid, &cid, loc);
1071
1072#if OMPT_SUPPORT && OMPT_OPTIONAL
1073 OMPT_STORE_RETURN_ADDRESS(gtid);
1074 if (ompt_enabled.ompt_callback_mutex_released) {
1075 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1076 ompt_mutex_ordered,
1077 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1078 ->t.t_ordered.dt.t_value,
1079 OMPT_LOAD_RETURN_ADDRESS(gtid));
1080 }
1081#endif
1082}
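// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// Inside a loop declared "ordered", the generated chunk body brackets each
// ordered block with __kmpc_ordered/__kmpc_end_ordered so iterations retire in
// sequential order. The loop-scheduling calls that normally surround this
// (__kmpc_for_static_init / the dispatch entry points) are omitted here; the
// wrapper name is hypothetical.
void lowered_ordered_block(ident_t *loc, kmp_int32 gtid) {
  __kmpc_ordered(loc, gtid);
  // ... body that must execute in iteration order ...
  __kmpc_end_ordered(loc, gtid);
}
// -----------------------------------------------------------------------------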
1083
1084#if KMP_USE_DYNAMIC_LOCK
1085
1086static __forceinline void
1087__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1088 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1089 // Pointer to the allocated indirect lock is written to crit, while indexing
1090 // is ignored.
1091 void *idx;
1092 kmp_indirect_lock_t **lck;
1093 lck = (kmp_indirect_lock_t **)crit;
1094 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1095 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1096 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1097 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1098 KA_TRACE(20,
1099 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1100#if USE_ITT_BUILD
1101 __kmp_itt_critical_creating(ilk->lock, loc);
1102#endif
1103 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1104 if (status == 0) {
1105#if USE_ITT_BUILD
1106 __kmp_itt_critical_destroyed(ilk->lock);
1107#endif
1108 // We don't really need to destroy the unclaimed lock here since it will be
1109 // cleaned up at program exit.
1110 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1111 }
1112 KMP_DEBUG_ASSERT(*lck != NULL);
1113}
1114
1115// Fast-path acquire tas lock
1116#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1117 { \
1118 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1119 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1120 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1121 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1122 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1123 kmp_uint32 spins; \
1124 KMP_FSYNC_PREPARE(l); \
1125 KMP_INIT_YIELD(spins); \
1126 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1127 do { \
1128 if (TCR_4(__kmp_nth) > \
1129 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1130 KMP_YIELD(TRUE); \
1131 } else { \
1132 KMP_YIELD_SPIN(spins); \
1133 } \
1134 __kmp_spin_backoff(&backoff); \
1135 } while ( \
1136 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1137 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1138 } \
1139 KMP_FSYNC_ACQUIRED(l); \
1140 }
1141
1142// Fast-path test tas lock
1143#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1144 { \
1145 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1146 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1147 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1148 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1149 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1150 }
1151
1152// Fast-path release tas lock
1153#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1154 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1155
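// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// The KMP_ACQUIRE/TEST/RELEASE_TAS_LOCK macros above implement a test-and-set
// spin lock: poll holds KMP_LOCK_FREE(tas) when free and gtid + 1 when owned,
// acquisition is an acquire-ordered compare-and-swap preceded by a relaxed
// read, and contention falls back to yielding plus backoff. The same shape in
// portable C++ (std::this_thread::yield stands in for KMP_YIELD and
// __kmp_spin_backoff):
#include <atomic>
#include <cstdint>
#include <thread>

struct tas_lock_sketch {
  std::atomic<int32_t> poll{0}; // 0 == free, gtid + 1 == owner
};

inline void tas_acquire(tas_lock_sketch &l, int32_t gtid) {
  const int32_t free_val = 0, busy_val = gtid + 1;
  for (;;) {
    int32_t expected = free_val;
    if (l.poll.load(std::memory_order_relaxed) == free_val &&
        l.poll.compare_exchange_weak(expected, busy_val,
                                     std::memory_order_acquire))
      return;
    std::this_thread::yield(); // back off under contention
  }
}

inline void tas_release(tas_lock_sketch &l) {
  l.poll.store(0, std::memory_order_release); // mirrors KMP_RELEASE_TAS_LOCK
}
// -----------------------------------------------------------------------------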
1156#if KMP_USE_FUTEX
1157
1158#include <sys/syscall.h>
1159#include <unistd.h>
1160#ifndef FUTEX_WAIT
1161#define FUTEX_WAIT 0
1162#endif
1163#ifndef FUTEX_WAKE
1164#define FUTEX_WAKE 1
1165#endif
1166
1167// Fast-path acquire futex lock
1168#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1169 { \
1170 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1171 kmp_int32 gtid_code = (gtid + 1) << 1; \
1172 KMP_MB(); \
1173 KMP_FSYNC_PREPARE(ftx); \
1174 kmp_int32 poll_val; \
1175 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1176 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1177 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1178 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1179 if (!cond) { \
1180 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1181 poll_val | \
1182 KMP_LOCK_BUSY(1, futex))) { \
1183 continue; \
1184 } \
1185 poll_val |= KMP_LOCK_BUSY(1, futex); \
1186 } \
1187 kmp_int32 rc; \
1188 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1189 NULL, NULL, 0)) != 0) { \
1190 continue; \
1191 } \
1192 gtid_code |= 1; \
1193 } \
1194 KMP_FSYNC_ACQUIRED(ftx); \
1195 }
1196
1197// Fast-path test futex lock
1198#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1199 { \
1200 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1201 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1202 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1203 KMP_FSYNC_ACQUIRED(ftx); \
1204 rc = TRUE; \
1205 } else { \
1206 rc = FALSE; \
1207 } \
1208 }
1209
1210// Fast-path release futex lock
1211#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1212 { \
1213 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1214 KMP_MB(); \
1215 KMP_FSYNC_RELEASING(ftx); \
1216 kmp_int32 poll_val = \
1217 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1218 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1219 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1220 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1221 } \
1222 KMP_MB(); \
1223 KMP_YIELD_OVERSUB(); \
1224 }
1225
1226#endif // KMP_USE_FUTEX
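// Note on the futex fast path above: lk.poll holds KMP_LOCK_FREE(futex) when
// the lock is free and KMP_LOCK_BUSY((gtid + 1) << 1, futex) while held. The
// low bit of the stripped value acts as a "there may be waiters" flag: a
// contending thread sets it (poll_val | KMP_LOCK_BUSY(1, futex)) before
// sleeping in FUTEX_WAIT and keeps it set once it finally acquires, which is
// why the release macro issues FUTEX_WAKE only when that bit is set.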
1227
1228#else // KMP_USE_DYNAMIC_LOCK
1229
1230static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1231 ident_t const *loc,
1232 kmp_int32 gtid) {
1233 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1234
1235 // Because of the double-check, the following load doesn't need to be volatile
1236 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1237
1238 if (lck == NULL) {
1239 void *idx;
1240
1241 // Allocate & initialize the lock.
1242 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1243 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1244 __kmp_init_user_lock_with_checks(lck);
1245 __kmp_set_user_lock_location(lck, loc);
1246#if USE_ITT_BUILD
1247 __kmp_itt_critical_creating(lck);
1248// __kmp_itt_critical_creating() should be called *before* the first usage
1249// of underlying lock. It is the only place where we can guarantee it. There
1250// are chances the lock will be destroyed with no usage, but it is not a
1251// problem, because this is not real event seen by user but rather setting
1252// name for object (lock). See more details in kmp_itt.h.
1253#endif /* USE_ITT_BUILD */
1254
1255 // Use a cmpxchg instruction to slam the start of the critical section with
1256 // the lock pointer. If another thread beat us to it, deallocate the lock,
1257 // and use the lock that the other thread allocated.
1258 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1259
1260 if (status == 0) {
1261// Deallocate the lock and reload the value.
1262#if USE_ITT_BUILD
1263 __kmp_itt_critical_destroyed(lck);
1264// Let ITT know the lock is destroyed and the same memory location may be reused
1265// for another purpose.
1266#endif /* USE_ITT_BUILD */
1267 __kmp_destroy_user_lock_with_checks(lck);
1268 __kmp_user_lock_free(&idx, gtid, lck);
1269 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1270 KMP_DEBUG_ASSERT(lck != NULL);
1271 }
1272 }
1273 return lck;
1274}
1275
1276#endif // KMP_USE_DYNAMIC_LOCK
1277
1288void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1289 kmp_critical_name *crit) {
1290#if KMP_USE_DYNAMIC_LOCK
1291#if OMPT_SUPPORT && OMPT_OPTIONAL
1292 OMPT_STORE_RETURN_ADDRESS(global_tid);
1293#endif // OMPT_SUPPORT
1294 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1295#else
1296 KMP_COUNT_BLOCK(OMP_CRITICAL);
1297#if OMPT_SUPPORT && OMPT_OPTIONAL
1298 ompt_state_t prev_state = ompt_state_undefined;
1299 ompt_thread_info_t ti;
1300#endif
1301 kmp_user_lock_p lck;
1302
1303 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1304 __kmp_assert_valid_gtid(global_tid);
1305
1306 // TODO: add THR_OVHD_STATE
1307
1308 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1309 KMP_CHECK_USER_LOCK_INIT();
1310
1311 if ((__kmp_user_lock_kind == lk_tas) &&
1312 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1313 lck = (kmp_user_lock_p)crit;
1314 }
1315#if KMP_USE_FUTEX
1316 else if ((__kmp_user_lock_kind == lk_futex) &&
1317 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1318 lck = (kmp_user_lock_p)crit;
1319 }
1320#endif
1321 else { // ticket, queuing or drdpa
1322 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1323 }
1324
1325 if (__kmp_env_consistency_check)
1326 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1327
1328 // since the critical directive binds to all threads, not just the current
1329 // team we have to check this even if we are in a serialized team.
1330 // also, even if we are the uber thread, we still have to conduct the lock,
1331 // as we have to contend with sibling threads.
1332
1333#if USE_ITT_BUILD
1334 __kmp_itt_critical_acquiring(lck);
1335#endif /* USE_ITT_BUILD */
1336#if OMPT_SUPPORT && OMPT_OPTIONAL
1337 OMPT_STORE_RETURN_ADDRESS(global_tid);
1338 void *codeptr_ra = NULL;
1339 if (ompt_enabled.enabled) {
1340 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1341 /* OMPT state update */
1342 prev_state = ti.state;
1343 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1344 ti.state = ompt_state_wait_critical;
1345
1346 /* OMPT event callback */
1347 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1348 if (ompt_enabled.ompt_callback_mutex_acquire) {
1349 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1350 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1351 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1352 }
1353 }
1354#endif
1355 // Value of 'crit' should be good for using as a critical_id of the critical
1356 // section directive.
1357 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1358
1359#if USE_ITT_BUILD
1360 __kmp_itt_critical_acquired(lck);
1361#endif /* USE_ITT_BUILD */
1362#if OMPT_SUPPORT && OMPT_OPTIONAL
1363 if (ompt_enabled.enabled) {
1364 /* OMPT state update */
1365 ti.state = prev_state;
1366 ti.wait_id = 0;
1367
1368 /* OMPT event callback */
1369 if (ompt_enabled.ompt_callback_mutex_acquired) {
1370 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1371 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1372 }
1373 }
1374#endif
1375 KMP_POP_PARTITIONED_TIMER();
1376
1377 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1378 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1379#endif // KMP_USE_DYNAMIC_LOCK
1380}
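// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// "#pragma omp critical (tag)" typically lowers to a file-scope
// kmp_critical_name cache (zero-initialized; the runtime lazily stores a lock
// or lock tag into it) and a __kmpc_critical/__kmpc_end_critical pair around
// the body. Names are hypothetical; kmp.h declarations assumed in scope.
static kmp_critical_name crit_tag_lock = {0};

void lowered_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &crit_tag_lock);
  // ... body executed under mutual exclusion ...
  __kmpc_end_critical(loc, gtid, &crit_tag_lock);
}
// -----------------------------------------------------------------------------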
1381
1382#if KMP_USE_DYNAMIC_LOCK
1383
1384// Converts the given hint to an internal lock implementation
1385static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1386#if KMP_USE_TSX
1387#define KMP_TSX_LOCK(seq) lockseq_##seq
1388#else
1389#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1390#endif
1391
1392#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1393#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1394#else
1395#define KMP_CPUINFO_RTM 0
1396#endif
1397
1398 // Hints that do not require further logic
1399 if (hint & kmp_lock_hint_hle)
1400 return KMP_TSX_LOCK(hle);
1401 if (hint & kmp_lock_hint_rtm)
1402 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1403 if (hint & kmp_lock_hint_adaptive)
1404 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1405
1406 // Rule out conflicting hints first by returning the default lock
1407 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1408 return __kmp_user_lock_seq;
1409 if ((hint & omp_lock_hint_speculative) &&
1410 (hint & omp_lock_hint_nonspeculative))
1411 return __kmp_user_lock_seq;
1412
1413 // Do not even consider speculation when it appears to be contended
1414 if (hint & omp_lock_hint_contended)
1415 return lockseq_queuing;
1416
1417 // Uncontended lock without speculation
1418 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1419 return lockseq_tas;
1420
1421 // Use RTM lock for speculation
1422 if (hint & omp_lock_hint_speculative)
1423 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1424
1425 return __kmp_user_lock_seq;
1426}
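// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// The hint values consumed by __kmp_map_hint_to_lock come from the standard
// OpenMP lock-hint API (plus the kmp_lock_hint_* extensions). From user code
// the mapping is exercised like this; whether a speculative (RTM) lock is
// actually chosen depends on the CPU, as the function above shows.
#include <omp.h>

void hinted_lock_demo(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
  omp_set_lock(&l);
  // ... short, rarely-contended critical work ...
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}
// -----------------------------------------------------------------------------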
1427
1428#if OMPT_SUPPORT && OMPT_OPTIONAL
1429#if KMP_USE_DYNAMIC_LOCK
1430static kmp_mutex_impl_t
1431__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1432 if (user_lock) {
1433 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1434 case 0:
1435 break;
1436#if KMP_USE_FUTEX
1437 case locktag_futex:
1438 return kmp_mutex_impl_queuing;
1439#endif
1440 case locktag_tas:
1441 return kmp_mutex_impl_spin;
1442#if KMP_USE_TSX
1443 case locktag_hle:
1444 case locktag_rtm_spin:
1445 return kmp_mutex_impl_speculative;
1446#endif
1447 default:
1448 return kmp_mutex_impl_none;
1449 }
1450 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1451 }
1452 KMP_ASSERT(ilock);
1453 switch (ilock->type) {
1454#if KMP_USE_TSX
1455 case locktag_adaptive:
1456 case locktag_rtm_queuing:
1457 return kmp_mutex_impl_speculative;
1458#endif
1459 case locktag_nested_tas:
1460 return kmp_mutex_impl_spin;
1461#if KMP_USE_FUTEX
1462 case locktag_nested_futex:
1463#endif
1464 case locktag_ticket:
1465 case locktag_queuing:
1466 case locktag_drdpa:
1467 case locktag_nested_ticket:
1468 case locktag_nested_queuing:
1469 case locktag_nested_drdpa:
1470 return kmp_mutex_impl_queuing;
1471 default:
1472 return kmp_mutex_impl_none;
1473 }
1474}
1475#else
1476// For locks without dynamic binding
1477static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1478 switch (__kmp_user_lock_kind) {
1479 case lk_tas:
1480 return kmp_mutex_impl_spin;
1481#if KMP_USE_FUTEX
1482 case lk_futex:
1483#endif
1484 case lk_ticket:
1485 case lk_queuing:
1486 case lk_drdpa:
1487 return kmp_mutex_impl_queuing;
1488#if KMP_USE_TSX
1489 case lk_hle:
1490 case lk_rtm_queuing:
1491 case lk_rtm_spin:
1492 case lk_adaptive:
1493 return kmp_mutex_impl_speculative;
1494#endif
1495 default:
1496 return kmp_mutex_impl_none;
1497 }
1498}
1499#endif // KMP_USE_DYNAMIC_LOCK
1500#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1501
1515void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1516 kmp_critical_name *crit, uint32_t hint) {
1517 KMP_COUNT_BLOCK(OMP_CRITICAL);
1518 kmp_user_lock_p lck;
1519#if OMPT_SUPPORT && OMPT_OPTIONAL
1520 ompt_state_t prev_state = ompt_state_undefined;
1521 ompt_thread_info_t ti;
1522 // This is the case, if called from __kmpc_critical:
1523 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1524 if (!codeptr)
1525 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1526#endif
1527
1528 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1529 __kmp_assert_valid_gtid(global_tid);
1530
1531 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1532 // Check if it is initialized.
1533 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1534 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1535 if (*lk == 0) {
1536 if (KMP_IS_D_LOCK(lockseq)) {
1537 KMP_COMPARE_AND_STORE_ACQ32(
1538 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1539 KMP_GET_D_TAG(lockseq));
1540 } else {
1541 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1542 }
1543 }
1544 // Branch for accessing the actual lock object and set operation. This
1545 // branching is inevitable since this lock initialization does not follow the
1546 // normal dispatch path (lock table is not used).
1547 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1548 lck = (kmp_user_lock_p)lk;
1549 if (__kmp_env_consistency_check) {
1550 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1551 __kmp_map_hint_to_lock(hint));
1552 }
1553#if USE_ITT_BUILD
1554 __kmp_itt_critical_acquiring(lck);
1555#endif
1556#if OMPT_SUPPORT && OMPT_OPTIONAL
1557 if (ompt_enabled.enabled) {
1558 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1559 /* OMPT state update */
1560 prev_state = ti.state;
1561 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1562 ti.state = ompt_state_wait_critical;
1563
1564 /* OMPT event callback */
1565 if (ompt_enabled.ompt_callback_mutex_acquire) {
1566 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1567 ompt_mutex_critical, (unsigned int)hint,
1568 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1569 codeptr);
1570 }
1571 }
1572#endif
1573#if KMP_USE_INLINED_TAS
1574 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1575 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1576 } else
1577#elif KMP_USE_INLINED_FUTEX
1578 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1579 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1580 } else
1581#endif
1582 {
1583 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1584 }
1585 } else {
1586 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1587 lck = ilk->lock;
1588 if (__kmp_env_consistency_check) {
1589 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1590 __kmp_map_hint_to_lock(hint));
1591 }
1592#if USE_ITT_BUILD
1593 __kmp_itt_critical_acquiring(lck);
1594#endif
1595#if OMPT_SUPPORT && OMPT_OPTIONAL
1596 if (ompt_enabled.enabled) {
1597 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1598 /* OMPT state update */
1599 prev_state = ti.state;
1600 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1601 ti.state = ompt_state_wait_critical;
1602
1603 /* OMPT event callback */
1604 if (ompt_enabled.ompt_callback_mutex_acquire) {
1605 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1606 ompt_mutex_critical, (unsigned int)hint,
1607 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1608 codeptr);
1609 }
1610 }
1611#endif
1612 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1613 }
1614 KMP_POP_PARTITIONED_TIMER();
1615
1616#if USE_ITT_BUILD
1617 __kmp_itt_critical_acquired(lck);
1618#endif /* USE_ITT_BUILD */
1619#if OMPT_SUPPORT && OMPT_OPTIONAL
1620 if (ompt_enabled.enabled) {
1621 /* OMPT state update */
1622 ti.state = prev_state;
1623 ti.wait_id = 0;
1624
1625 /* OMPT event callback */
1626 if (ompt_enabled.ompt_callback_mutex_acquired) {
1627 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1628 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1629 }
1630 }
1631#endif
1632
1633 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1634 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1635} // __kmpc_critical_with_hint
1636
1637#endif // KMP_USE_DYNAMIC_LOCK
1638
1648void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1649 kmp_critical_name *crit) {
1650 kmp_user_lock_p lck;
1651
1652 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1653
1654#if KMP_USE_DYNAMIC_LOCK
1655 int locktag = KMP_EXTRACT_D_TAG(crit);
1656 if (locktag) {
1657 lck = (kmp_user_lock_p)crit;
1658 KMP_ASSERT(lck != NULL);
1659 if (__kmp_env_consistency_check) {
1660 __kmp_pop_sync(global_tid, ct_critical, loc);
1661 }
1662#if USE_ITT_BUILD
1663 __kmp_itt_critical_releasing(lck);
1664#endif
1665#if KMP_USE_INLINED_TAS
1666 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1667 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1668 } else
1669#elif KMP_USE_INLINED_FUTEX
1670 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1671 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1672 } else
1673#endif
1674 {
1675 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1676 }
1677 } else {
1678 kmp_indirect_lock_t *ilk =
1679 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1680 KMP_ASSERT(ilk != NULL);
1681 lck = ilk->lock;
1682 if (__kmp_env_consistency_check) {
1683 __kmp_pop_sync(global_tid, ct_critical, loc);
1684 }
1685#if USE_ITT_BUILD
1686 __kmp_itt_critical_releasing(lck);
1687#endif
1688 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1689 }
1690
1691#else // KMP_USE_DYNAMIC_LOCK
1692
1693 if ((__kmp_user_lock_kind == lk_tas) &&
1694 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1695 lck = (kmp_user_lock_p)crit;
1696 }
1697#if KMP_USE_FUTEX
1698 else if ((__kmp_user_lock_kind == lk_futex) &&
1699 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1700 lck = (kmp_user_lock_p)crit;
1701 }
1702#endif
1703 else { // ticket, queuing or drdpa
1704 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1705 }
1706
1707 KMP_ASSERT(lck != NULL);
1708
1709 if (__kmp_env_consistency_check)
1710 __kmp_pop_sync(global_tid, ct_critical, loc);
1711
1712#if USE_ITT_BUILD
1713 __kmp_itt_critical_releasing(lck);
1714#endif /* USE_ITT_BUILD */
1715 // Value of 'crit' should be good for using as a critical_id of the critical
1716 // section directive.
1717 __kmp_release_user_lock_with_checks(lck, global_tid);
1718
1719#endif // KMP_USE_DYNAMIC_LOCK
1720
1721#if OMPT_SUPPORT && OMPT_OPTIONAL
1722 /* OMPT release event triggers after lock is released; place here to trigger
1723 * for all #if branches */
1724 OMPT_STORE_RETURN_ADDRESS(global_tid);
1725 if (ompt_enabled.ompt_callback_mutex_released) {
1726 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1727 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1728 OMPT_LOAD_RETURN_ADDRESS(0));
1729 }
1730#endif
1731
1732 KMP_POP_PARTITIONED_TIMER();
1733 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1734}
1735
1745kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1746 int status;
1747 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1748 __kmp_assert_valid_gtid(global_tid);
1749
1750 if (!TCR_4(__kmp_init_parallel))
1751 __kmp_parallel_initialize();
1752
1753 __kmp_resume_if_soft_paused();
1754
1755 if (__kmp_env_consistency_check)
1756 __kmp_check_barrier(global_tid, ct_barrier, loc);
1757
1758#if OMPT_SUPPORT
1759 ompt_frame_t *ompt_frame;
1760 if (ompt_enabled.enabled) {
1761 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1762 if (ompt_frame->enter_frame.ptr == NULL)
1763 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1764 }
1765 OMPT_STORE_RETURN_ADDRESS(global_tid);
1766#endif
1767#if USE_ITT_NOTIFY
1768 __kmp_threads[global_tid]->th.th_ident = loc;
1769#endif
1770 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1771#if OMPT_SUPPORT && OMPT_OPTIONAL
1772 if (ompt_enabled.enabled) {
1773 ompt_frame->enter_frame = ompt_data_none;
1774 }
1775#endif
1776
1777 return (status != 0) ? 0 : 1;
1778}
1779
1789void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1790 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1791 __kmp_assert_valid_gtid(global_tid);
1792 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1793}
1794
1805kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1806 kmp_int32 ret;
1807 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1808 __kmp_assert_valid_gtid(global_tid);
1809
1810 if (!TCR_4(__kmp_init_parallel))
1811 __kmp_parallel_initialize();
1812
1813 __kmp_resume_if_soft_paused();
1814
1815 if (__kmp_env_consistency_check) {
1816 if (loc == 0) {
1817 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1818 }
1819 __kmp_check_barrier(global_tid, ct_barrier, loc);
1820 }
1821
1822#if OMPT_SUPPORT
1823 ompt_frame_t *ompt_frame;
1824 if (ompt_enabled.enabled) {
1825 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1826 if (ompt_frame->enter_frame.ptr == NULL)
1827 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1828 }
1829 OMPT_STORE_RETURN_ADDRESS(global_tid);
1830#endif
1831#if USE_ITT_NOTIFY
1832 __kmp_threads[global_tid]->th.th_ident = loc;
1833#endif
1834 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1835#if OMPT_SUPPORT && OMPT_OPTIONAL
1836 if (ompt_enabled.enabled) {
1837 ompt_frame->enter_frame = ompt_data_none;
1838 }
1839#endif
1840
1841 ret = __kmpc_master(loc, global_tid);
1842
1843 if (__kmp_env_consistency_check) {
1844 /* there's no __kmpc_end_master called; so the (stats) */
1845 /* actions of __kmpc_end_master are done here */
1846 if (ret) {
1847 /* only one thread should do the pop since only */
1848 /* one did the push (see __kmpc_master()) */
1849 __kmp_pop_sync(global_tid, ct_master, loc);
1850 }
1851 }
1852
1853 return (ret);
1854}
1855
1856/* The BARRIER for a SINGLE process section is always explicit */
1868kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1869 __kmp_assert_valid_gtid(global_tid);
1870 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1871
1872 if (rc) {
1873 // We are going to execute the single statement, so we should count it.
1874 KMP_COUNT_BLOCK(OMP_SINGLE);
1875 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1876 }
1877
1878#if OMPT_SUPPORT && OMPT_OPTIONAL
1879 kmp_info_t *this_thr = __kmp_threads[global_tid];
1880 kmp_team_t *team = this_thr->th.th_team;
1881 int tid = __kmp_tid_from_gtid(global_tid);
1882
1883 if (ompt_enabled.enabled) {
1884 if (rc) {
1885 if (ompt_enabled.ompt_callback_work) {
1886 ompt_callbacks.ompt_callback(ompt_callback_work)(
1887 ompt_work_single_executor, ompt_scope_begin,
1888 &(team->t.ompt_team_info.parallel_data),
1889 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1890 1, OMPT_GET_RETURN_ADDRESS(0));
1891 }
1892 } else {
1893 if (ompt_enabled.ompt_callback_work) {
1894 ompt_callbacks.ompt_callback(ompt_callback_work)(
1895 ompt_work_single_other, ompt_scope_begin,
1896 &(team->t.ompt_team_info.parallel_data),
1897 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1898 1, OMPT_GET_RETURN_ADDRESS(0));
1899 ompt_callbacks.ompt_callback(ompt_callback_work)(
1900 ompt_work_single_other, ompt_scope_end,
1901 &(team->t.ompt_team_info.parallel_data),
1902 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1903 1, OMPT_GET_RETURN_ADDRESS(0));
1904 }
1905 }
1906 }
1907#endif
1908
1909 return rc;
1910}
1911
1921void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1922 __kmp_assert_valid_gtid(global_tid);
1923 __kmp_exit_single(global_tid);
1924 KMP_POP_PARTITIONED_TIMER();
1925
1926#if OMPT_SUPPORT && OMPT_OPTIONAL
1927 kmp_info_t *this_thr = __kmp_threads[global_tid];
1928 kmp_team_t *team = this_thr->th.th_team;
1929 int tid = __kmp_tid_from_gtid(global_tid);
1930
1931 if (ompt_enabled.ompt_callback_work) {
1932 ompt_callbacks.ompt_callback(ompt_callback_work)(
1933 ompt_work_single_executor, ompt_scope_end,
1934 &(team->t.ompt_team_info.parallel_data),
1935 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1936 OMPT_GET_RETURN_ADDRESS(0));
1937 }
1938#endif
1939}
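// --- Illustrative sketch, not part of kmp_csupport.cpp ----------------------
// "#pragma omp single" lowers to a guarded body plus, unless "nowait" is
// given, a trailing barrier. Only the thread that won __kmpc_single calls
// __kmpc_end_single. Hypothetical wrapper, kmp.h declarations assumed:
void lowered_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    // ... body executed by exactly one thread ...
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // omitted when the construct has "nowait"
}
// -----------------------------------------------------------------------------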
1940
1948void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1949 KMP_POP_PARTITIONED_TIMER();
1950 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1951
1952#if OMPT_SUPPORT && OMPT_OPTIONAL
1953 if (ompt_enabled.ompt_callback_work) {
1954 ompt_work_t ompt_work_type = ompt_work_loop;
1955 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1956 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1957 // Determine workshare type
1958 if (loc != NULL) {
1959 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1960 ompt_work_type = ompt_work_loop;
1961 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1962 ompt_work_type = ompt_work_sections;
1963 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1964 ompt_work_type = ompt_work_distribute;
1965 } else {
1966 // use default set above.
1967 // a warning about this case is provided in __kmpc_for_static_init
1968 }
1969 KMP_DEBUG_ASSERT(ompt_work_type);
1970 }
1971 ompt_callbacks.ompt_callback(ompt_callback_work)(
1972 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1973 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1974 }
1975#endif
1976 if (__kmp_env_consistency_check)
1977 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1978}
1979
1980// User routines which take C-style arguments (call by value)
1981// different from the Fortran equivalent routines
1982
1983void ompc_set_num_threads(int arg) {
1984 // !!!!! TODO: check the per-task binding
1985 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1986}
1987
1988void ompc_set_dynamic(int flag) {
1989 kmp_info_t *thread;
1990
1991 /* For the thread-private implementation of the internal controls */
1992 thread = __kmp_entry_thread();
1993
1994 __kmp_save_internal_controls(thread);
1995
1996 set__dynamic(thread, flag ? true : false);
1997}
1998
1999void ompc_set_nested(int flag) {
2000 kmp_info_t *thread;
2001
2002 /* For the thread-private internal controls implementation */
2003 thread = __kmp_entry_thread();
2004
2005 __kmp_save_internal_controls(thread);
2006
2007 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2008}
2009
2010void ompc_set_max_active_levels(int max_active_levels) {
2011 /* TO DO */
2012 /* we want per-task implementation of this internal control */
2013
2014 /* For the per-thread internal controls implementation */
2015 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2016}
2017
2018void ompc_set_schedule(omp_sched_t kind, int modifier) {
2019 // !!!!! TODO: check the per-task binding
2020 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2021}
2022
2023int ompc_get_ancestor_thread_num(int level) {
2024 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2025}
2026
2027int ompc_get_team_size(int level) {
2028 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2029}
2030
2031/* OpenMP 5.0 Affinity Format API */
2032void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2033 if (!__kmp_init_serial) {
2034 __kmp_serial_initialize();
2035 }
2036 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2037 format, KMP_STRLEN(format) + 1);
2038}
2039
2040size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2041 size_t format_size;
2042 if (!__kmp_init_serial) {
2043 __kmp_serial_initialize();
2044 }
2045 format_size = KMP_STRLEN(__kmp_affinity_format);
2046 if (buffer && size) {
2047 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2048 format_size + 1);
2049 }
2050 return format_size;
2051}
2052
2053void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2054 int gtid;
2055 if (!TCR_4(__kmp_init_middle)) {
2056 __kmp_middle_initialize();
2057 }
2058 __kmp_assign_root_init_mask();
2059 gtid = __kmp_get_gtid();
2060#if KMP_AFFINITY_SUPPORTED
2061 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2062 __kmp_affinity.flags.reset) {
2063 __kmp_reset_root_init_mask(gtid);
2064 }
2065#endif
2066 __kmp_aux_display_affinity(gtid, format);
2067}
2068
2069size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2070 char const *format) {
2071 int gtid;
2072 size_t num_required;
2073 kmp_str_buf_t capture_buf;
2074 if (!TCR_4(__kmp_init_middle)) {
2075 __kmp_middle_initialize();
2076 }
2077 __kmp_assign_root_init_mask();
2078 gtid = __kmp_get_gtid();
2079#if KMP_AFFINITY_SUPPORTED
2080 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2081 __kmp_affinity.flags.reset) {
2082 __kmp_reset_root_init_mask(gtid);
2083 }
2084#endif
2085 __kmp_str_buf_init(&capture_buf);
2086 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2087 if (buffer && buf_size) {
2088 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2089 capture_buf.used + 1);
2090 }
2091 __kmp_str_buf_free(&capture_buf);
2092 return num_required;
2093}
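// Illustrative sketch: the OpenMP 5.0 affinity-format API as seen from user
// code, which corresponds to the ompc_* routines above. The field specifiers
// (%H, %P, %i, %A) follow the OpenMP 5.0 affinity-format table; a NULL format
// means "use the affinity-format ICV".
#include <omp.h>

static void example_affinity_format(void) {
  omp_set_affinity_format("host=%H pid=%P tid=%i aff=%A");

  #pragma omp parallel
  {
    char buf[256];
    // Render this thread's affinity into a private buffer; the return value is
    // the number of characters required, like ompc_capture_affinity above.
    size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
    (void)needed;
    // Or let the runtime print it directly using the stored format.
    omp_display_affinity(NULL);
  }
}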
2094
2095void kmpc_set_stacksize(int arg) {
2096 // __kmp_aux_set_stacksize initializes the library if needed
2097 __kmp_aux_set_stacksize(arg);
2098}
2099
2100void kmpc_set_stacksize_s(size_t arg) {
2101 // __kmp_aux_set_stacksize initializes the library if needed
2102 __kmp_aux_set_stacksize(arg);
2103}
2104
2105void kmpc_set_blocktime(int arg) {
2106 int gtid, tid, bt = arg;
2107 kmp_info_t *thread;
2108
2109 gtid = __kmp_entry_gtid();
2110 tid = __kmp_tid_from_gtid(gtid);
2111 thread = __kmp_thread_from_gtid(gtid);
2112
2113 __kmp_aux_convert_blocktime(&bt);
2114 __kmp_aux_set_blocktime(bt, thread, tid);
2115}
2116
2117void kmpc_set_library(int arg) {
2118 // __kmp_user_set_library initializes the library if needed
2119 __kmp_user_set_library((enum library_type)arg);
2120}
2121
2122void kmpc_set_defaults(char const *str) {
2123 // __kmp_aux_set_defaults initializes the library if needed
2124 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2125}
2126
2127void kmpc_set_disp_num_buffers(int arg) {
2128 // ignore after initialization because some teams have already
2129 // allocated dispatch buffers
2130 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2131 arg <= KMP_MAX_DISP_NUM_BUFF) {
2132 __kmp_dispatch_num_buffers = arg;
2133 }
2134}
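// Illustrative sketch: the kmp_* extension API (declared in this runtime's
// <omp.h>, not part of the OpenMP specification) that reaches the kmpc_set_*
// entry points above. Values are examples only; the stack-size and
// dispatch-buffer settings only take effect if made before the runtime
// initializes, as noted for kmpc_set_disp_num_buffers above.
#include <omp.h>

static void example_kmp_extensions(void) {
  kmp_set_stacksize_s(4 * 1024 * 1024);     // 4 MiB worker stacks
  kmp_set_blocktime(0);                     // idle workers sleep immediately
  kmp_set_library_throughput();             // equivalent to KMP_LIBRARY=throughput
  kmp_set_defaults("KMP_AFFINITY=compact"); // bulk KMP_* style settings
  kmp_set_disp_num_buffers(8);              // honored only before serial init
}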
2135
2136int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2137#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2138 return -1;
2139#else
2140 if (!TCR_4(__kmp_init_middle)) {
2141 __kmp_middle_initialize();
2142 }
2143 __kmp_assign_root_init_mask();
2144 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2145#endif
2146}
2147
2148int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2149#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2150 return -1;
2151#else
2152 if (!TCR_4(__kmp_init_middle)) {
2153 __kmp_middle_initialize();
2154 }
2155 __kmp_assign_root_init_mask();
2156 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2157#endif
2158}
2159
2160int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2161#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2162 return -1;
2163#else
2164 if (!TCR_4(__kmp_init_middle)) {
2165 __kmp_middle_initialize();
2166 }
2167 __kmp_assign_root_init_mask();
2168 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2169#endif
2170}
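// Illustrative sketch: the kmp_* affinity-mask extension API (declared in this
// runtime's <omp.h>) that reaches kmpc_set/unset/get_affinity_mask_proc above.
// As with the entry points above, stub builds or platforms without affinity
// support report failure through the return values.
#include <omp.h>

static int example_pin_to_proc0(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(0, &mask) != 0) { // add logical proc 0
    kmp_destroy_affinity_mask(&mask);
    return -1;                                     // affinity not supported
  }
  int rc = kmp_set_affinity(&mask);                   // bind the calling thread
  int in_mask = kmp_get_affinity_mask_proc(0, &mask); // membership query
  kmp_destroy_affinity_mask(&mask);
  (void)in_mask;
  return rc;                                          // 0 on success
}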
2171
2172/* -------------------------------------------------------------------------- */
2217void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2218 void *cpy_data, void (*cpy_func)(void *, void *),
2219 kmp_int32 didit) {
2220 void **data_ptr;
2221 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2222 __kmp_assert_valid_gtid(gtid);
2223
2224 KMP_MB();
2225
2226 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2227
2228 if (__kmp_env_consistency_check) {
2229 if (loc == 0) {
2230 KMP_WARNING(ConstructIdentInvalid);
2231 }
2232 }
2233
2234 // ToDo: Optimize the following two barriers into some kind of split barrier
2235
2236 if (didit)
2237 *data_ptr = cpy_data;
2238
2239#if OMPT_SUPPORT
2240 ompt_frame_t *ompt_frame;
2241 if (ompt_enabled.enabled) {
2242 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2243 if (ompt_frame->enter_frame.ptr == NULL)
2244 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2245 }
2246 OMPT_STORE_RETURN_ADDRESS(gtid);
2247#endif
2248/* This barrier is not a barrier region boundary */
2249#if USE_ITT_NOTIFY
2250 __kmp_threads[gtid]->th.th_ident = loc;
2251#endif
2252 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2253
2254 if (!didit)
2255 (*cpy_func)(cpy_data, *data_ptr);
2256
2257 // Consider next barrier a user-visible barrier for barrier region boundaries
2258 // Nesting checks are already handled by the single construct checks
2259 {
2260#if OMPT_SUPPORT
2261 OMPT_STORE_RETURN_ADDRESS(gtid);
2262#endif
2263#if USE_ITT_NOTIFY
2264 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2265// tasks can overwrite the location)
2266#endif
2267 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2268#if OMPT_SUPPORT && OMPT_OPTIONAL
2269 if (ompt_enabled.enabled) {
2270 ompt_frame->enter_frame = ompt_data_none;
2271 }
2272#endif
2273 }
2274}
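// Illustrative sketch (simplified from the usual compiler lowering): how
//   #pragma omp single copyprivate(x)
// is mapped onto __kmpc_single()/__kmpc_end_single() (defined earlier in this
// file) and __kmpc_copyprivate(). example_copy_int stands in for the outlined
// copy helper the compiler would emit; real compilers broadcast a list of
// pointers rather than a single variable.
static void example_copy_int(void *dst, void *src) {
  *(int *)dst = *(int *)src; // copy the copyprivate variable
}

static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid,
                                       int *x /* this thread's private copy */) {
  kmp_int32 didit = 0;
  if (__kmpc_single(loc, gtid)) { // returns 1 only for the executing thread
    *x = 42;                      // body of the single region
    didit = 1;
    __kmpc_end_single(loc, gtid);
  }
  // Every thread of the team calls this: the executing thread publishes its
  // data, the others copy from it via example_copy_int().
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, example_copy_int, didit);
}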
2275
2276/* --------------------------------------------------------------------------*/
2293void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2294 void **data_ptr;
2295
2296 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2297
2298 KMP_MB();
2299
2300 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2301
2302 if (__kmp_env_consistency_check) {
2303 if (loc == 0) {
2304 KMP_WARNING(ConstructIdentInvalid);
2305 }
2306 }
2307
2308 // ToDo: Optimize the following barrier
2309
2310 if (cpy_data)
2311 *data_ptr = cpy_data;
2312
2313#if OMPT_SUPPORT
2314 ompt_frame_t *ompt_frame;
2315 if (ompt_enabled.enabled) {
2316 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2317 if (ompt_frame->enter_frame.ptr == NULL)
2318 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2319 OMPT_STORE_RETURN_ADDRESS(gtid);
2320 }
2321#endif
2322/* This barrier is not a barrier region boundary */
2323#if USE_ITT_NOTIFY
2324 __kmp_threads[gtid]->th.th_ident = loc;
2325#endif
2326 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2327
2328 return *data_ptr;
2329}
2330
2331/* -------------------------------------------------------------------------- */
2332
2333#define INIT_LOCK __kmp_init_user_lock_with_checks
2334#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2335#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2336#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2337#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2338#define ACQUIRE_NESTED_LOCK_TIMED \
2339 __kmp_acquire_nested_user_lock_with_checks_timed
2340#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2341#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2342#define TEST_LOCK __kmp_test_user_lock_with_checks
2343#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2344#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2345#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2346
2347// TODO: Make check abort messages use location info & pass it into
2348// with_checks routines
2349
2350#if KMP_USE_DYNAMIC_LOCK
2351
2352// internal lock initializer
2353static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2354 kmp_dyna_lockseq_t seq) {
2355 if (KMP_IS_D_LOCK(seq)) {
2356 KMP_INIT_D_LOCK(lock, seq);
2357#if USE_ITT_BUILD
2358 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2359#endif
2360 } else {
2361 KMP_INIT_I_LOCK(lock, seq);
2362#if USE_ITT_BUILD
2363 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2364 __kmp_itt_lock_creating(ilk->lock, loc);
2365#endif
2366 }
2367}
2368
2369// internal nest lock initializer
2370static __forceinline void
2371__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2372 kmp_dyna_lockseq_t seq) {
2373#if KMP_USE_TSX
2374 // Don't have nested lock implementation for speculative locks
2375 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2376 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2377 seq = __kmp_user_lock_seq;
2378#endif
2379 switch (seq) {
2380 case lockseq_tas:
2381 seq = lockseq_nested_tas;
2382 break;
2383#if KMP_USE_FUTEX
2384 case lockseq_futex:
2385 seq = lockseq_nested_futex;
2386 break;
2387#endif
2388 case lockseq_ticket:
2389 seq = lockseq_nested_ticket;
2390 break;
2391 case lockseq_queuing:
2392 seq = lockseq_nested_queuing;
2393 break;
2394 case lockseq_drdpa:
2395 seq = lockseq_nested_drdpa;
2396 break;
2397 default:
2398 seq = lockseq_nested_queuing;
2399 }
2400 KMP_INIT_I_LOCK(lock, seq);
2401#if USE_ITT_BUILD
2402 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2403 __kmp_itt_lock_creating(ilk->lock, loc);
2404#endif
2405}
2406
2407/* initialize the lock with a hint */
2408void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2409 uintptr_t hint) {
2410 KMP_DEBUG_ASSERT(__kmp_init_serial);
2411 if (__kmp_env_consistency_check && user_lock == NULL) {
2412 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2413 }
2414
2415 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2416
2417#if OMPT_SUPPORT && OMPT_OPTIONAL
2418 // This is the case, if called from omp_init_lock_with_hint:
2419 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2420 if (!codeptr)
2421 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2422 if (ompt_enabled.ompt_callback_lock_init) {
2423 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2424 ompt_mutex_lock, (omp_lock_hint_t)hint,
2425 __ompt_get_mutex_impl_type(user_lock),
2426 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2427 }
2428#endif
2429}
2430
2431/* initialize the lock with a hint */
2432void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2433 void **user_lock, uintptr_t hint) {
2434 KMP_DEBUG_ASSERT(__kmp_init_serial);
2435 if (__kmp_env_consistency_check && user_lock == NULL) {
2436 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2437 }
2438
2439 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2440
2441#if OMPT_SUPPORT && OMPT_OPTIONAL
2442 // This is the case, if called from omp_init_lock_with_hint:
2443 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2444 if (!codeptr)
2445 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2446 if (ompt_enabled.ompt_callback_lock_init) {
2447 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2448 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2449 __ompt_get_mutex_impl_type(user_lock),
2450 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2451 }
2452#endif
2453}
2454
2455#endif // KMP_USE_DYNAMIC_LOCK
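// Illustrative sketch: the user-level omp_init_lock_with_hint() call (OpenMP
// 4.5+) that corresponds to __kmpc_init_lock_with_hint() above. The hint is
// advisory only; __kmp_map_hint_to_lock() picks an implementation and may
// fall back to a default.
#include <omp.h>

static void example_lock_with_hint(void) {
  omp_lock_t lk;
  omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);

  #pragma omp parallel
  {
    omp_set_lock(&lk);
    // ... short, rarely conflicting critical work, a good fit for a
    // speculative lock ...
    omp_unset_lock(&lk);
  }
  omp_destroy_lock(&lk);
}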
2456
2457/* initialize the lock */
2458void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2459#if KMP_USE_DYNAMIC_LOCK
2460
2461 KMP_DEBUG_ASSERT(__kmp_init_serial);
2462 if (__kmp_env_consistency_check && user_lock == NULL) {
2463 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2464 }
2465 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2466
2467#if OMPT_SUPPORT && OMPT_OPTIONAL
2468 // This is the case, if called from omp_init_lock_with_hint:
2469 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2470 if (!codeptr)
2471 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2472 if (ompt_enabled.ompt_callback_lock_init) {
2473 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2474 ompt_mutex_lock, omp_lock_hint_none,
2475 __ompt_get_mutex_impl_type(user_lock),
2476 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2477 }
2478#endif
2479
2480#else // KMP_USE_DYNAMIC_LOCK
2481
2482 static char const *const func = "omp_init_lock";
2483 kmp_user_lock_p lck;
2484 KMP_DEBUG_ASSERT(__kmp_init_serial);
2485
2486 if (__kmp_env_consistency_check) {
2487 if (user_lock == NULL) {
2488 KMP_FATAL(LockIsUninitialized, func);
2489 }
2490 }
2491
2492 KMP_CHECK_USER_LOCK_INIT();
2493
2494 if ((__kmp_user_lock_kind == lk_tas) &&
2495 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2496 lck = (kmp_user_lock_p)user_lock;
2497 }
2498#if KMP_USE_FUTEX
2499 else if ((__kmp_user_lock_kind == lk_futex) &&
2500 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2501 lck = (kmp_user_lock_p)user_lock;
2502 }
2503#endif
2504 else {
2505 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2506 }
2507 INIT_LOCK(lck);
2508 __kmp_set_user_lock_location(lck, loc);
2509
2510#if OMPT_SUPPORT && OMPT_OPTIONAL
2511 // This is the case, if called from omp_init_lock_with_hint:
2512 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2513 if (!codeptr)
2514 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2515 if (ompt_enabled.ompt_callback_lock_init) {
2516 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2517 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2518 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2519 }
2520#endif
2521
2522#if USE_ITT_BUILD
2523 __kmp_itt_lock_creating(lck);
2524#endif /* USE_ITT_BUILD */
2525
2526#endif // KMP_USE_DYNAMIC_LOCK
2527} // __kmpc_init_lock
2528
2529/* initialize the lock */
2530void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2531#if KMP_USE_DYNAMIC_LOCK
2532
2533 KMP_DEBUG_ASSERT(__kmp_init_serial);
2534 if (__kmp_env_consistency_check && user_lock == NULL) {
2535 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2536 }
2537 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2538
2539#if OMPT_SUPPORT && OMPT_OPTIONAL
2540 // This is the case, if called from omp_init_lock_with_hint:
2541 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2542 if (!codeptr)
2543 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2544 if (ompt_enabled.ompt_callback_lock_init) {
2545 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2546 ompt_mutex_nest_lock, omp_lock_hint_none,
2547 __ompt_get_mutex_impl_type(user_lock),
2548 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2549 }
2550#endif
2551
2552#else // KMP_USE_DYNAMIC_LOCK
2553
2554 static char const *const func = "omp_init_nest_lock";
2555 kmp_user_lock_p lck;
2556 KMP_DEBUG_ASSERT(__kmp_init_serial);
2557
2558 if (__kmp_env_consistency_check) {
2559 if (user_lock == NULL) {
2560 KMP_FATAL(LockIsUninitialized, func);
2561 }
2562 }
2563
2564 KMP_CHECK_USER_LOCK_INIT();
2565
2566 if ((__kmp_user_lock_kind == lk_tas) &&
2567 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2568 OMP_NEST_LOCK_T_SIZE)) {
2569 lck = (kmp_user_lock_p)user_lock;
2570 }
2571#if KMP_USE_FUTEX
2572 else if ((__kmp_user_lock_kind == lk_futex) &&
2573 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2574 OMP_NEST_LOCK_T_SIZE)) {
2575 lck = (kmp_user_lock_p)user_lock;
2576 }
2577#endif
2578 else {
2579 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2580 }
2581
2582 INIT_NESTED_LOCK(lck);
2583 __kmp_set_user_lock_location(lck, loc);
2584
2585#if OMPT_SUPPORT && OMPT_OPTIONAL
2586 // This is the case, if called from omp_init_lock_with_hint:
2587 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2588 if (!codeptr)
2589 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2590 if (ompt_enabled.ompt_callback_lock_init) {
2591 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2592 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2593 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2594 }
2595#endif
2596
2597#if USE_ITT_BUILD
2598 __kmp_itt_lock_creating(lck);
2599#endif /* USE_ITT_BUILD */
2600
2601#endif // KMP_USE_DYNAMIC_LOCK
2602} // __kmpc_init_nest_lock
2603
2604void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2605#if KMP_USE_DYNAMIC_LOCK
2606
2607#if USE_ITT_BUILD
2608 kmp_user_lock_p lck;
2609 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2610 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2611 } else {
2612 lck = (kmp_user_lock_p)user_lock;
2613 }
2614 __kmp_itt_lock_destroyed(lck);
2615#endif
2616#if OMPT_SUPPORT && OMPT_OPTIONAL
2617 // This is the case, if called from omp_init_lock_with_hint:
2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2619 if (!codeptr)
2620 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621 if (ompt_enabled.ompt_callback_lock_destroy) {
2622 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2623 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2624 }
2625#endif
2626 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2627#else
2628 kmp_user_lock_p lck;
2629
2630 if ((__kmp_user_lock_kind == lk_tas) &&
2631 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2632 lck = (kmp_user_lock_p)user_lock;
2633 }
2634#if KMP_USE_FUTEX
2635 else if ((__kmp_user_lock_kind == lk_futex) &&
2636 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2637 lck = (kmp_user_lock_p)user_lock;
2638 }
2639#endif
2640 else {
2641 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2642 }
2643
2644#if OMPT_SUPPORT && OMPT_OPTIONAL
2645 // This is the case, if called from omp_init_lock_with_hint:
2646 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2647 if (!codeptr)
2648 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2649 if (ompt_enabled.ompt_callback_lock_destroy) {
2650 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2651 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2652 }
2653#endif
2654
2655#if USE_ITT_BUILD
2656 __kmp_itt_lock_destroyed(lck);
2657#endif /* USE_ITT_BUILD */
2658 DESTROY_LOCK(lck);
2659
2660 if ((__kmp_user_lock_kind == lk_tas) &&
2661 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2662 ;
2663 }
2664#if KMP_USE_FUTEX
2665 else if ((__kmp_user_lock_kind == lk_futex) &&
2666 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2667 ;
2668 }
2669#endif
2670 else {
2671 __kmp_user_lock_free(user_lock, gtid, lck);
2672 }
2673#endif // KMP_USE_DYNAMIC_LOCK
2674} // __kmpc_destroy_lock
2675
2676/* destroy the lock */
2677void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2678#if KMP_USE_DYNAMIC_LOCK
2679
2680#if USE_ITT_BUILD
2681 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2682 __kmp_itt_lock_destroyed(ilk->lock);
2683#endif
2684#if OMPT_SUPPORT && OMPT_OPTIONAL
2685 // This is the case, if called from omp_init_lock_with_hint:
2686 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2687 if (!codeptr)
2688 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2689 if (ompt_enabled.ompt_callback_lock_destroy) {
2690 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2691 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2692 }
2693#endif
2694 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2695
2696#else // KMP_USE_DYNAMIC_LOCK
2697
2698 kmp_user_lock_p lck;
2699
2700 if ((__kmp_user_lock_kind == lk_tas) &&
2701 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2702 OMP_NEST_LOCK_T_SIZE)) {
2703 lck = (kmp_user_lock_p)user_lock;
2704 }
2705#if KMP_USE_FUTEX
2706 else if ((__kmp_user_lock_kind == lk_futex) &&
2707 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2708 OMP_NEST_LOCK_T_SIZE)) {
2709 lck = (kmp_user_lock_p)user_lock;
2710 }
2711#endif
2712 else {
2713 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2714 }
2715
2716#if OMPT_SUPPORT && OMPT_OPTIONAL
2717 // This is the case, if called from omp_init_lock_with_hint:
2718 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2719 if (!codeptr)
2720 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2721 if (ompt_enabled.ompt_callback_lock_destroy) {
2722 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2723 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2724 }
2725#endif
2726
2727#if USE_ITT_BUILD
2728 __kmp_itt_lock_destroyed(lck);
2729#endif /* USE_ITT_BUILD */
2730
2731 DESTROY_NESTED_LOCK(lck);
2732
2733 if ((__kmp_user_lock_kind == lk_tas) &&
2734 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2735 OMP_NEST_LOCK_T_SIZE)) {
2736 ;
2737 }
2738#if KMP_USE_FUTEX
2739 else if ((__kmp_user_lock_kind == lk_futex) &&
2740 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2741 OMP_NEST_LOCK_T_SIZE)) {
2742 ;
2743 }
2744#endif
2745 else {
2746 __kmp_user_lock_free(user_lock, gtid, lck);
2747 }
2748#endif // KMP_USE_DYNAMIC_LOCK
2749} // __kmpc_destroy_nest_lock
2750
2751void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2752 KMP_COUNT_BLOCK(OMP_set_lock);
2753#if KMP_USE_DYNAMIC_LOCK
2754 int tag = KMP_EXTRACT_D_TAG(user_lock);
2755#if USE_ITT_BUILD
2756 __kmp_itt_lock_acquiring(
2757 (kmp_user_lock_p)
2758 user_lock); // itt function will get to the right lock object.
2759#endif
2760#if OMPT_SUPPORT && OMPT_OPTIONAL
2761 // This is the case, if called from omp_init_lock_with_hint:
2762 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2763 if (!codeptr)
2764 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2765 if (ompt_enabled.ompt_callback_mutex_acquire) {
2766 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2767 ompt_mutex_lock, omp_lock_hint_none,
2768 __ompt_get_mutex_impl_type(user_lock),
2769 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2770 }
2771#endif
2772#if KMP_USE_INLINED_TAS
2773 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2774 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2775 } else
2776#elif KMP_USE_INLINED_FUTEX
2777 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2778 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2779 } else
2780#endif
2781 {
2782 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2783 }
2784#if USE_ITT_BUILD
2785 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2786#endif
2787#if OMPT_SUPPORT && OMPT_OPTIONAL
2788 if (ompt_enabled.ompt_callback_mutex_acquired) {
2789 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2790 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2791 }
2792#endif
2793
2794#else // KMP_USE_DYNAMIC_LOCK
2795
2796 kmp_user_lock_p lck;
2797
2798 if ((__kmp_user_lock_kind == lk_tas) &&
2799 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2800 lck = (kmp_user_lock_p)user_lock;
2801 }
2802#if KMP_USE_FUTEX
2803 else if ((__kmp_user_lock_kind == lk_futex) &&
2804 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2805 lck = (kmp_user_lock_p)user_lock;
2806 }
2807#endif
2808 else {
2809 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2810 }
2811
2812#if USE_ITT_BUILD
2813 __kmp_itt_lock_acquiring(lck);
2814#endif /* USE_ITT_BUILD */
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.ompt_callback_mutex_acquire) {
2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2823 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2824 }
2825#endif
2826
2827 ACQUIRE_LOCK(lck, gtid);
2828
2829#if USE_ITT_BUILD
2830 __kmp_itt_lock_acquired(lck);
2831#endif /* USE_ITT_BUILD */
2832
2833#if OMPT_SUPPORT && OMPT_OPTIONAL
2834 if (ompt_enabled.ompt_callback_mutex_acquired) {
2835 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2836 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2837 }
2838#endif
2839
2840#endif // KMP_USE_DYNAMIC_LOCK
2841}
2842
2843void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2844#if KMP_USE_DYNAMIC_LOCK
2845
2846#if USE_ITT_BUILD
2847 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2848#endif
2849#if OMPT_SUPPORT && OMPT_OPTIONAL
2850 // This is the case, if called from omp_init_lock_with_hint:
2851 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2852 if (!codeptr)
2853 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2854 if (ompt_enabled.enabled) {
2855 if (ompt_enabled.ompt_callback_mutex_acquire) {
2856 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2857 ompt_mutex_nest_lock, omp_lock_hint_none,
2858 __ompt_get_mutex_impl_type(user_lock),
2859 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2860 }
2861 }
2862#endif
2863 int acquire_status =
2864 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2865 (void)acquire_status;
2866#if USE_ITT_BUILD
2867 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2868#endif
2869
2870#if OMPT_SUPPORT && OMPT_OPTIONAL
2871 if (ompt_enabled.enabled) {
2872 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2873 if (ompt_enabled.ompt_callback_mutex_acquired) {
2874 // lock_first
2875 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2876 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2877 codeptr);
2878 }
2879 } else {
2880 if (ompt_enabled.ompt_callback_nest_lock) {
2881 // lock_next
2882 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2883 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2884 }
2885 }
2886 }
2887#endif
2888
2889#else // KMP_USE_DYNAMIC_LOCK
2890 int acquire_status;
2891 kmp_user_lock_p lck;
2892
2893 if ((__kmp_user_lock_kind == lk_tas) &&
2894 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2895 OMP_NEST_LOCK_T_SIZE)) {
2896 lck = (kmp_user_lock_p)user_lock;
2897 }
2898#if KMP_USE_FUTEX
2899 else if ((__kmp_user_lock_kind == lk_futex) &&
2900 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2901 OMP_NEST_LOCK_T_SIZE)) {
2902 lck = (kmp_user_lock_p)user_lock;
2903 }
2904#endif
2905 else {
2906 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2907 }
2908
2909#if USE_ITT_BUILD
2910 __kmp_itt_lock_acquiring(lck);
2911#endif /* USE_ITT_BUILD */
2912#if OMPT_SUPPORT && OMPT_OPTIONAL
2913 // This is the case, if called from omp_init_lock_with_hint:
2914 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2915 if (!codeptr)
2916 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2917 if (ompt_enabled.enabled) {
2918 if (ompt_enabled.ompt_callback_mutex_acquire) {
2919 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2920 ompt_mutex_nest_lock, omp_lock_hint_none,
2921 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2922 codeptr);
2923 }
2924 }
2925#endif
2926
2927 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2928
2929#if USE_ITT_BUILD
2930 __kmp_itt_lock_acquired(lck);
2931#endif /* USE_ITT_BUILD */
2932
2933#if OMPT_SUPPORT && OMPT_OPTIONAL
2934 if (ompt_enabled.enabled) {
2935 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2936 if (ompt_enabled.ompt_callback_mutex_acquired) {
2937 // lock_first
2938 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2939 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2940 }
2941 } else {
2942 if (ompt_enabled.ompt_callback_nest_lock) {
2943 // lock_next
2944 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2945 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2946 }
2947 }
2948 }
2949#endif
2950
2951#endif // KMP_USE_DYNAMIC_LOCK
2952}
2953
2954void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2955#if KMP_USE_DYNAMIC_LOCK
2956
2957 int tag = KMP_EXTRACT_D_TAG(user_lock);
2958#if USE_ITT_BUILD
2959 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2960#endif
2961#if KMP_USE_INLINED_TAS
2962 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2963 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2964 } else
2965#elif KMP_USE_INLINED_FUTEX
2966 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2967 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2968 } else
2969#endif
2970 {
2971 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2972 }
2973
2974#if OMPT_SUPPORT && OMPT_OPTIONAL
2975 // This is the case, if called from omp_init_lock_with_hint:
2976 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2977 if (!codeptr)
2978 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2979 if (ompt_enabled.ompt_callback_mutex_released) {
2980 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2981 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2982 }
2983#endif
2984
2985#else // KMP_USE_DYNAMIC_LOCK
2986
2987 kmp_user_lock_p lck;
2988
2989 /* Can't use serial interval since not block structured */
2990 /* release the lock */
2991
2992 if ((__kmp_user_lock_kind == lk_tas) &&
2993 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2994#if KMP_OS_LINUX && \
2995 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2996// "fast" path implemented to fix customer performance issue
2997#if USE_ITT_BUILD
2998 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2999#endif /* USE_ITT_BUILD */
3000 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3001 KMP_MB();
3002
3003#if OMPT_SUPPORT && OMPT_OPTIONAL
3004 // This is the case, if called from omp_init_lock_with_hint:
3005 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3006 if (!codeptr)
3007 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3008 if (ompt_enabled.ompt_callback_mutex_released) {
3009 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3010 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3011 }
3012#endif
3013
3014 return;
3015#else
3016 lck = (kmp_user_lock_p)user_lock;
3017#endif
3018 }
3019#if KMP_USE_FUTEX
3020 else if ((__kmp_user_lock_kind == lk_futex) &&
3021 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3022 lck = (kmp_user_lock_p)user_lock;
3023 }
3024#endif
3025 else {
3026 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3027 }
3028
3029#if USE_ITT_BUILD
3030 __kmp_itt_lock_releasing(lck);
3031#endif /* USE_ITT_BUILD */
3032
3033 RELEASE_LOCK(lck, gtid);
3034
3035#if OMPT_SUPPORT && OMPT_OPTIONAL
3036 // This is the case, if called from omp_init_lock_with_hint:
3037 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3038 if (!codeptr)
3039 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3040 if (ompt_enabled.ompt_callback_mutex_released) {
3041 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3042 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3043 }
3044#endif
3045
3046#endif // KMP_USE_DYNAMIC_LOCK
3047}
3048
3049/* release the lock */
3050void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3051#if KMP_USE_DYNAMIC_LOCK
3052
3053#if USE_ITT_BUILD
3054 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3055#endif
3056 int release_status =
3057 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3058 (void)release_status;
3059
3060#if OMPT_SUPPORT && OMPT_OPTIONAL
3061 // This is the case, if called from omp_init_lock_with_hint:
3062 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3063 if (!codeptr)
3064 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3065 if (ompt_enabled.enabled) {
3066 if (release_status == KMP_LOCK_RELEASED) {
3067 if (ompt_enabled.ompt_callback_mutex_released) {
3068 // release_lock_last
3069 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3070 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3071 codeptr);
3072 }
3073 } else if (ompt_enabled.ompt_callback_nest_lock) {
3074 // release_lock_prev
3075 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3076 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3077 }
3078 }
3079#endif
3080
3081#else // KMP_USE_DYNAMIC_LOCK
3082
3083 kmp_user_lock_p lck;
3084
3085 /* Can't use serial interval since not block structured */
3086
3087 if ((__kmp_user_lock_kind == lk_tas) &&
3088 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3089 OMP_NEST_LOCK_T_SIZE)) {
3090#if KMP_OS_LINUX && \
3091 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3092 // "fast" path implemented to fix customer performance issue
3093 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3094#if USE_ITT_BUILD
3095 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3096#endif /* USE_ITT_BUILD */
3097
3098#if OMPT_SUPPORT && OMPT_OPTIONAL
3099 int release_status = KMP_LOCK_STILL_HELD;
3100#endif
3101
3102 if (--(tl->lk.depth_locked) == 0) {
3103 TCW_4(tl->lk.poll, 0);
3104#if OMPT_SUPPORT && OMPT_OPTIONAL
3105 release_status = KMP_LOCK_RELEASED;
3106#endif
3107 }
3108 KMP_MB();
3109
3110#if OMPT_SUPPORT && OMPT_OPTIONAL
3111 // This is the case, if called from omp_init_lock_with_hint:
3112 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3113 if (!codeptr)
3114 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3115 if (ompt_enabled.enabled) {
3116 if (release_status == KMP_LOCK_RELEASED) {
3117 if (ompt_enabled.ompt_callback_mutex_released) {
3118 // release_lock_last
3119 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3120 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3121 }
3122 } else if (ompt_enabled.ompt_callback_nest_lock) {
3123 // release_lock_previous
3124 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3125 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3126 }
3127 }
3128#endif
3129
3130 return;
3131#else
3132 lck = (kmp_user_lock_p)user_lock;
3133#endif
3134 }
3135#if KMP_USE_FUTEX
3136 else if ((__kmp_user_lock_kind == lk_futex) &&
3137 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3138 OMP_NEST_LOCK_T_SIZE)) {
3139 lck = (kmp_user_lock_p)user_lock;
3140 }
3141#endif
3142 else {
3143 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3144 }
3145
3146#if USE_ITT_BUILD
3147 __kmp_itt_lock_releasing(lck);
3148#endif /* USE_ITT_BUILD */
3149
3150 int release_status;
3151 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3152#if OMPT_SUPPORT && OMPT_OPTIONAL
3153 // This is the case, if called from omp_init_lock_with_hint:
3154 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3155 if (!codeptr)
3156 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3157 if (ompt_enabled.enabled) {
3158 if (release_status == KMP_LOCK_RELEASED) {
3159 if (ompt_enabled.ompt_callback_mutex_released) {
3160 // release_lock_last
3161 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3162 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3163 }
3164 } else if (ompt_enabled.ompt_callback_nest_lock) {
3165 // release_lock_previous
3166 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3167 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3168 }
3169 }
3170#endif
3171
3172#endif // KMP_USE_DYNAMIC_LOCK
3173}
3174
3175/* try to acquire the lock */
3176int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3177 KMP_COUNT_BLOCK(OMP_test_lock);
3178
3179#if KMP_USE_DYNAMIC_LOCK
3180 int rc;
3181 int tag = KMP_EXTRACT_D_TAG(user_lock);
3182#if USE_ITT_BUILD
3183 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3184#endif
3185#if OMPT_SUPPORT && OMPT_OPTIONAL
3186 // This is the case, if called from omp_init_lock_with_hint:
3187 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3188 if (!codeptr)
3189 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3190 if (ompt_enabled.ompt_callback_mutex_acquire) {
3191 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3192 ompt_mutex_test_lock, omp_lock_hint_none,
3193 __ompt_get_mutex_impl_type(user_lock),
3194 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3195 }
3196#endif
3197#if KMP_USE_INLINED_TAS
3198 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3199 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3200 } else
3201#elif KMP_USE_INLINED_FUTEX
3202 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3203 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3204 } else
3205#endif
3206 {
3207 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3208 }
3209 if (rc) {
3210#if USE_ITT_BUILD
3211 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3212#endif
3213#if OMPT_SUPPORT && OMPT_OPTIONAL
3214 if (ompt_enabled.ompt_callback_mutex_acquired) {
3215 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3216 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3217 }
3218#endif
3219 return FTN_TRUE;
3220 } else {
3221#if USE_ITT_BUILD
3222 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3223#endif
3224 return FTN_FALSE;
3225 }
3226
3227#else // KMP_USE_DYNAMIC_LOCK
3228
3229 kmp_user_lock_p lck;
3230 int rc;
3231
3232 if ((__kmp_user_lock_kind == lk_tas) &&
3233 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3234 lck = (kmp_user_lock_p)user_lock;
3235 }
3236#if KMP_USE_FUTEX
3237 else if ((__kmp_user_lock_kind == lk_futex) &&
3238 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3239 lck = (kmp_user_lock_p)user_lock;
3240 }
3241#endif
3242 else {
3243 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3244 }
3245
3246#if USE_ITT_BUILD
3247 __kmp_itt_lock_acquiring(lck);
3248#endif /* USE_ITT_BUILD */
3249#if OMPT_SUPPORT && OMPT_OPTIONAL
3250 // This is the case, if called from omp_init_lock_with_hint:
3251 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3252 if (!codeptr)
3253 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3254 if (ompt_enabled.ompt_callback_mutex_acquire) {
3255 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3256 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3257 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3258 }
3259#endif
3260
3261 rc = TEST_LOCK(lck, gtid);
3262#if USE_ITT_BUILD
3263 if (rc) {
3264 __kmp_itt_lock_acquired(lck);
3265 } else {
3266 __kmp_itt_lock_cancelled(lck);
3267 }
3268#endif /* USE_ITT_BUILD */
3269#if OMPT_SUPPORT && OMPT_OPTIONAL
3270 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3271 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3272 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3273 }
3274#endif
3275
3276 return (rc ? FTN_TRUE : FTN_FALSE);
3277
3278 /* Can't use serial interval since not block structured */
3279
3280#endif // KMP_USE_DYNAMIC_LOCK
3281}
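// Illustrative sketch: user-level usage of simple OpenMP locks, which
// corresponds to the __kmpc_set_lock(), __kmpc_unset_lock() and
// __kmpc_test_lock() entry points above. The counter update is an arbitrary
// example of guarded work.
#include <omp.h>

static int example_locked_counter(int iterations) {
  omp_lock_t lk;
  int counter = 0;
  omp_init_lock(&lk);

  #pragma omp parallel for
  for (int i = 0; i < iterations; ++i) {
    if (omp_test_lock(&lk)) { // non-blocking attempt first
      ++counter;
      omp_unset_lock(&lk);
    } else {
      omp_set_lock(&lk);      // fall back to a blocking acquire
      ++counter;
      omp_unset_lock(&lk);
    }
  }
  omp_destroy_lock(&lk);
  return counter;             // equals iterations
}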
3282
3283/* try to acquire the lock */
3284int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3285#if KMP_USE_DYNAMIC_LOCK
3286 int rc;
3287#if USE_ITT_BUILD
3288 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3289#endif
3290#if OMPT_SUPPORT && OMPT_OPTIONAL
3291 // This is the case, if called from omp_init_lock_with_hint:
3292 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3293 if (!codeptr)
3294 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3295 if (ompt_enabled.ompt_callback_mutex_acquire) {
3296 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3297 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3298 __ompt_get_mutex_impl_type(user_lock),
3299 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3300 }
3301#endif
3302 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3303#if USE_ITT_BUILD
3304 if (rc) {
3305 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3306 } else {
3307 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3308 }
3309#endif
3310#if OMPT_SUPPORT && OMPT_OPTIONAL
3311 if (ompt_enabled.enabled && rc) {
3312 if (rc == 1) {
3313 if (ompt_enabled.ompt_callback_mutex_acquired) {
3314 // lock_first
3315 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3316 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3317 codeptr);
3318 }
3319 } else {
3320 if (ompt_enabled.ompt_callback_nest_lock) {
3321 // lock_next
3322 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3323 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3324 }
3325 }
3326 }
3327#endif
3328 return rc;
3329
3330#else // KMP_USE_DYNAMIC_LOCK
3331
3332 kmp_user_lock_p lck;
3333 int rc;
3334
3335 if ((__kmp_user_lock_kind == lk_tas) &&
3336 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3337 OMP_NEST_LOCK_T_SIZE)) {
3338 lck = (kmp_user_lock_p)user_lock;
3339 }
3340#if KMP_USE_FUTEX
3341 else if ((__kmp_user_lock_kind == lk_futex) &&
3342 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3343 OMP_NEST_LOCK_T_SIZE)) {
3344 lck = (kmp_user_lock_p)user_lock;
3345 }
3346#endif
3347 else {
3348 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3349 }
3350
3351#if USE_ITT_BUILD
3352 __kmp_itt_lock_acquiring(lck);
3353#endif /* USE_ITT_BUILD */
3354
3355#if OMPT_SUPPORT && OMPT_OPTIONAL
3356 // This is the case, if called from omp_init_lock_with_hint:
3357 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3358 if (!codeptr)
3359 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3360 if (ompt_enabled.enabled &&
3361 ompt_enabled.ompt_callback_mutex_acquire) {
3362 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3363 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3364 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3365 codeptr);
3366 }
3367#endif
3368
3369 rc = TEST_NESTED_LOCK(lck, gtid);
3370#if USE_ITT_BUILD
3371 if (rc) {
3372 __kmp_itt_lock_acquired(lck);
3373 } else {
3374 __kmp_itt_lock_cancelled(lck);
3375 }
3376#endif /* USE_ITT_BUILD */
3377#if OMPT_SUPPORT && OMPT_OPTIONAL
3378 if (ompt_enabled.enabled && rc) {
3379 if (rc == 1) {
3380 if (ompt_enabled.ompt_callback_mutex_acquired) {
3381 // lock_first
3382 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3383 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3384 }
3385 } else {
3386 if (ompt_enabled.ompt_callback_nest_lock) {
3387 // lock_next
3388 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3389 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3390 }
3391 }
3392 }
3393#endif
3394 return rc;
3395
3396 /* Can't use serial interval since not block structured */
3397
3398#endif // KMP_USE_DYNAMIC_LOCK
3399}
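// Illustrative sketch: user-level usage of nestable locks, corresponding to
// the __kmpc_*_nest_lock entry points above. The owning thread may re-acquire
// the lock recursively; it is only released once the nesting depth returns to
// zero.
#include <omp.h>

static void example_update(omp_nest_lock_t *nlk, int depth) {
  omp_set_nest_lock(nlk);   // owner re-acquires without blocking
  if (depth > 0)
    example_update(nlk, depth - 1);
  omp_unset_nest_lock(nlk); // fully released when the depth drops to zero
}

static void example_nest_lock(void) {
  omp_nest_lock_t nlk;
  omp_init_nest_lock(&nlk);
  #pragma omp parallel
  example_update(&nlk, 3);
  omp_destroy_nest_lock(&nlk);
}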
3400
3401// Interface to fast scalable reduce methods routines
3402
3403// keep the selected method in a thread local structure for cross-function
3404// usage: will be used in __kmpc_end_reduce* functions;
3405// another solution: to re-determine the method one more time in
3406// __kmpc_end_reduce* functions (new prototype required then)
3407// AT: which solution is better?
3408#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3409 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3410
3411#define __KMP_GET_REDUCTION_METHOD(gtid) \
3412 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3413
3414// description of the packed_reduction_method variable: look at the macros in
3415// kmp.h
3416
3417// used in a critical section reduce block
3418static __forceinline void
3419__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3420 kmp_critical_name *crit) {
3421
3422 // this lock was visible to a customer and to the threading profile tool as a
3423 // serial overhead span (although it's used for an internal purpose only)
3424 // why was it visible in previous implementation?
3425 // should we keep it visible in new reduce block?
3426 kmp_user_lock_p lck;
3427
3428#if KMP_USE_DYNAMIC_LOCK
3429
3430 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3431 // Check if it is initialized.
3432 if (*lk == 0) {
3433 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3434 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3435 KMP_GET_D_TAG(__kmp_user_lock_seq));
3436 } else {
3437 __kmp_init_indirect_csptr(crit, loc, global_tid,
3438 KMP_GET_I_TAG(__kmp_user_lock_seq));
3439 }
3440 }
3441 // Branch for accessing the actual lock object and set operation. This
3442 // branching is inevitable since this lock initialization does not follow the
3443 // normal dispatch path (lock table is not used).
3444 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3445 lck = (kmp_user_lock_p)lk;
3446 KMP_DEBUG_ASSERT(lck != NULL);
3447 if (__kmp_env_consistency_check) {
3448 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3449 }
3450 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3451 } else {
3452 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3453 lck = ilk->lock;
3454 KMP_DEBUG_ASSERT(lck != NULL);
3455 if (__kmp_env_consistency_check) {
3456 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3457 }
3458 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3459 }
3460
3461#else // KMP_USE_DYNAMIC_LOCK
3462
3463 // We know that the fast reduction code is only emitted by Intel compilers
3464 // with 32 byte critical sections. If there isn't enough space, then we
3465 // have to use a pointer.
3466 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3467 lck = (kmp_user_lock_p)crit;
3468 } else {
3469 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3470 }
3471 KMP_DEBUG_ASSERT(lck != NULL);
3472
3473 if (__kmp_env_consistency_check)
3474 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3475
3476 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3477
3478#endif // KMP_USE_DYNAMIC_LOCK
3479}
3480
3481// used in a critical section reduce block
3482static __forceinline void
3483__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3484 kmp_critical_name *crit) {
3485
3486 kmp_user_lock_p lck;
3487
3488#if KMP_USE_DYNAMIC_LOCK
3489
3490 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3491 lck = (kmp_user_lock_p)crit;
3492 if (__kmp_env_consistency_check)
3493 __kmp_pop_sync(global_tid, ct_critical, loc);
3494 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3495 } else {
3496 kmp_indirect_lock_t *ilk =
3497 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3498 if (__kmp_env_consistency_check)
3499 __kmp_pop_sync(global_tid, ct_critical, loc);
3500 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3501 }
3502
3503#else // KMP_USE_DYNAMIC_LOCK
3504
3505 // We know that the fast reduction code is only emitted by Intel compilers
3506 // with 32 byte critical sections. If there isn't enough space, then we have
3507 // to use a pointer.
3508 if (__kmp_base_user_lock_size > 32) {
3509 lck = *((kmp_user_lock_p *)crit);
3510 KMP_ASSERT(lck != NULL);
3511 } else {
3512 lck = (kmp_user_lock_p)crit;
3513 }
3514
3515 if (__kmp_env_consistency_check)
3516 __kmp_pop_sync(global_tid, ct_critical, loc);
3517
3518 __kmp_release_user_lock_with_checks(lck, global_tid);
3519
3520#endif // KMP_USE_DYNAMIC_LOCK
3521} // __kmp_end_critical_section_reduce_block
3522
3523static __forceinline int
3524__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3525 int *task_state) {
3526 kmp_team_t *team;
3527
3528 // Check whether we are inside a teams construct.
3529 if (th->th.th_teams_microtask) {
3530 *team_p = team = th->th.th_team;
3531 if (team->t.t_level == th->th.th_teams_level) {
3532 // This is reduction at teams construct.
3533 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3534 // Let's swap teams temporarily for the reduction.
3535 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3536 th->th.th_team = team->t.t_parent;
3537 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3538 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3539 *task_state = th->th.th_task_state;
3540 th->th.th_task_state = 0;
3541
3542 return 1;
3543 }
3544 }
3545 return 0;
3546}
3547
3548static __forceinline void
3549__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3550 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3551 th->th.th_info.ds.ds_tid = 0;
3552 th->th.th_team = team;
3553 th->th.th_team_nproc = team->t.t_nproc;
3554 th->th.th_task_team = team->t.t_task_team[task_state];
3555 __kmp_type_convert(task_state, &(th->th.th_task_state));
3556}
3557
3558/* 2.a.i. Reduce Block without a terminating barrier */
3574kmp_int32
3575__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3576 size_t reduce_size, void *reduce_data,
3577 void (*reduce_func)(void *lhs_data, void *rhs_data),
3578 kmp_critical_name *lck) {
3579
3580 KMP_COUNT_BLOCK(REDUCE_nowait);
3581 int retval = 0;
3582 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3583 kmp_info_t *th;
3584 kmp_team_t *team;
3585 int teams_swapped = 0, task_state;
3586 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3587 __kmp_assert_valid_gtid(global_tid);
3588
3589 // why do we need this initialization here at all?
3590 // Reduction clause cannot be used as a stand-alone directive.
3591
3592 // do not call __kmp_serial_initialize(), it will be called by
3593 // __kmp_parallel_initialize() if needed
3594 // possible detection of false-positive race by the threadchecker ???
3595 if (!TCR_4(__kmp_init_parallel))
3596 __kmp_parallel_initialize();
3597
3598 __kmp_resume_if_soft_paused();
3599
3600// check correctness of reduce block nesting
3601#if KMP_USE_DYNAMIC_LOCK
3602 if (__kmp_env_consistency_check)
3603 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3604#else
3605 if (__kmp_env_consistency_check)
3606 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3607#endif
3608
3609 th = __kmp_thread_from_gtid(global_tid);
3610 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3611
3612 // packed_reduction_method value will be reused by the __kmpc_end_reduce* functions,
3613 // the value should be kept in a variable
3614 // the variable should be either a construct-specific or thread-specific
3615 // property, not a team specific property
3616 // (a thread can reach the next reduce block on the next construct, reduce
3617 // method may differ on the next construct)
3618 // an ident_t "loc" parameter could be used as a construct-specific property
3619 // (what if loc == 0?)
3620 // (if both construct-specific and team-specific variables were shared,
3621 // then unnecessary extra syncs would be needed)
3622 // a thread-specific variable is better regarding two issues above (next
3623 // construct and extra syncs)
3624 // a thread-specific "th_local.reduction_method" variable is used currently
3625 // each thread executes the 'determine' and 'set' lines (there is no need to
3626 // restrict this to one thread, which avoids unnecessary extra syncs)
3627
3628 packed_reduction_method = __kmp_determine_reduction_method(
3629 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3630 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3631
3632 OMPT_REDUCTION_DECL(th, global_tid);
3633 if (packed_reduction_method == critical_reduce_block) {
3634
3635 OMPT_REDUCTION_BEGIN;
3636
3637 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3638 retval = 1;
3639
3640 } else if (packed_reduction_method == empty_reduce_block) {
3641
3642 OMPT_REDUCTION_BEGIN;
3643
3644 // usage: if team size == 1, no synchronization is required ( Intel
3645 // platforms only )
3646 retval = 1;
3647
3648 } else if (packed_reduction_method == atomic_reduce_block) {
3649
3650 retval = 2;
3651
3652 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3653 // won't be called by the code gen)
3654 // (it's not quite good, because the checking block has been closed by
3655 // this 'pop',
3656 // but the atomic operation has not been executed yet; it will be executed
3657 // slightly later, literally on the next instruction)
3658 if (__kmp_env_consistency_check)
3659 __kmp_pop_sync(global_tid, ct_reduce, loc);
3660
3661 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3662 tree_reduce_block)) {
3663
3664// AT: performance issue: a real barrier here
3665// AT: (if primary thread is slow, other threads are blocked here waiting for
3666// the primary thread to come and release them)
3667// AT: (it's not what a customer might expect specifying NOWAIT clause)
3668// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3669// be confusing to a customer)
3670// AT: another implementation of *barrier_gather*nowait() (or some other design)
3671// might go faster and be more in line with the sense of NOWAIT
3672// AT: TO DO: do epcc test and compare times
3673
3674// this barrier should be invisible to a customer and to the threading profile
3675// tool (it's neither a terminating barrier nor customer's code, it's
3676// used for an internal purpose)
3677#if OMPT_SUPPORT
3678 // JP: can this barrier potentially lead to task scheduling?
3679 // JP: as long as there is a barrier in the implementation, OMPT should and
3680 // will provide the barrier events
3681 // so we set-up the necessary frame/return addresses.
3682 ompt_frame_t *ompt_frame;
3683 if (ompt_enabled.enabled) {
3684 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3685 if (ompt_frame->enter_frame.ptr == NULL)
3686 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3687 }
3688 OMPT_STORE_RETURN_ADDRESS(global_tid);
3689#endif
3690#if USE_ITT_NOTIFY
3691 __kmp_threads[global_tid]->th.th_ident = loc;
3692#endif
3693 retval =
3694 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3695 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3696 retval = (retval != 0) ? (0) : (1);
3697#if OMPT_SUPPORT && OMPT_OPTIONAL
3698 if (ompt_enabled.enabled) {
3699 ompt_frame->enter_frame = ompt_data_none;
3700 }
3701#endif
3702
3703 // all other workers except primary thread should do this pop here
3704 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3705 if (__kmp_env_consistency_check) {
3706 if (retval == 0) {
3707 __kmp_pop_sync(global_tid, ct_reduce, loc);
3708 }
3709 }
3710
3711 } else {
3712
3713 // should never reach this block
3714 KMP_ASSERT(0); // "unexpected method"
3715 }
3716 if (teams_swapped) {
3717 __kmp_restore_swapped_teams(th, team, task_state);
3718 }
3719 KA_TRACE(
3720 10,
3721 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3722 global_tid, packed_reduction_method, retval));
3723
3724 return retval;
3725}
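// Illustrative sketch (simplified from the usual compiler lowering): how
// generated code drives __kmpc_reduce_nowait() for something like
//   #pragma omp for nowait reduction(+ : sum)
// For readability a single int is reduced and its private copy is passed
// directly as reduce_data; real compilers pack the privates into a list of
// pointers. All lowered_*/example_* names are hypothetical, and
// __kmpc_atomic_fixed4_add() is declared in kmp_atomic.h.
static kmp_critical_name example_crit; // backing storage for the critical path

static void example_add(void *lhs, void *rhs) { // outlined reduce_func
  *(int *)lhs += *(int *)rhs;
}

static void lowered_reduction_nowait(ident_t *loc, kmp_int32 gtid,
                                     int *shared_sum, int private_sum) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(int),
                               &private_sum, example_add, &example_crit)) {
  case 1: // critical or tree path: reduce here, then close the region
    *shared_sum += private_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &example_crit);
    break;
  case 2: // atomic path: update the shared variable atomically, no end call
    __kmpc_atomic_fixed4_add(loc, gtid, (kmp_int32 *)shared_sum, private_sum);
    break;
  default: // 0: nothing left to do (tree path, non-primary threads)
    break;
  }
}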
3726
3735void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3736 kmp_critical_name *lck) {
3737
3738 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3739
3740 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3741 __kmp_assert_valid_gtid(global_tid);
3742
3743 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3744
3745 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3746
3747 if (packed_reduction_method == critical_reduce_block) {
3748
3749 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3750 OMPT_REDUCTION_END;
3751
3752 } else if (packed_reduction_method == empty_reduce_block) {
3753
3754 // usage: if team size == 1, no synchronization is required ( on Intel
3755 // platforms only )
3756
3757 OMPT_REDUCTION_END;
3758
3759 } else if (packed_reduction_method == atomic_reduce_block) {
3760
3761 // neither primary thread nor other workers should get here
3762 // (code gen does not generate this call in case 2: atomic reduce block)
3763 // actually it's better to remove this elseif at all;
3764 // after removal this value will checked by the 'else' and will assert
3765
3766 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3767 tree_reduce_block)) {
3768
3769 // only primary thread gets here
3770 // OMPT: tree reduction is annotated in the barrier code
3771
3772 } else {
3773
3774 // should never reach this block
3775 KMP_ASSERT(0); // "unexpected method"
3776 }
3777
3778 if (__kmp_env_consistency_check)
3779 __kmp_pop_sync(global_tid, ct_reduce, loc);
3780
3781 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3782 global_tid, packed_reduction_method));
3783
3784 return;
3785}
3786
3787/* 2.a.ii. Reduce Block with a terminating barrier */
3788
3804kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3805 size_t reduce_size, void *reduce_data,
3806 void (*reduce_func)(void *lhs_data, void *rhs_data),
3807 kmp_critical_name *lck) {
3808 KMP_COUNT_BLOCK(REDUCE_wait);
3809 int retval = 0;
3810 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3811 kmp_info_t *th;
3812 kmp_team_t *team;
3813 int teams_swapped = 0, task_state;
3814
3815 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3816 __kmp_assert_valid_gtid(global_tid);
3817
3818 // why do we need this initialization here at all?
3819 // Reduction clause cannot be a stand-alone directive.
3820
3821 // do not call __kmp_serial_initialize(), it will be called by
3822 // __kmp_parallel_initialize() if needed
3823 // possible detection of false-positive race by the threadchecker ???
3824 if (!TCR_4(__kmp_init_parallel))
3825 __kmp_parallel_initialize();
3826
3827 __kmp_resume_if_soft_paused();
3828
3829// check correctness of reduce block nesting
3830#if KMP_USE_DYNAMIC_LOCK
3831 if (__kmp_env_consistency_check)
3832 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3833#else
3834 if (__kmp_env_consistency_check)
3835 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3836#endif
3837
3838 th = __kmp_thread_from_gtid(global_tid);
3839 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3840
3841 packed_reduction_method = __kmp_determine_reduction_method(
3842 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3843 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3844
3845 OMPT_REDUCTION_DECL(th, global_tid);
3846
3847 if (packed_reduction_method == critical_reduce_block) {
3848
3849 OMPT_REDUCTION_BEGIN;
3850 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3851 retval = 1;
3852
3853 } else if (packed_reduction_method == empty_reduce_block) {
3854
3855 OMPT_REDUCTION_BEGIN;
3856    // usage: if team size == 1, no synchronization is required (Intel
3857    // platforms only)
3858 retval = 1;
3859
3860 } else if (packed_reduction_method == atomic_reduce_block) {
3861
3862 retval = 2;
3863
3864 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3865 tree_reduce_block)) {
3866
3867// case tree_reduce_block:
3868// this barrier should be visible to a customer and to the threading profile
3869// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3870#if OMPT_SUPPORT
3871 ompt_frame_t *ompt_frame;
3872 if (ompt_enabled.enabled) {
3873 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3874 if (ompt_frame->enter_frame.ptr == NULL)
3875 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3876 }
3877 OMPT_STORE_RETURN_ADDRESS(global_tid);
3878#endif
3879#if USE_ITT_NOTIFY
3880 __kmp_threads[global_tid]->th.th_ident =
3881 loc; // needed for correct notification of frames
3882#endif
3883 retval =
3884 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3885 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3886 retval = (retval != 0) ? (0) : (1);
3887#if OMPT_SUPPORT && OMPT_OPTIONAL
3888 if (ompt_enabled.enabled) {
3889 ompt_frame->enter_frame = ompt_data_none;
3890 }
3891#endif
3892
3893    // all workers except the primary thread should do this pop here
3894    // (only the primary thread will enter __kmpc_end_reduce())
3895 if (__kmp_env_consistency_check) {
3896 if (retval == 0) { // 0: all other workers; 1: primary thread
3897 __kmp_pop_sync(global_tid, ct_reduce, loc);
3898 }
3899 }
3900
3901 } else {
3902
3903 // should never reach this block
3904 KMP_ASSERT(0); // "unexpected method"
3905 }
3906 if (teams_swapped) {
3907 __kmp_restore_swapped_teams(th, team, task_state);
3908 }
3909
3910 KA_TRACE(10,
3911 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3912 global_tid, packed_reduction_method, retval));
3913 return retval;
3914}
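// Sketch of the blocking counterpart (illustrative, mirroring the nowait
// example above): here generated code calls __kmpc_end_reduce() on both the
// critical (1) and atomic (2) paths so that the terminating barrier is joined:
//
//   int ret = __kmpc_reduce(&loc, gtid, 1, sizeof(sum_local), &sum_local,
//                           my_reduce_func, &my_lock);
//   if (ret == 1)
//     sum_shared += sum_local;          // combine inside the critical section
//   else if (ret == 2)
//     __kmpc_atomic_fixed4_add(&loc, gtid, &sum_shared, sum_local);
//   if (ret != 0)
//     __kmpc_end_reduce(&loc, gtid, &my_lock);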
3915
3926void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3927 kmp_critical_name *lck) {
3928
3929 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3930 kmp_info_t *th;
3931 kmp_team_t *team;
3932 int teams_swapped = 0, task_state;
3933
3934 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3935 __kmp_assert_valid_gtid(global_tid);
3936
3937 th = __kmp_thread_from_gtid(global_tid);
3938 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3939
3940 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3941
3942 // this barrier should be visible to a customer and to the threading profile
3943 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3944 OMPT_REDUCTION_DECL(th, global_tid);
3945
3946 if (packed_reduction_method == critical_reduce_block) {
3947 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3948
3949 OMPT_REDUCTION_END;
3950
3951// TODO: implicit barrier: should be exposed
3952#if OMPT_SUPPORT
3953 ompt_frame_t *ompt_frame;
3954 if (ompt_enabled.enabled) {
3955 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3956 if (ompt_frame->enter_frame.ptr == NULL)
3957 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3958 }
3959 OMPT_STORE_RETURN_ADDRESS(global_tid);
3960#endif
3961#if USE_ITT_NOTIFY
3962 __kmp_threads[global_tid]->th.th_ident = loc;
3963#endif
3964 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3965#if OMPT_SUPPORT && OMPT_OPTIONAL
3966 if (ompt_enabled.enabled) {
3967 ompt_frame->enter_frame = ompt_data_none;
3968 }
3969#endif
3970
3971 } else if (packed_reduction_method == empty_reduce_block) {
3972
3973 OMPT_REDUCTION_END;
3974
3975// usage: if team size==1, no synchronization is required (Intel platforms only)
3976
3977// TODO: implicit barrier: should be exposed
3978#if OMPT_SUPPORT
3979 ompt_frame_t *ompt_frame;
3980 if (ompt_enabled.enabled) {
3981 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3982 if (ompt_frame->enter_frame.ptr == NULL)
3983 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3984 }
3985 OMPT_STORE_RETURN_ADDRESS(global_tid);
3986#endif
3987#if USE_ITT_NOTIFY
3988 __kmp_threads[global_tid]->th.th_ident = loc;
3989#endif
3990 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3991#if OMPT_SUPPORT && OMPT_OPTIONAL
3992 if (ompt_enabled.enabled) {
3993 ompt_frame->enter_frame = ompt_data_none;
3994 }
3995#endif
3996
3997 } else if (packed_reduction_method == atomic_reduce_block) {
3998
3999#if OMPT_SUPPORT
4000 ompt_frame_t *ompt_frame;
4001 if (ompt_enabled.enabled) {
4002 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4003 if (ompt_frame->enter_frame.ptr == NULL)
4004 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4005 }
4006 OMPT_STORE_RETURN_ADDRESS(global_tid);
4007#endif
4008// TODO: implicit barrier: should be exposed
4009#if USE_ITT_NOTIFY
4010 __kmp_threads[global_tid]->th.th_ident = loc;
4011#endif
4012 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4013#if OMPT_SUPPORT && OMPT_OPTIONAL
4014 if (ompt_enabled.enabled) {
4015 ompt_frame->enter_frame = ompt_data_none;
4016 }
4017#endif
4018
4019 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4020 tree_reduce_block)) {
4021
4022 // only primary thread executes here (primary releases all other workers)
4023 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4024 global_tid);
4025
4026 } else {
4027
4028 // should never reach this block
4029 KMP_ASSERT(0); // "unexpected method"
4030 }
4031 if (teams_swapped) {
4032 __kmp_restore_swapped_teams(th, team, task_state);
4033 }
4034
4035 if (__kmp_env_consistency_check)
4036 __kmp_pop_sync(global_tid, ct_reduce, loc);
4037
4038 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4039 global_tid, packed_reduction_method));
4040
4041 return;
4042}
4043
4044#undef __KMP_GET_REDUCTION_METHOD
4045#undef __KMP_SET_REDUCTION_METHOD
4046
4047/* end of interface to fast scalable reduce routines */
4048
4049kmp_uint64 __kmpc_get_taskid() {
4050
4051 kmp_int32 gtid;
4052 kmp_info_t *thread;
4053
4054 gtid = __kmp_get_gtid();
4055 if (gtid < 0) {
4056 return 0;
4057 }
4058 thread = __kmp_thread_from_gtid(gtid);
4059 return thread->th.th_current_task->td_task_id;
4060
4061} // __kmpc_get_taskid
4062
4063kmp_uint64 __kmpc_get_parent_taskid() {
4064
4065 kmp_int32 gtid;
4066 kmp_info_t *thread;
4067 kmp_taskdata_t *parent_task;
4068
4069 gtid = __kmp_get_gtid();
4070 if (gtid < 0) {
4071 return 0;
4072 }
4073 thread = __kmp_thread_from_gtid(gtid);
4074 parent_task = thread->th.th_current_task->td_parent;
4075 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4076
4077} // __kmpc_get_parent_taskid
4078
4090void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4091 const struct kmp_dim *dims) {
4092 __kmp_assert_valid_gtid(gtid);
4093 int j, idx;
4094 kmp_int64 last, trace_count;
4095 kmp_info_t *th = __kmp_threads[gtid];
4096 kmp_team_t *team = th->th.th_team;
4097 kmp_uint32 *flags;
4098 kmp_disp_t *pr_buf = th->th.th_dispatch;
4099 dispatch_shared_info_t *sh_buf;
4100
4101 KA_TRACE(
4102 20,
4103 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4104 gtid, num_dims, !team->t.t_serialized));
4105 KMP_DEBUG_ASSERT(dims != NULL);
4106 KMP_DEBUG_ASSERT(num_dims > 0);
4107
4108 if (team->t.t_serialized) {
4109 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4110 return; // no dependencies if team is serialized
4111 }
4112 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4113 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4114 // the next loop
4115 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4116
4117 // Save bounds info into allocated private buffer
4118 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4119 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4120 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4121 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4122 pr_buf->th_doacross_info[0] =
4123 (kmp_int64)num_dims; // first element is number of dimensions
4124  // Also save the address of num_done so it can be accessed later without
4125  // knowing the buffer index
4126 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4127 pr_buf->th_doacross_info[2] = dims[0].lo;
4128 pr_buf->th_doacross_info[3] = dims[0].up;
4129 pr_buf->th_doacross_info[4] = dims[0].st;
4130 last = 5;
4131 for (j = 1; j < num_dims; ++j) {
4132    kmp_int64 range_length; // range of dims[j], kept in the buffer for
4133        // every dimension except the first one (dims[0])
4134 if (dims[j].st == 1) { // most common case
4135 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4136 range_length = dims[j].up - dims[j].lo + 1;
4137 } else {
4138 if (dims[j].st > 0) {
4139 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4140 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4141 } else { // negative increment
4142 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4143 range_length =
4144 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4145 }
4146 }
4147 pr_buf->th_doacross_info[last++] = range_length;
4148 pr_buf->th_doacross_info[last++] = dims[j].lo;
4149 pr_buf->th_doacross_info[last++] = dims[j].up;
4150 pr_buf->th_doacross_info[last++] = dims[j].st;
4151 }
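  // Resulting th_doacross_info layout (summary of the stores above):
  //   [0]                 number of dimensions
  //   [1]                 address of sh_buf->doacross_num_done
  //   [2], [3], [4]       lo, up, st of dims[0]
  //   [4*j+1]..[4*j+4]    range_length, lo, up, st of dims[j], for j >= 1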
4152
4153 // Compute total trip count.
4154 // Start with range of dims[0] which we don't need to keep in the buffer.
4155 if (dims[0].st == 1) { // most common case
4156 trace_count = dims[0].up - dims[0].lo + 1;
4157 } else if (dims[0].st > 0) {
4158 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4159 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4160 } else { // negative increment
4161 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4162 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4163 }
4164 for (j = 1; j < num_dims; ++j) {
4165 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4166 }
4167 KMP_DEBUG_ASSERT(trace_count > 0);
4168
4169  // Check whether the shared buffer is still occupied by a previous loop
4170  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4171 if (idx != sh_buf->doacross_buf_idx) {
4172 // Shared buffer is occupied, wait for it to be free
4173 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4174 __kmp_eq_4, NULL);
4175 }
4176#if KMP_32_BIT_ARCH
4177 // Check if we are the first thread. After the CAS the first thread gets 0,
4178 // others get 1 if initialization is in progress, allocated pointer otherwise.
4179 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4180 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4181 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4182#else
4183 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4184 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4185#endif
4186 if (flags == NULL) {
4187 // we are the first thread, allocate the array of flags
4188 size_t size =
4189 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4190 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4191 KMP_MB();
4192 sh_buf->doacross_flags = flags;
4193 } else if (flags == (kmp_uint32 *)1) {
4194#if KMP_32_BIT_ARCH
4195 // initialization is still in progress, need to wait
4196 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4197#else
4198 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4199#endif
4200 KMP_YIELD(TRUE);
4201 KMP_MB();
4202 } else {
4203 KMP_MB();
4204 }
4205 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4206 pr_buf->th_doacross_flags =
4207 sh_buf->doacross_flags; // save private copy in order to not
4208 // touch shared buffer on each iteration
4209 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4210}
4211
4212void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4213 __kmp_assert_valid_gtid(gtid);
4214 kmp_int64 shft;
4215 size_t num_dims, i;
4216 kmp_uint32 flag;
4217 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4218 kmp_info_t *th = __kmp_threads[gtid];
4219 kmp_team_t *team = th->th.th_team;
4220 kmp_disp_t *pr_buf;
4221 kmp_int64 lo, up, st;
4222
4223 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4224 if (team->t.t_serialized) {
4225 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4226 return; // no dependencies if team is serialized
4227 }
4228
4229 // calculate sequential iteration number and check out-of-bounds condition
4230 pr_buf = th->th.th_dispatch;
4231 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4232 num_dims = (size_t)pr_buf->th_doacross_info[0];
4233 lo = pr_buf->th_doacross_info[2];
4234 up = pr_buf->th_doacross_info[3];
4235 st = pr_buf->th_doacross_info[4];
4236#if OMPT_SUPPORT && OMPT_OPTIONAL
4237 SimpleVLA<ompt_dependence_t> deps(num_dims);
4238#endif
4239 if (st == 1) { // most common case
4240 if (vec[0] < lo || vec[0] > up) {
4241 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4242 "bounds [%lld,%lld]\n",
4243 gtid, vec[0], lo, up));
4244 return;
4245 }
4246 iter_number = vec[0] - lo;
4247 } else if (st > 0) {
4248 if (vec[0] < lo || vec[0] > up) {
4249 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4250 "bounds [%lld,%lld]\n",
4251 gtid, vec[0], lo, up));
4252 return;
4253 }
4254 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4255 } else { // negative increment
4256 if (vec[0] > lo || vec[0] < up) {
4257 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4258 "bounds [%lld,%lld]\n",
4259 gtid, vec[0], lo, up));
4260 return;
4261 }
4262 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4263 }
4264#if OMPT_SUPPORT && OMPT_OPTIONAL
4265 deps[0].variable.value = iter_number;
4266 deps[0].dependence_type = ompt_dependence_type_sink;
4267#endif
4268 for (i = 1; i < num_dims; ++i) {
4269 kmp_int64 iter, ln;
4270 size_t j = i * 4;
4271 ln = pr_buf->th_doacross_info[j + 1];
4272 lo = pr_buf->th_doacross_info[j + 2];
4273 up = pr_buf->th_doacross_info[j + 3];
4274 st = pr_buf->th_doacross_info[j + 4];
4275 if (st == 1) {
4276 if (vec[i] < lo || vec[i] > up) {
4277 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4278 "bounds [%lld,%lld]\n",
4279 gtid, vec[i], lo, up));
4280 return;
4281 }
4282 iter = vec[i] - lo;
4283 } else if (st > 0) {
4284 if (vec[i] < lo || vec[i] > up) {
4285 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4286 "bounds [%lld,%lld]\n",
4287 gtid, vec[i], lo, up));
4288 return;
4289 }
4290 iter = (kmp_uint64)(vec[i] - lo) / st;
4291 } else { // st < 0
4292 if (vec[i] > lo || vec[i] < up) {
4293 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4294 "bounds [%lld,%lld]\n",
4295 gtid, vec[i], lo, up));
4296 return;
4297 }
4298 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4299 }
4300 iter_number = iter + ln * iter_number;
4301#if OMPT_SUPPORT && OMPT_OPTIONAL
4302 deps[i].variable.value = iter;
4303 deps[i].dependence_type = ompt_dependence_type_sink;
4304#endif
4305 }
4306 shft = iter_number % 32; // use 32-bit granularity
4307 iter_number >>= 5; // divided by 32
4308 flag = 1 << shft;
4309 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4310 KMP_YIELD(TRUE);
4311 }
4312 KMP_MB();
4313#if OMPT_SUPPORT && OMPT_OPTIONAL
4314 if (ompt_enabled.ompt_callback_dependences) {
4315 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4316 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4317 }
4318#endif
4319 KA_TRACE(20,
4320 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4321 gtid, (iter_number << 5) + shft));
4322}
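// Worked example of the linearization above (values are illustrative): for a
// 2-D nest whose second dimension has range ln = 10, the sink vector
// (i, j) = (3, 7) maps to iter_number = 7 + 10 * 3 = 37, so the loop spins on
// bit 37 % 32 = 5 of word 37 >> 5 = 1 in th_doacross_flags.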
4323
4324void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4325 __kmp_assert_valid_gtid(gtid);
4326 kmp_int64 shft;
4327 size_t num_dims, i;
4328 kmp_uint32 flag;
4329 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4330 kmp_info_t *th = __kmp_threads[gtid];
4331 kmp_team_t *team = th->th.th_team;
4332 kmp_disp_t *pr_buf;
4333 kmp_int64 lo, st;
4334
4335 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4336 if (team->t.t_serialized) {
4337 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4338 return; // no dependencies if team is serialized
4339 }
4340
4341 // calculate sequential iteration number (same as in "wait" but no
4342 // out-of-bounds checks)
4343 pr_buf = th->th.th_dispatch;
4344 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4345 num_dims = (size_t)pr_buf->th_doacross_info[0];
4346 lo = pr_buf->th_doacross_info[2];
4347 st = pr_buf->th_doacross_info[4];
4348#if OMPT_SUPPORT && OMPT_OPTIONAL
4349 SimpleVLA<ompt_dependence_t> deps(num_dims);
4350#endif
4351 if (st == 1) { // most common case
4352 iter_number = vec[0] - lo;
4353 } else if (st > 0) {
4354 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4355 } else { // negative increment
4356 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4357 }
4358#if OMPT_SUPPORT && OMPT_OPTIONAL
4359 deps[0].variable.value = iter_number;
4360 deps[0].dependence_type = ompt_dependence_type_source;
4361#endif
4362 for (i = 1; i < num_dims; ++i) {
4363 kmp_int64 iter, ln;
4364 size_t j = i * 4;
4365 ln = pr_buf->th_doacross_info[j + 1];
4366 lo = pr_buf->th_doacross_info[j + 2];
4367 st = pr_buf->th_doacross_info[j + 4];
4368 if (st == 1) {
4369 iter = vec[i] - lo;
4370 } else if (st > 0) {
4371 iter = (kmp_uint64)(vec[i] - lo) / st;
4372 } else { // st < 0
4373 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4374 }
4375 iter_number = iter + ln * iter_number;
4376#if OMPT_SUPPORT && OMPT_OPTIONAL
4377 deps[i].variable.value = iter;
4378 deps[i].dependence_type = ompt_dependence_type_source;
4379#endif
4380 }
4381#if OMPT_SUPPORT && OMPT_OPTIONAL
4382 if (ompt_enabled.ompt_callback_dependences) {
4383 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4384 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4385 }
4386#endif
4387 shft = iter_number % 32; // use 32-bit granularity
4388 iter_number >>= 5; // divided by 32
4389 flag = 1 << shft;
4390 KMP_MB();
4391 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4392 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4393 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4394 (iter_number << 5) + shft));
4395}
4396
4397void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4398 __kmp_assert_valid_gtid(gtid);
4399 kmp_int32 num_done;
4400 kmp_info_t *th = __kmp_threads[gtid];
4401 kmp_team_t *team = th->th.th_team;
4402 kmp_disp_t *pr_buf = th->th.th_dispatch;
4403
4404 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4405 if (team->t.t_serialized) {
4406 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4407 return; // nothing to do
4408 }
4409 num_done =
4410 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4411 if (num_done == th->th.th_team_nproc) {
4412 // we are the last thread, need to free shared resources
4413 int idx = pr_buf->th_doacross_buf_idx - 1;
4414 dispatch_shared_info_t *sh_buf =
4415 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4416 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4417 (kmp_int64)&sh_buf->doacross_num_done);
4418 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4419 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4420 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4421 sh_buf->doacross_flags = NULL;
4422 sh_buf->doacross_num_done = 0;
4423 sh_buf->doacross_buf_idx +=
4424 __kmp_dispatch_num_buffers; // free buffer for future re-use
4425 }
4426  // free private resources (the buffer index is kept across loops)
4427 pr_buf->th_doacross_flags = NULL;
4428 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4429 pr_buf->th_doacross_info = NULL;
4430 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4431}
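// Illustrative lowering sketch (not verbatim compiler output; lb/ub stand for
// the bounds this thread received from the worksharing-loop init, n for the
// loop trip bound) for `#pragma omp for ordered(1)` with `depend(sink: i-1)`
// and `depend(source)` inside the body:
//
//   struct kmp_dim dim = {/*lo*/ 1, /*up*/ n - 1, /*st*/ 1};
//   __kmpc_doacross_init(&loc, gtid, /*num_dims*/ 1, &dim);
//   for (kmp_int64 i = lb; i <= ub; ++i) {
//     kmp_int64 sink = i - 1;
//     __kmpc_doacross_wait(&loc, gtid, &sink);  // ordered depend(sink: i-1)
//     /* loop body */
//     __kmpc_doacross_post(&loc, gtid, &i);     // ordered depend(source)
//   }
//   __kmpc_doacross_fini(&loc, gtid);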
4432
4433/* OpenMP 5.1 Memory Management routines */
4434void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4435 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4436}
4437
4438void *omp_aligned_alloc(size_t align, size_t size,
4439 omp_allocator_handle_t allocator) {
4440 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4441}
4442
4443void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4444 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4445}
4446
4447void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4448 omp_allocator_handle_t allocator) {
4449 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4450}
4451
4452void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4453 omp_allocator_handle_t free_allocator) {
4454 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4455 free_allocator);
4456}
4457
4458void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4459 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4460}
4461/* end of OpenMP 5.1 Memory Management routines */
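// Example of the user-facing API these entry points back (standard OpenMP
// usage; the size n and the choice of omp_high_bw_mem_alloc are illustrative):
//
//   double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
//                                             omp_high_bw_mem_alloc);
//   if (buf != NULL) {
//     /* ... use buf ... */
//     omp_free(buf, omp_high_bw_mem_alloc);
//   }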
4462
4463int __kmpc_get_target_offload(void) {
4464 if (!__kmp_init_serial) {
4465 __kmp_serial_initialize();
4466 }
4467 return __kmp_target_offload;
4468}
4469
4470int __kmpc_pause_resource(kmp_pause_status_t level) {
4471 if (!__kmp_init_serial) {
4472 return 1; // Can't pause if runtime is not initialized
4473 }
4474 return __kmp_pause_resource(level);
4475}
4476
4477void __kmpc_error(ident_t *loc, int severity, const char *message) {
4478 if (!__kmp_init_serial)
4479 __kmp_serial_initialize();
4480
4481 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4482
4483#if OMPT_SUPPORT
4484 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4485 ompt_callbacks.ompt_callback(ompt_callback_error)(
4486 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4487 OMPT_GET_RETURN_ADDRESS(0));
4488 }
4489#endif // OMPT_SUPPORT
4490
4491 char *src_loc;
4492 if (loc && loc->psource) {
4493 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4494 src_loc =
4495 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4496 __kmp_str_loc_free(&str_loc);
4497 } else {
4498 src_loc = __kmp_str_format("unknown");
4499 }
4500
4501 if (severity == severity_warning)
4502 KMP_WARNING(UserDirectedWarning, src_loc, message);
4503 else
4504 KMP_FATAL(UserDirectedError, src_loc, message);
4505
4506 __kmp_str_free(&src_loc);
4507}
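// __kmpc_error backs the OpenMP 5.1 `error` directive when `at(execution)` is
// requested; an illustrative source form that reaches this entry point:
//
//   #pragma omp error at(execution) severity(warning) message("check input")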
4508
4509// Mark begin of scope directive.
4510void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4511// reserved is for extension of scope directive and not used.
4512#if OMPT_SUPPORT && OMPT_OPTIONAL
4513 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4514 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4515 int tid = __kmp_tid_from_gtid(gtid);
4516 ompt_callbacks.ompt_callback(ompt_callback_work)(
4517 ompt_work_scope, ompt_scope_begin,
4518 &(team->t.ompt_team_info.parallel_data),
4519 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4520 OMPT_GET_RETURN_ADDRESS(0));
4521 }
4522#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4523}
4524
4525// Mark end of scope directive
4526void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4527// reserved is for extension of scope directive and not used.
4528#if OMPT_SUPPORT && OMPT_OPTIONAL
4529 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4530 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4531 int tid = __kmp_tid_from_gtid(gtid);
4532 ompt_callbacks.ompt_callback(ompt_callback_work)(
4533 ompt_work_scope, ompt_scope_end,
4534 &(team->t.ompt_team_info.parallel_data),
4535 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4536 OMPT_GET_RETURN_ADDRESS(0));
4537 }
4538#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4539}
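// __kmpc_scope / __kmpc_end_scope bracket the OpenMP 5.1 `scope` construct so
// OMPT tools see it as a work region; an illustrative source form (the
// variable and compute_chunk() are placeholders):
//
//   #pragma omp scope reduction(+ : partial)
//   { partial += compute_chunk(); }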
4540
4541#ifdef KMP_USE_VERSION_SYMBOLS
4542// For GOMP compatibility there are two versions of each omp_* API.
4543// One is the plain C symbol and one is the Fortran symbol with an appended
4544// underscore. When we implement a specific ompc_* version of an omp_*
4545// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4546// instead of the Fortran versions in kmp_ftn_entry.h
4547extern "C" {
4548// Have to undef these from omp.h so they aren't translated into
4549// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4550#ifdef omp_set_affinity_format
4551#undef omp_set_affinity_format
4552#endif
4553#ifdef omp_get_affinity_format
4554#undef omp_get_affinity_format
4555#endif
4556#ifdef omp_display_affinity
4557#undef omp_display_affinity
4558#endif
4559#ifdef omp_capture_affinity
4560#undef omp_capture_affinity
4561#endif
4562KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4563 "OMP_5.0");
4564KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4565 "OMP_5.0");
4566KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4567 "OMP_5.0");
4568KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4569 "OMP_5.0");
4570} // extern "C"
4571#endif