LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in the future, e.g., to implement the openmp_strict
26 // library restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
43  } else if (__kmp_ignore_mppbeg() == FALSE) {
44  // By default __kmp_ignore_mppbeg() returns TRUE.
45  __kmp_internal_begin();
46  KC_TRACE(10, ("__kmpc_begin: called\n"));
47  }
48 }
49 
58 void __kmpc_end(ident_t *loc) {
59  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
60  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
61  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
62  // returns FALSE and __kmpc_end() will unregister this root (it can cause
63  // library shut down).
64  if (__kmp_ignore_mppend() == FALSE) {
65  KC_TRACE(10, ("__kmpc_end: called\n"));
66  KA_TRACE(30, ("__kmpc_end\n"));
67 
68  __kmp_internal_end_thread(-1);
69  }
70 #if KMP_OS_WINDOWS && OMPT_SUPPORT
71  // Normal exit process on Windows does not allow worker threads of the final
72  // parallel region to finish reporting their events, so shutting down the
73  // library here fixes the issue at least for the cases where __kmpc_end() is
74  // placed properly.
75  if (ompt_enabled.enabled)
76  __kmp_internal_end_library(__kmp_gtid_get_specific());
77 #endif
78 }
79 
98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
99  kmp_int32 gtid = __kmp_entry_gtid();
100 
101  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
102 
103  return gtid;
104 }
105 
120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
121  KC_TRACE(10,
122  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
123 
124  return TCR_4(__kmp_all_nth);
125 }
126 
133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
134  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
135  return __kmp_tid_from_gtid(__kmp_entry_gtid());
136 }
137 
143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
144  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
145 
146  return __kmp_entry_thread()->th.th_team->t.t_nproc;
147 }
148 
155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
156 #ifndef KMP_DEBUG
157 
158  return TRUE;
159 
160 #else
161 
162  const char *semi2;
163  const char *semi3;
164  int line_no;
165 
166  if (__kmp_par_range == 0) {
167  return TRUE;
168  }
169  semi2 = loc->psource;
170  if (semi2 == NULL) {
171  return TRUE;
172  }
173  semi2 = strchr(semi2, ';');
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2 + 1, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  if (__kmp_par_range_filename[0]) {
182  const char *name = semi2 - 1;
183  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
184  name--;
185  }
186  if ((*name == '/') || (*name == ';')) {
187  name++;
188  }
189  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
190  return __kmp_par_range < 0;
191  }
192  }
193  semi3 = strchr(semi2 + 1, ';');
194  if (__kmp_par_range_routine[0]) {
195  if ((semi3 != NULL) && (semi3 > semi2) &&
196  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
197  return __kmp_par_range < 0;
198  }
199  }
200  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
201  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
202  return __kmp_par_range > 0;
203  }
204  return __kmp_par_range < 0;
205  }
206  return TRUE;
207 
208 #endif /* KMP_DEBUG */
209 }
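// Editorial note (not part of the upstream source): the parsing above assumes
// the documented ident_t::psource layout of semicolon-separated fields,
// roughly ";file;routine;line;line;;", e.g.
//
//   ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";foo.c;bar;42;45;;"};
//
// After the two strchr() calls, semi2 points at the ';' that terminates the
// file name and semi3 at the ';' that terminates the routine name, so
// KMP_SSCANF() reads the first line number for the KMP_PAR_RANGE check.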
210 
217 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
218  return __kmp_entry_thread()->th.th_root->r.r_active;
219 }
220 
230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
231  kmp_int32 num_threads) {
232  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
233  global_tid, num_threads));
234  __kmp_assert_valid_gtid(global_tid);
235  __kmp_push_num_threads(loc, global_tid, num_threads);
236 }
237 
238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
239  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
240  /* the num_threads are automatically popped */
241 }
242 
243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
244  kmp_int32 proc_bind) {
245  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
246  proc_bind));
247  __kmp_assert_valid_gtid(global_tid);
248  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
249 }
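/* Illustrative sketch (editorial addition, not upstream code): for a directive
   such as "#pragma omp parallel num_threads(4) proc_bind(close)" a compiler
   typically emits the push calls right before the fork, roughly (the outlined
   routine name __body is hypothetical):

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_threads(&loc, gtid, 4);
     __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
     __kmpc_fork_call(&loc, 0, (kmpc_micro)__body);

   Both pushed values apply only to the next parallel region. */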
250 
261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
262  int gtid = __kmp_entry_gtid();
263 
264 #if (KMP_STATS_ENABLED)
265  // If we were in a serial region, then stop the serial timer, record
266  // the event, and start parallel region timer
267  stats_state_e previous_state = KMP_GET_THREAD_STATE();
268  if (previous_state == stats_state_e::SERIAL_REGION) {
269  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
270  } else {
271  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
272  }
273  int inParallel = __kmpc_in_parallel(loc);
274  if (inParallel) {
275  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
276  } else {
277  KMP_COUNT_BLOCK(OMP_PARALLEL);
278  }
279 #endif
280 
281  // maybe saving thr_state here is enough
282  {
283  va_list ap;
284  va_start(ap, microtask);
285 
286 #if OMPT_SUPPORT
287  ompt_frame_t *ompt_frame;
288  if (ompt_enabled.enabled) {
289  kmp_info_t *master_th = __kmp_threads[gtid];
290  kmp_team_t *parent_team = master_th->th.th_team;
291  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
292  if (lwt)
293  ompt_frame = &(lwt->ompt_task_info.frame);
294  else {
295  int tid = __kmp_tid_from_gtid(gtid);
296  ompt_frame = &(
297  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
298  }
299  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
300  OMPT_STORE_RETURN_ADDRESS(gtid);
301  }
302 #endif
303 
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_FORKING();
306 #endif
307  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
308  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
309  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
310  kmp_va_addr_of(ap));
311 #if INCLUDE_SSC_MARKS
312  SSC_MARK_JOINING();
313 #endif
314  __kmp_join_call(loc, gtid
315 #if OMPT_SUPPORT
316  ,
317  fork_context_intel
318 #endif
319  );
320 
321  va_end(ap);
322  }
323 
324 #if KMP_STATS_ENABLED
325  if (previous_state == stats_state_e::SERIAL_REGION) {
326  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
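/* Illustrative sketch (editorial addition, not upstream code): this is the
   entry point the compiler emits for "#pragma omp parallel". The outlined
   routine and shared variable below are hypothetical names:

     void __body(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
       // parallel region body; *gtid is the global thread id
     }

     int x = 0;
     __kmpc_fork_call(&loc, 1, (kmpc_micro)__body, &x); // argc == 1 shared arg

   The runtime creates (or reuses) a team and invokes __body once on every
   thread of that team, forwarding the trailing arguments. */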
332 
344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345  kmp_int32 num_teams, kmp_int32 num_threads) {
346  KA_TRACE(20,
347  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348  global_tid, num_teams, num_threads));
349  __kmp_assert_valid_gtid(global_tid);
350  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351 }
352 
363 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
364  ...) {
365  int gtid = __kmp_entry_gtid();
366  kmp_info_t *this_thr = __kmp_threads[gtid];
367  va_list ap;
368  va_start(ap, microtask);
369 
370 #if KMP_STATS_ENABLED
371  KMP_COUNT_BLOCK(OMP_TEAMS);
372  stats_state_e previous_state = KMP_GET_THREAD_STATE();
373  if (previous_state == stats_state_e::SERIAL_REGION) {
374  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
375  } else {
376  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
377  }
378 #endif
379 
380  // remember teams entry point and nesting level
381  this_thr->th.th_teams_microtask = microtask;
382  this_thr->th.th_teams_level =
383  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
384 
385 #if OMPT_SUPPORT
386  kmp_team_t *parent_team = this_thr->th.th_team;
387  int tid = __kmp_tid_from_gtid(gtid);
388  if (ompt_enabled.enabled) {
389  parent_team->t.t_implicit_task_taskdata[tid]
390  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
391  }
392  OMPT_STORE_RETURN_ADDRESS(gtid);
393 #endif
394 
395  // check if __kmpc_push_num_teams was called; if not, set the default
396  // number of teams
397  if (this_thr->th.th_teams_size.nteams == 0) {
398  __kmp_push_num_teams(loc, gtid, 0, 0);
399  }
400  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
401  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
402  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
403 
404  __kmp_fork_call(
405  loc, gtid, fork_context_intel, argc,
406  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
407  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
408  __kmp_join_call(loc, gtid
409 #if OMPT_SUPPORT
410  ,
411  fork_context_intel
412 #endif
413  );
414 
415  // Pop current CG root off list
416  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
417  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
418  this_thr->th.th_cg_roots = tmp->up;
419  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
420  " to node %p. cg_nthreads was %d\n",
421  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
422  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
423  int i = tmp->cg_nthreads--;
424  if (i == 1) { // check if we are the last thread in CG (not always the case)
425  __kmp_free(tmp);
426  }
427  // Restore current task's thread_limit from CG root
428  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
429  this_thr->th.th_current_task->td_icvs.thread_limit =
430  this_thr->th.th_cg_roots->cg_thread_limit;
431 
432  this_thr->th.th_teams_microtask = NULL;
433  this_thr->th.th_teams_level = 0;
434  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
435  va_end(ap);
436 #if KMP_STATS_ENABLED
437  if (previous_state == stats_state_e::SERIAL_REGION) {
438  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
439  } else {
440  KMP_POP_PARTITIONED_TIMER();
441  }
442 #endif // KMP_STATS_ENABLED
443 }
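/* Illustrative sketch (editorial addition, not upstream code): for
   "#pragma omp teams num_teams(2) thread_limit(8)" the compiler emits roughly
   (the outlined routine name is hypothetical):

     __kmpc_push_num_teams(&loc, gtid, 2, 8);
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)__teams_body);

   If __kmpc_push_num_teams was not called, the code above falls back to
   __kmp_push_num_teams(loc, gtid, 0, 0) so that default team sizes get set. */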
444 
445 // I don't think this function should ever have been exported.
446 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
447 // openmp code ever called it, but it's been exported from the RTL for so
448 // long that I'm afraid to remove the definition.
449 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
450 
463 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
464  // The implementation is now in kmp_runtime.cpp so that it can share static
465  // functions with kmp_fork_call since the tasks to be done are similar in
466  // each case.
467  __kmp_assert_valid_gtid(global_tid);
468 #if OMPT_SUPPORT
469  OMPT_STORE_RETURN_ADDRESS(global_tid);
470 #endif
471  __kmp_serialized_parallel(loc, global_tid);
472 }
473 
481 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
482  kmp_internal_control_t *top;
483  kmp_info_t *this_thr;
484  kmp_team_t *serial_team;
485 
486  KC_TRACE(10,
487  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
488 
489  /* skip all this code for autopar serialized loops since it results in
490  unacceptable overhead */
491  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
492  return;
493 
494  // Not autopar code
495  __kmp_assert_valid_gtid(global_tid);
496  if (!TCR_4(__kmp_init_parallel))
497  __kmp_parallel_initialize();
498 
499  __kmp_resume_if_soft_paused();
500 
501  this_thr = __kmp_threads[global_tid];
502  serial_team = this_thr->th.th_serial_team;
503 
504  kmp_task_team_t *task_team = this_thr->th.th_task_team;
505  // we need to wait for the proxy tasks before finishing the thread
506  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
507  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
508 
509  KMP_MB();
510  KMP_DEBUG_ASSERT(serial_team);
511  KMP_ASSERT(serial_team->t.t_serialized);
512  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
513  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
514  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
515  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
516 
517 #if OMPT_SUPPORT
518  if (ompt_enabled.enabled &&
519  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
520  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
521  if (ompt_enabled.ompt_callback_implicit_task) {
522  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
523  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
524  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
525  }
526 
527  // reset/clear the task id only after unlinking the task
528  ompt_data_t *parent_task_data;
529  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
530 
531  if (ompt_enabled.ompt_callback_parallel_end) {
532  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
533  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
534  ompt_parallel_invoker_program | ompt_parallel_team,
535  OMPT_LOAD_RETURN_ADDRESS(global_tid));
536  }
537  __ompt_lw_taskteam_unlink(this_thr);
538  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
539  }
540 #endif
541 
542  /* If necessary, pop the internal control stack values and replace the team
543  * values */
544  top = serial_team->t.t_control_stack_top;
545  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
546  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
547  serial_team->t.t_control_stack_top = top->next;
548  __kmp_free(top);
549  }
550 
551  // if( serial_team -> t.t_serialized > 1 )
552  serial_team->t.t_level--;
553 
554  /* pop dispatch buffers stack */
555  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
556  {
557  dispatch_private_info_t *disp_buffer =
558  serial_team->t.t_dispatch->th_disp_buffer;
559  serial_team->t.t_dispatch->th_disp_buffer =
560  serial_team->t.t_dispatch->th_disp_buffer->next;
561  __kmp_free(disp_buffer);
562  }
563  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
564 
565  --serial_team->t.t_serialized;
566  if (serial_team->t.t_serialized == 0) {
567 
568 /* return to the parallel section */
569 
570 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
571  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
572  __kmp_clear_x87_fpu_status_word();
573  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
574  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
575  }
576 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
577 
578  this_thr->th.th_team = serial_team->t.t_parent;
579  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
580 
581  /* restore values cached in the thread */
582  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
583  this_thr->th.th_team_master =
584  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
585  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
586 
587  /* TODO the below shouldn't need to be adjusted for serialized teams */
588  this_thr->th.th_dispatch =
589  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
590 
591  __kmp_pop_current_task_from_thread(this_thr);
592 
593  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
594  this_thr->th.th_current_task->td_flags.executing = 1;
595 
596  if (__kmp_tasking_mode != tskm_immediate_exec) {
597  // Copy the task team from the new child / old parent team to the thread.
598  this_thr->th.th_task_team =
599  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
600  KA_TRACE(20,
601  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
602  "team %p\n",
603  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
604  }
605  } else {
606  if (__kmp_tasking_mode != tskm_immediate_exec) {
607  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
608  "depth of serial team %p to %d\n",
609  global_tid, serial_team, serial_team->t.t_serialized));
610  }
611  }
612 
613  if (__kmp_env_consistency_check)
614  __kmp_pop_parallel(global_tid, NULL);
615 #if OMPT_SUPPORT
616  if (ompt_enabled.enabled)
617  this_thr->th.ompt_thread_info.state =
618  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
619  : ompt_state_work_parallel);
620 #endif
621 }
622 
631 void __kmpc_flush(ident_t *loc) {
632  KC_TRACE(10, ("__kmpc_flush: called\n"));
633 
634  /* need explicit __mf() here since we use volatile instead of fences in the library */
635  KMP_MB(); /* Flush all pending memory write invalidates. */
636 
637 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
638 #if KMP_MIC
639 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
640 // We shouldn't need it, though, since the ABI rules require that
641 // * If the compiler generates NGO stores it also generates the fence
642 // * If users hand-code NGO stores they should insert the fence
643 // therefore no incomplete unordered stores should be visible.
644 #else
645  // C74404
646  // This is to address non-temporal store instructions (sfence needed).
647  // The clflush instruction is addressed as well (mfence needed).
648  // Probably the non-temporal load movntdqa instruction should also be
649  // addressed.
650  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
651  if (!__kmp_cpuinfo.initialized) {
652  __kmp_query_cpuid(&__kmp_cpuinfo);
653  }
654  if (!__kmp_cpuinfo.sse2) {
655  // CPU cannot execute SSE2 instructions.
656  } else {
657 #if KMP_COMPILER_ICC
658  _mm_mfence();
659 #elif KMP_COMPILER_MSVC
660  MemoryBarrier();
661 #else
662  __sync_synchronize();
663 #endif // KMP_COMPILER_ICC
664  }
665 #endif // KMP_MIC
666 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
667  KMP_ARCH_RISCV64)
668 // Nothing to see here, move along.
669 #elif KMP_ARCH_PPC64
670 // Nothing needed here (we have a real MB above).
671 #else
672 #error Unknown or unsupported architecture
673 #endif
674 
675 #if OMPT_SUPPORT && OMPT_OPTIONAL
676  if (ompt_enabled.ompt_callback_flush) {
677  ompt_callbacks.ompt_callback(ompt_callback_flush)(
678  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
679  }
680 #endif
681 }
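// Editorial note (not upstream code): "#pragma omp flush" lowers to a plain
// __kmpc_flush(&loc) call; the entry point takes no variable list, so any
// flush list in the directive is handled by the compiler and is not passed
// to this runtime call.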
682 
683 /* -------------------------------------------------------------------------- */
691 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
692  KMP_COUNT_BLOCK(OMP_BARRIER);
693  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
694  __kmp_assert_valid_gtid(global_tid);
695 
696  if (!TCR_4(__kmp_init_parallel))
697  __kmp_parallel_initialize();
698 
699  __kmp_resume_if_soft_paused();
700 
701  if (__kmp_env_consistency_check) {
702  if (loc == 0) {
703  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
704  }
705  __kmp_check_barrier(global_tid, ct_barrier, loc);
706  }
707 
708 #if OMPT_SUPPORT
709  ompt_frame_t *ompt_frame;
710  if (ompt_enabled.enabled) {
711  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
712  if (ompt_frame->enter_frame.ptr == NULL)
713  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
714  OMPT_STORE_RETURN_ADDRESS(global_tid);
715  }
716 #endif
717  __kmp_threads[global_tid]->th.th_ident = loc;
718  // TODO: explicit barrier_wait_id:
719  // this function is called when the 'barrier' directive is present or at
720  // the implicit barrier at the end of a worksharing construct.
721  // 1) better to add a per-thread barrier counter to a thread data structure
722  // 2) set to 0 when a new team is created
723  // 3) no sync is required
724 
725  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
726 #if OMPT_SUPPORT && OMPT_OPTIONAL
727  if (ompt_enabled.enabled) {
728  ompt_frame->enter_frame = ompt_data_none;
729  }
730 #endif
731 }
732 
733 /* The BARRIER for a MASTER section is always explicit */
740 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
741  int status = 0;
742 
743  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
744  __kmp_assert_valid_gtid(global_tid);
745 
746  if (!TCR_4(__kmp_init_parallel))
747  __kmp_parallel_initialize();
748 
749  __kmp_resume_if_soft_paused();
750 
751  if (KMP_MASTER_GTID(global_tid)) {
752  KMP_COUNT_BLOCK(OMP_MASTER);
753  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
754  status = 1;
755  }
756 
757 #if OMPT_SUPPORT && OMPT_OPTIONAL
758  if (status) {
759  if (ompt_enabled.ompt_callback_master) {
760  kmp_info_t *this_thr = __kmp_threads[global_tid];
761  kmp_team_t *team = this_thr->th.th_team;
762 
763  int tid = __kmp_tid_from_gtid(global_tid);
764  ompt_callbacks.ompt_callback(ompt_callback_master)(
765  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
766  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
767  OMPT_GET_RETURN_ADDRESS(0));
768  }
769  }
770 #endif
771 
772  if (__kmp_env_consistency_check) {
773 #if KMP_USE_DYNAMIC_LOCK
774  if (status)
775  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
776  else
777  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
778 #else
779  if (status)
780  __kmp_push_sync(global_tid, ct_master, loc, NULL);
781  else
782  __kmp_check_sync(global_tid, ct_master, loc, NULL);
783 #endif
784  }
785 
786  return status;
787 }
788 
797 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
798  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
799  __kmp_assert_valid_gtid(global_tid);
800  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
801  KMP_POP_PARTITIONED_TIMER();
802 
803 #if OMPT_SUPPORT && OMPT_OPTIONAL
804  kmp_info_t *this_thr = __kmp_threads[global_tid];
805  kmp_team_t *team = this_thr->th.th_team;
806  if (ompt_enabled.ompt_callback_master) {
807  int tid = __kmp_tid_from_gtid(global_tid);
808  ompt_callbacks.ompt_callback(ompt_callback_master)(
809  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
810  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
811  OMPT_GET_RETURN_ADDRESS(0));
812  }
813 #endif
814 
815  if (__kmp_env_consistency_check) {
816  if (KMP_MASTER_GTID(global_tid))
817  __kmp_pop_sync(global_tid, ct_master, loc);
818  }
819 }
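/* Illustrative sketch (editorial addition, not upstream code): the master
   construct is lowered as a guarded region with no implied barrier, roughly:

     if (__kmpc_master(&loc, gtid)) {
       // work executed only by the master thread
       __kmpc_end_master(&loc, gtid);
     }
*/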
820 
828 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
829  int cid = 0;
830  kmp_info_t *th;
831  KMP_DEBUG_ASSERT(__kmp_init_serial);
832 
833  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
834  __kmp_assert_valid_gtid(gtid);
835 
836  if (!TCR_4(__kmp_init_parallel))
837  __kmp_parallel_initialize();
838 
839  __kmp_resume_if_soft_paused();
840 
841 #if USE_ITT_BUILD
842  __kmp_itt_ordered_prep(gtid);
843 // TODO: ordered_wait_id
844 #endif /* USE_ITT_BUILD */
845 
846  th = __kmp_threads[gtid];
847 
848 #if OMPT_SUPPORT && OMPT_OPTIONAL
849  kmp_team_t *team;
850  ompt_wait_id_t lck;
851  void *codeptr_ra;
852  if (ompt_enabled.enabled) {
853  OMPT_STORE_RETURN_ADDRESS(gtid);
854  team = __kmp_team_from_gtid(gtid);
855  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
856  /* OMPT state update */
857  th->th.ompt_thread_info.wait_id = lck;
858  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
859 
860  /* OMPT event callback */
861  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
862  if (ompt_enabled.ompt_callback_mutex_acquire) {
863  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
864  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
865  codeptr_ra);
866  }
867  }
868 #endif
869 
870  if (th->th.th_dispatch->th_deo_fcn != 0)
871  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
872  else
873  __kmp_parallel_deo(&gtid, &cid, loc);
874 
875 #if OMPT_SUPPORT && OMPT_OPTIONAL
876  if (ompt_enabled.enabled) {
877  /* OMPT state update */
878  th->th.ompt_thread_info.state = ompt_state_work_parallel;
879  th->th.ompt_thread_info.wait_id = 0;
880 
881  /* OMPT event callback */
882  if (ompt_enabled.ompt_callback_mutex_acquired) {
883  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
884  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
885  }
886  }
887 #endif
888 
889 #if USE_ITT_BUILD
890  __kmp_itt_ordered_start(gtid);
891 #endif /* USE_ITT_BUILD */
892 }
893 
901 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
902  int cid = 0;
903  kmp_info_t *th;
904 
905  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
906  __kmp_assert_valid_gtid(gtid);
907 
908 #if USE_ITT_BUILD
909  __kmp_itt_ordered_end(gtid);
910 // TODO: ordered_wait_id
911 #endif /* USE_ITT_BUILD */
912 
913  th = __kmp_threads[gtid];
914 
915  if (th->th.th_dispatch->th_dxo_fcn != 0)
916  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
917  else
918  __kmp_parallel_dxo(&gtid, &cid, loc);
919 
920 #if OMPT_SUPPORT && OMPT_OPTIONAL
921  OMPT_STORE_RETURN_ADDRESS(gtid);
922  if (ompt_enabled.ompt_callback_mutex_released) {
923  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
924  ompt_mutex_ordered,
925  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
926  ->t.t_ordered.dt.t_value,
927  OMPT_LOAD_RETURN_ADDRESS(gtid));
928  }
929 #endif
930 }
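/* Illustrative sketch (editorial addition, not upstream code): within a loop
   that carries the ordered clause, the body of "#pragma omp ordered" is
   bracketed by these calls, roughly:

     __kmpc_ordered(&loc, gtid);
     // code that must execute in iteration order
     __kmpc_end_ordered(&loc, gtid);
*/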
931 
932 #if KMP_USE_DYNAMIC_LOCK
933 
934 static __forceinline void
935 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
936  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
937  // Pointer to the allocated indirect lock is written to crit, while indexing
938  // is ignored.
939  void *idx;
940  kmp_indirect_lock_t **lck;
941  lck = (kmp_indirect_lock_t **)crit;
942  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
943  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
944  KMP_SET_I_LOCK_LOCATION(ilk, loc);
945  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
946  KA_TRACE(20,
947  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
948 #if USE_ITT_BUILD
949  __kmp_itt_critical_creating(ilk->lock, loc);
950 #endif
951  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
952  if (status == 0) {
953 #if USE_ITT_BUILD
954  __kmp_itt_critical_destroyed(ilk->lock);
955 #endif
956  // We don't really need to destroy the unclaimed lock here since it will be
957  // cleaned up at program exit.
958  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
959  }
960  KMP_DEBUG_ASSERT(*lck != NULL);
961 }
962 
963 // Fast-path acquire tas lock
964 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
965  { \
966  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
967  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
968  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
969  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
970  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
971  kmp_uint32 spins; \
972  KMP_FSYNC_PREPARE(l); \
973  KMP_INIT_YIELD(spins); \
974  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
975  do { \
976  if (TCR_4(__kmp_nth) > \
977  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
978  KMP_YIELD(TRUE); \
979  } else { \
980  KMP_YIELD_SPIN(spins); \
981  } \
982  __kmp_spin_backoff(&backoff); \
983  } while ( \
984  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
985  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
986  } \
987  KMP_FSYNC_ACQUIRED(l); \
988  }
989 
990 // Fast-path test tas lock
991 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
992  { \
993  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
994  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
995  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
996  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
997  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
998  }
999 
1000 // Fast-path release tas lock
1001 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1002  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1003 
1004 #if KMP_USE_FUTEX
1005 
1006 #include <sys/syscall.h>
1007 #include <unistd.h>
1008 #ifndef FUTEX_WAIT
1009 #define FUTEX_WAIT 0
1010 #endif
1011 #ifndef FUTEX_WAKE
1012 #define FUTEX_WAKE 1
1013 #endif
1014 
1015 // Fast-path acquire futex lock
1016 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1017  { \
1018  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1019  kmp_int32 gtid_code = (gtid + 1) << 1; \
1020  KMP_MB(); \
1021  KMP_FSYNC_PREPARE(ftx); \
1022  kmp_int32 poll_val; \
1023  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1024  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1025  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1026  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1027  if (!cond) { \
1028  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1029  poll_val | \
1030  KMP_LOCK_BUSY(1, futex))) { \
1031  continue; \
1032  } \
1033  poll_val |= KMP_LOCK_BUSY(1, futex); \
1034  } \
1035  kmp_int32 rc; \
1036  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1037  NULL, NULL, 0)) != 0) { \
1038  continue; \
1039  } \
1040  gtid_code |= 1; \
1041  } \
1042  KMP_FSYNC_ACQUIRED(ftx); \
1043  }
1044 
1045 // Fast-path test futex lock
1046 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1047  { \
1048  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1049  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1050  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1051  KMP_FSYNC_ACQUIRED(ftx); \
1052  rc = TRUE; \
1053  } else { \
1054  rc = FALSE; \
1055  } \
1056  }
1057 
1058 // Fast-path release futex lock
1059 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1060  { \
1061  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1062  KMP_MB(); \
1063  KMP_FSYNC_RELEASING(ftx); \
1064  kmp_int32 poll_val = \
1065  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1066  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1067  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1068  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1069  } \
1070  KMP_MB(); \
1071  KMP_YIELD_OVERSUB(); \
1072  }
1073 
1074 #endif // KMP_USE_FUTEX
1075 
1076 #else // KMP_USE_DYNAMIC_LOCK
1077 
1078 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1079  ident_t const *loc,
1080  kmp_int32 gtid) {
1081  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1082 
1083  // Because of the double-check, the following load doesn't need to be volatile
1084  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1085 
1086  if (lck == NULL) {
1087  void *idx;
1088 
1089  // Allocate & initialize the lock.
1090  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1091  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1092  __kmp_init_user_lock_with_checks(lck);
1093  __kmp_set_user_lock_location(lck, loc);
1094 #if USE_ITT_BUILD
1095  __kmp_itt_critical_creating(lck);
1096 // __kmp_itt_critical_creating() should be called *before* the first usage
1097 // of the underlying lock. It is the only place where we can guarantee it.
1098 // There is a chance the lock will be destroyed with no usage, but that is
1099 // not a problem: this is not a real event seen by the user, just setting a
1100 // name for the object (lock). See more details in kmp_itt.h.
1101 #endif /* USE_ITT_BUILD */
1102 
1103  // Use a cmpxchg instruction to slam the start of the critical section with
1104  // the lock pointer. If another thread beat us to it, deallocate the lock,
1105  // and use the lock that the other thread allocated.
1106  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1107 
1108  if (status == 0) {
1109 // Deallocate the lock and reload the value.
1110 #if USE_ITT_BUILD
1111  __kmp_itt_critical_destroyed(lck);
1112 // Let ITT know the lock is destroyed and the same memory location may be reused
1113 // for another purpose.
1114 #endif /* USE_ITT_BUILD */
1115  __kmp_destroy_user_lock_with_checks(lck);
1116  __kmp_user_lock_free(&idx, gtid, lck);
1117  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1118  KMP_DEBUG_ASSERT(lck != NULL);
1119  }
1120  }
1121  return lck;
1122 }
1123 
1124 #endif // KMP_USE_DYNAMIC_LOCK
1125 
1136 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1137  kmp_critical_name *crit) {
1138 #if KMP_USE_DYNAMIC_LOCK
1139 #if OMPT_SUPPORT && OMPT_OPTIONAL
1140  OMPT_STORE_RETURN_ADDRESS(global_tid);
1141 #endif // OMPT_SUPPORT
1142  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1143 #else
1144  KMP_COUNT_BLOCK(OMP_CRITICAL);
1145 #if OMPT_SUPPORT && OMPT_OPTIONAL
1146  ompt_state_t prev_state = ompt_state_undefined;
1147  ompt_thread_info_t ti;
1148 #endif
1149  kmp_user_lock_p lck;
1150 
1151  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1152  __kmp_assert_valid_gtid(global_tid);
1153 
1154  // TODO: add THR_OVHD_STATE
1155 
1156  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1157  KMP_CHECK_USER_LOCK_INIT();
1158 
1159  if ((__kmp_user_lock_kind == lk_tas) &&
1160  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1161  lck = (kmp_user_lock_p)crit;
1162  }
1163 #if KMP_USE_FUTEX
1164  else if ((__kmp_user_lock_kind == lk_futex) &&
1165  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1166  lck = (kmp_user_lock_p)crit;
1167  }
1168 #endif
1169  else { // ticket, queuing or drdpa
1170  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1171  }
1172 
1173  if (__kmp_env_consistency_check)
1174  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1175 
1176 // since the critical directive binds to all threads, not just the current
1177 // team, we have to check this even if we are in a serialized team.
1178 // also, even if we are the uber thread, we still have to acquire the lock,
1179 // as we have to contend with sibling threads.
1180 
1181 #if USE_ITT_BUILD
1182  __kmp_itt_critical_acquiring(lck);
1183 #endif /* USE_ITT_BUILD */
1184 #if OMPT_SUPPORT && OMPT_OPTIONAL
1185  OMPT_STORE_RETURN_ADDRESS(global_tid);
1186  void *codeptr_ra = NULL;
1187  if (ompt_enabled.enabled) {
1188  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1189  /* OMPT state update */
1190  prev_state = ti.state;
1191  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1192  ti.state = ompt_state_wait_critical;
1193 
1194  /* OMPT event callback */
1195  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1196  if (ompt_enabled.ompt_callback_mutex_acquire) {
1197  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1198  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1199  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1200  }
1201  }
1202 #endif
1203  // Value of 'crit' should be good for using as a critical_id of the critical
1204  // section directive.
1205  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1206 
1207 #if USE_ITT_BUILD
1208  __kmp_itt_critical_acquired(lck);
1209 #endif /* USE_ITT_BUILD */
1210 #if OMPT_SUPPORT && OMPT_OPTIONAL
1211  if (ompt_enabled.enabled) {
1212  /* OMPT state update */
1213  ti.state = prev_state;
1214  ti.wait_id = 0;
1215 
1216  /* OMPT event callback */
1217  if (ompt_enabled.ompt_callback_mutex_acquired) {
1218  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1219  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1220  }
1221  }
1222 #endif
1223  KMP_POP_PARTITIONED_TIMER();
1224 
1225  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1226  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1227 #endif // KMP_USE_DYNAMIC_LOCK
1228 }
1229 
1230 #if KMP_USE_DYNAMIC_LOCK
1231 
1232 // Converts the given hint to an internal lock implementation
1233 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1234 #if KMP_USE_TSX
1235 #define KMP_TSX_LOCK(seq) lockseq_##seq
1236 #else
1237 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1238 #endif
1239 
1240 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1241 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1242 #else
1243 #define KMP_CPUINFO_RTM 0
1244 #endif
1245 
1246  // Hints that do not require further logic
1247  if (hint & kmp_lock_hint_hle)
1248  return KMP_TSX_LOCK(hle);
1249  if (hint & kmp_lock_hint_rtm)
1250  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1251  if (hint & kmp_lock_hint_adaptive)
1252  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1253 
1254  // Rule out conflicting hints first by returning the default lock
1255  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1256  return __kmp_user_lock_seq;
1257  if ((hint & omp_lock_hint_speculative) &&
1258  (hint & omp_lock_hint_nonspeculative))
1259  return __kmp_user_lock_seq;
1260 
1261  // Do not even consider speculation when it appears to be contended
1262  if (hint & omp_lock_hint_contended)
1263  return lockseq_queuing;
1264 
1265  // Uncontended lock without speculation
1266  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1267  return lockseq_tas;
1268 
1269  // HLE lock for speculation
1270  if (hint & omp_lock_hint_speculative)
1271  return KMP_TSX_LOCK(hle);
1272 
1273  return __kmp_user_lock_seq;
1274 }
1275 
1276 #if OMPT_SUPPORT && OMPT_OPTIONAL
1277 #if KMP_USE_DYNAMIC_LOCK
1278 static kmp_mutex_impl_t
1279 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1280  if (user_lock) {
1281  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1282  case 0:
1283  break;
1284 #if KMP_USE_FUTEX
1285  case locktag_futex:
1286  return kmp_mutex_impl_queuing;
1287 #endif
1288  case locktag_tas:
1289  return kmp_mutex_impl_spin;
1290 #if KMP_USE_TSX
1291  case locktag_hle:
1292  return kmp_mutex_impl_speculative;
1293 #endif
1294  default:
1295  return kmp_mutex_impl_none;
1296  }
1297  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1298  }
1299  KMP_ASSERT(ilock);
1300  switch (ilock->type) {
1301 #if KMP_USE_TSX
1302  case locktag_adaptive:
1303  case locktag_rtm:
1304  return kmp_mutex_impl_speculative;
1305 #endif
1306  case locktag_nested_tas:
1307  return kmp_mutex_impl_spin;
1308 #if KMP_USE_FUTEX
1309  case locktag_nested_futex:
1310 #endif
1311  case locktag_ticket:
1312  case locktag_queuing:
1313  case locktag_drdpa:
1314  case locktag_nested_ticket:
1315  case locktag_nested_queuing:
1316  case locktag_nested_drdpa:
1317  return kmp_mutex_impl_queuing;
1318  default:
1319  return kmp_mutex_impl_none;
1320  }
1321 }
1322 #else
1323 // For locks without dynamic binding
1324 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1325  switch (__kmp_user_lock_kind) {
1326  case lk_tas:
1327  return kmp_mutex_impl_spin;
1328 #if KMP_USE_FUTEX
1329  case lk_futex:
1330 #endif
1331  case lk_ticket:
1332  case lk_queuing:
1333  case lk_drdpa:
1334  return kmp_mutex_impl_queuing;
1335 #if KMP_USE_TSX
1336  case lk_hle:
1337  case lk_rtm:
1338  case lk_adaptive:
1339  return kmp_mutex_impl_speculative;
1340 #endif
1341  default:
1342  return kmp_mutex_impl_none;
1343  }
1344 }
1345 #endif // KMP_USE_DYNAMIC_LOCK
1346 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1347 
1361 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1362  kmp_critical_name *crit, uint32_t hint) {
1363  KMP_COUNT_BLOCK(OMP_CRITICAL);
1364  kmp_user_lock_p lck;
1365 #if OMPT_SUPPORT && OMPT_OPTIONAL
1366  ompt_state_t prev_state = ompt_state_undefined;
1367  ompt_thread_info_t ti;
1368  // This is the case if called from __kmpc_critical:
1369  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1370  if (!codeptr)
1371  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1372 #endif
1373 
1374  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1375  __kmp_assert_valid_gtid(global_tid);
1376 
1377  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1378  // Check if it is initialized.
1379  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1380  if (*lk == 0) {
1381  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1382  if (KMP_IS_D_LOCK(lckseq)) {
1383  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1384  KMP_GET_D_TAG(lckseq));
1385  } else {
1386  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1387  }
1388  }
1389  // Branch for accessing the actual lock object and set operation. This
1390  // branching is inevitable since this lock initialization does not follow the
1391  // normal dispatch path (lock table is not used).
1392  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1393  lck = (kmp_user_lock_p)lk;
1394  if (__kmp_env_consistency_check) {
1395  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1396  __kmp_map_hint_to_lock(hint));
1397  }
1398 #if USE_ITT_BUILD
1399  __kmp_itt_critical_acquiring(lck);
1400 #endif
1401 #if OMPT_SUPPORT && OMPT_OPTIONAL
1402  if (ompt_enabled.enabled) {
1403  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1404  /* OMPT state update */
1405  prev_state = ti.state;
1406  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1407  ti.state = ompt_state_wait_critical;
1408 
1409  /* OMPT event callback */
1410  if (ompt_enabled.ompt_callback_mutex_acquire) {
1411  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1412  ompt_mutex_critical, (unsigned int)hint,
1413  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1414  codeptr);
1415  }
1416  }
1417 #endif
1418 #if KMP_USE_INLINED_TAS
1419  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1420  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1421  } else
1422 #elif KMP_USE_INLINED_FUTEX
1423  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1424  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1425  } else
1426 #endif
1427  {
1428  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1429  }
1430  } else {
1431  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1432  lck = ilk->lock;
1433  if (__kmp_env_consistency_check) {
1434  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1435  __kmp_map_hint_to_lock(hint));
1436  }
1437 #if USE_ITT_BUILD
1438  __kmp_itt_critical_acquiring(lck);
1439 #endif
1440 #if OMPT_SUPPORT && OMPT_OPTIONAL
1441  if (ompt_enabled.enabled) {
1442  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1443  /* OMPT state update */
1444  prev_state = ti.state;
1445  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1446  ti.state = ompt_state_wait_critical;
1447 
1448  /* OMPT event callback */
1449  if (ompt_enabled.ompt_callback_mutex_acquire) {
1450  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1451  ompt_mutex_critical, (unsigned int)hint,
1452  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1453  codeptr);
1454  }
1455  }
1456 #endif
1457  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1458  }
1459  KMP_POP_PARTITIONED_TIMER();
1460 
1461 #if USE_ITT_BUILD
1462  __kmp_itt_critical_acquired(lck);
1463 #endif /* USE_ITT_BUILD */
1464 #if OMPT_SUPPORT && OMPT_OPTIONAL
1465  if (ompt_enabled.enabled) {
1466  /* OMPT state update */
1467  ti.state = prev_state;
1468  ti.wait_id = 0;
1469 
1470  /* OMPT event callback */
1471  if (ompt_enabled.ompt_callback_mutex_acquired) {
1472  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1473  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1474  }
1475  }
1476 #endif
1477 
1478  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1479  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1480 } // __kmpc_critical_with_hint
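// Editorial note (not upstream code): "#pragma omp critical (name) hint(...)"
// is lowered to this entry point with the hint value forwarded, e.g.
//   __kmpc_critical_with_hint(&loc, gtid, &crit, omp_lock_hint_contended);
// which __kmp_map_hint_to_lock() above maps to a queuing lock.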
1481 
1482 #endif // KMP_USE_DYNAMIC_LOCK
1483 
1493 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1494  kmp_critical_name *crit) {
1495  kmp_user_lock_p lck;
1496 
1497  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1498 
1499 #if KMP_USE_DYNAMIC_LOCK
1500  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1501  lck = (kmp_user_lock_p)crit;
1502  KMP_ASSERT(lck != NULL);
1503  if (__kmp_env_consistency_check) {
1504  __kmp_pop_sync(global_tid, ct_critical, loc);
1505  }
1506 #if USE_ITT_BUILD
1507  __kmp_itt_critical_releasing(lck);
1508 #endif
1509 #if KMP_USE_INLINED_TAS
1510  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1511  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1512  } else
1513 #elif KMP_USE_INLINED_FUTEX
1514  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1515  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1516  } else
1517 #endif
1518  {
1519  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1520  }
1521  } else {
1522  kmp_indirect_lock_t *ilk =
1523  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1524  KMP_ASSERT(ilk != NULL);
1525  lck = ilk->lock;
1526  if (__kmp_env_consistency_check) {
1527  __kmp_pop_sync(global_tid, ct_critical, loc);
1528  }
1529 #if USE_ITT_BUILD
1530  __kmp_itt_critical_releasing(lck);
1531 #endif
1532  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1533  }
1534 
1535 #else // KMP_USE_DYNAMIC_LOCK
1536 
1537  if ((__kmp_user_lock_kind == lk_tas) &&
1538  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1539  lck = (kmp_user_lock_p)crit;
1540  }
1541 #if KMP_USE_FUTEX
1542  else if ((__kmp_user_lock_kind == lk_futex) &&
1543  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1544  lck = (kmp_user_lock_p)crit;
1545  }
1546 #endif
1547  else { // ticket, queuing or drdpa
1548  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1549  }
1550 
1551  KMP_ASSERT(lck != NULL);
1552 
1553  if (__kmp_env_consistency_check)
1554  __kmp_pop_sync(global_tid, ct_critical, loc);
1555 
1556 #if USE_ITT_BUILD
1557  __kmp_itt_critical_releasing(lck);
1558 #endif /* USE_ITT_BUILD */
1559  // Value of 'crit' should be good for using as a critical_id of the critical
1560  // section directive.
1561  __kmp_release_user_lock_with_checks(lck, global_tid);
1562 
1563 #endif // KMP_USE_DYNAMIC_LOCK
1564 
1565 #if OMPT_SUPPORT && OMPT_OPTIONAL
1566  /* OMPT release event triggers after lock is released; place here to trigger
1567  * for all #if branches */
1568  OMPT_STORE_RETURN_ADDRESS(global_tid);
1569  if (ompt_enabled.ompt_callback_mutex_released) {
1570  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1571  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1572  OMPT_LOAD_RETURN_ADDRESS(0));
1573  }
1574 #endif
1575 
1576  KMP_POP_PARTITIONED_TIMER();
1577  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1578 }
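/* Illustrative sketch (editorial addition, not upstream code): for each named
   critical section the compiler emits one zero-initialized kmp_critical_name
   object and brackets the region with the acquire/release entry points,
   roughly (the object name crit_foo is hypothetical):

     static kmp_critical_name crit_foo;
     __kmpc_critical(&loc, gtid, &crit_foo);
     // critical section body
     __kmpc_end_critical(&loc, gtid, &crit_foo);

   For locks that do not fit inline, the first thread to arrive installs a
   pointer to the real lock object into crit_foo (see __kmp_init_indirect_csptr
   / __kmp_get_critical_section_ptr above). */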
1579 
1589 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1590  int status;
1591  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1592  __kmp_assert_valid_gtid(global_tid);
1593 
1594  if (!TCR_4(__kmp_init_parallel))
1595  __kmp_parallel_initialize();
1596 
1597  __kmp_resume_if_soft_paused();
1598 
1599  if (__kmp_env_consistency_check)
1600  __kmp_check_barrier(global_tid, ct_barrier, loc);
1601 
1602 #if OMPT_SUPPORT
1603  ompt_frame_t *ompt_frame;
1604  if (ompt_enabled.enabled) {
1605  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1606  if (ompt_frame->enter_frame.ptr == NULL)
1607  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1608  OMPT_STORE_RETURN_ADDRESS(global_tid);
1609  }
1610 #endif
1611 #if USE_ITT_NOTIFY
1612  __kmp_threads[global_tid]->th.th_ident = loc;
1613 #endif
1614  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1615 #if OMPT_SUPPORT && OMPT_OPTIONAL
1616  if (ompt_enabled.enabled) {
1617  ompt_frame->enter_frame = ompt_data_none;
1618  }
1619 #endif
1620 
1621  return (status != 0) ? 0 : 1;
1622 }
1623 
1633 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1634  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1635  __kmp_assert_valid_gtid(global_tid);
1636  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1637 }
1638 
1649 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1650  kmp_int32 ret;
1651  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1652  __kmp_assert_valid_gtid(global_tid);
1653 
1654  if (!TCR_4(__kmp_init_parallel))
1655  __kmp_parallel_initialize();
1656 
1657  __kmp_resume_if_soft_paused();
1658 
1659  if (__kmp_env_consistency_check) {
1660  if (loc == 0) {
1661  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1662  }
1663  __kmp_check_barrier(global_tid, ct_barrier, loc);
1664  }
1665 
1666 #if OMPT_SUPPORT
1667  ompt_frame_t *ompt_frame;
1668  if (ompt_enabled.enabled) {
1669  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1670  if (ompt_frame->enter_frame.ptr == NULL)
1671  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1672  OMPT_STORE_RETURN_ADDRESS(global_tid);
1673  }
1674 #endif
1675 #if USE_ITT_NOTIFY
1676  __kmp_threads[global_tid]->th.th_ident = loc;
1677 #endif
1678  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1679 #if OMPT_SUPPORT && OMPT_OPTIONAL
1680  if (ompt_enabled.enabled) {
1681  ompt_frame->enter_frame = ompt_data_none;
1682  }
1683 #endif
1684 
1685  ret = __kmpc_master(loc, global_tid);
1686 
1687  if (__kmp_env_consistency_check) {
1688  /* there's no __kmpc_end_master called; so the (stats) */
1689  /* actions of __kmpc_end_master are done here */
1690  if (ret) {
1691  /* only one thread should do the pop since only */
1692  /* one did the push (see __kmpc_master()) */
1693  __kmp_pop_sync(global_tid, ct_master, loc);
1694  }
1695  }
1696 
1697  return (ret);
1698 }
1699 
1700 /* The BARRIER for a SINGLE process section is always explicit */
1712 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1713  __kmp_assert_valid_gtid(global_tid);
1714  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1715 
1716  if (rc) {
1717  // We are going to execute the single statement, so we should count it.
1718  KMP_COUNT_BLOCK(OMP_SINGLE);
1719  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1720  }
1721 
1722 #if OMPT_SUPPORT && OMPT_OPTIONAL
1723  kmp_info_t *this_thr = __kmp_threads[global_tid];
1724  kmp_team_t *team = this_thr->th.th_team;
1725  int tid = __kmp_tid_from_gtid(global_tid);
1726 
1727  if (ompt_enabled.enabled) {
1728  if (rc) {
1729  if (ompt_enabled.ompt_callback_work) {
1730  ompt_callbacks.ompt_callback(ompt_callback_work)(
1731  ompt_work_single_executor, ompt_scope_begin,
1732  &(team->t.ompt_team_info.parallel_data),
1733  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1734  1, OMPT_GET_RETURN_ADDRESS(0));
1735  }
1736  } else {
1737  if (ompt_enabled.ompt_callback_work) {
1738  ompt_callbacks.ompt_callback(ompt_callback_work)(
1739  ompt_work_single_other, ompt_scope_begin,
1740  &(team->t.ompt_team_info.parallel_data),
1741  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1742  1, OMPT_GET_RETURN_ADDRESS(0));
1743  ompt_callbacks.ompt_callback(ompt_callback_work)(
1744  ompt_work_single_other, ompt_scope_end,
1745  &(team->t.ompt_team_info.parallel_data),
1746  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1747  1, OMPT_GET_RETURN_ADDRESS(0));
1748  }
1749  }
1750  }
1751 #endif
1752 
1753  return rc;
1754 }
1755 
1765 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1766  __kmp_assert_valid_gtid(global_tid);
1767  __kmp_exit_single(global_tid);
1768  KMP_POP_PARTITIONED_TIMER();
1769 
1770 #if OMPT_SUPPORT && OMPT_OPTIONAL
1771  kmp_info_t *this_thr = __kmp_threads[global_tid];
1772  kmp_team_t *team = this_thr->th.th_team;
1773  int tid = __kmp_tid_from_gtid(global_tid);
1774 
1775  if (ompt_enabled.ompt_callback_work) {
1776  ompt_callbacks.ompt_callback(ompt_callback_work)(
1777  ompt_work_single_executor, ompt_scope_end,
1778  &(team->t.ompt_team_info.parallel_data),
1779  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1780  OMPT_GET_RETURN_ADDRESS(0));
1781  }
1782 #endif
1783 }
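/* Illustrative sketch (editorial addition, not upstream code): the single
   construct is lowered as an executor test plus a closing barrier, roughly:

     if (__kmpc_single(&loc, gtid)) {
       // executed by exactly one thread of the team
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid); // omitted when a nowait clause is present
*/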
1784 
1792 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1793  KMP_POP_PARTITIONED_TIMER();
1794  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1795 
1796 #if OMPT_SUPPORT && OMPT_OPTIONAL
1797  if (ompt_enabled.ompt_callback_work) {
1798  ompt_work_t ompt_work_type = ompt_work_loop;
1799  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1800  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1801  // Determine workshare type
1802  if (loc != NULL) {
1803  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1804  ompt_work_type = ompt_work_loop;
1805  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1806  ompt_work_type = ompt_work_sections;
1807  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1808  ompt_work_type = ompt_work_distribute;
1809  } else {
1810  // use default set above.
1811  // a warning about this case is provided in __kmpc_for_static_init
1812  }
1813  KMP_DEBUG_ASSERT(ompt_work_type);
1814  }
1815  ompt_callbacks.ompt_callback(ompt_callback_work)(
1816  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1817  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1818  }
1819 #endif
1820  if (__kmp_env_consistency_check)
1821  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1822 }
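/* Illustrative sketch (editorial addition, not upstream code):
   __kmpc_for_static_fini pairs with the static-loop init entry points defined
   elsewhere in the runtime (e.g. __kmpc_for_static_init_4). A statically
   scheduled loop over N iterations is lowered roughly as:

     kmp_int32 last = 0, lb = 0, ub = N - 1, stride = 1;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &stride, 1 /*incr*/, 0 /*chunk*/);
     for (kmp_int32 i = lb; i <= ub; ++i) {
       // loop body
     }
     __kmpc_for_static_fini(&loc, gtid);
*/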
1823 
1824 // User routines which take C-style arguments (call by value),
1825 // as distinct from the Fortran equivalent routines
1826 
1827 void ompc_set_num_threads(int arg) {
1828  // !!!!! TODO: check the per-task binding
1829  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1830 }
1831 
1832 void ompc_set_dynamic(int flag) {
1833  kmp_info_t *thread;
1834 
1835  /* For the thread-private implementation of the internal controls */
1836  thread = __kmp_entry_thread();
1837 
1838  __kmp_save_internal_controls(thread);
1839 
1840  set__dynamic(thread, flag ? TRUE : FALSE);
1841 }
1842 
1843 void ompc_set_nested(int flag) {
1844  kmp_info_t *thread;
1845 
1846  /* For the thread-private internal controls implementation */
1847  thread = __kmp_entry_thread();
1848 
1849  __kmp_save_internal_controls(thread);
1850 
1851  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1852 }
1853 
1854 void ompc_set_max_active_levels(int max_active_levels) {
1855  /* TO DO */
1856  /* we want per-task implementation of this internal control */
1857 
1858  /* For the per-thread internal controls implementation */
1859  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1860 }
1861 
1862 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1863  // !!!!! TODO: check the per-task binding
1864  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1865 }
1866 
1867 int ompc_get_ancestor_thread_num(int level) {
1868  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1869 }
1870 
1871 int ompc_get_team_size(int level) {
1872  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1873 }
1874 
1875 /* OpenMP 5.0 Affinity Format API */
1876 
1877 void ompc_set_affinity_format(char const *format) {
1878  if (!__kmp_init_serial) {
1879  __kmp_serial_initialize();
1880  }
1881  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1882  format, KMP_STRLEN(format) + 1);
1883 }
1884 
1885 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1886  size_t format_size;
1887  if (!__kmp_init_serial) {
1888  __kmp_serial_initialize();
1889  }
1890  format_size = KMP_STRLEN(__kmp_affinity_format);
1891  if (buffer && size) {
1892  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1893  format_size + 1);
1894  }
1895  return format_size;
1896 }
1897 
1898 void ompc_display_affinity(char const *format) {
1899  int gtid;
1900  if (!TCR_4(__kmp_init_middle)) {
1901  __kmp_middle_initialize();
1902  }
1903  gtid = __kmp_get_gtid();
1904  __kmp_aux_display_affinity(gtid, format);
1905 }
1906 
1907 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1908  char const *format) {
1909  int gtid;
1910  size_t num_required;
1911  kmp_str_buf_t capture_buf;
1912  if (!TCR_4(__kmp_init_middle)) {
1913  __kmp_middle_initialize();
1914  }
1915  gtid = __kmp_get_gtid();
1916  __kmp_str_buf_init(&capture_buf);
1917  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1918  if (buffer && buf_size) {
1919  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1920  capture_buf.used + 1);
1921  }
1922  __kmp_str_buf_free(&capture_buf);
1923  return num_required;
1924 }
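/* Illustrative sketch (editorial addition, not upstream code): these entry
   points back the OpenMP 5.0 affinity-format API. Direct use looks roughly
   like this (field names follow the OpenMP affinity-format syntax):

     char buf[256];
     ompc_set_affinity_format("thread %{thread_num} on %{thread_affinity}");
     size_t needed = ompc_capture_affinity(buf, sizeof(buf),
                                           "%{thread_num} on %{thread_affinity}");
     // a return value >= sizeof(buf) means the captured output was truncated
*/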
1925 
1926 void kmpc_set_stacksize(int arg) {
1927  // __kmp_aux_set_stacksize initializes the library if needed
1928  __kmp_aux_set_stacksize(arg);
1929 }
1930 
1931 void kmpc_set_stacksize_s(size_t arg) {
1932  // __kmp_aux_set_stacksize initializes the library if needed
1933  __kmp_aux_set_stacksize(arg);
1934 }
1935 
1936 void kmpc_set_blocktime(int arg) {
1937  int gtid, tid;
1938  kmp_info_t *thread;
1939 
1940  gtid = __kmp_entry_gtid();
1941  tid = __kmp_tid_from_gtid(gtid);
1942  thread = __kmp_thread_from_gtid(gtid);
1943 
1944  __kmp_aux_set_blocktime(arg, thread, tid);
1945 }
1946 
1947 void kmpc_set_library(int arg) {
1948  // __kmp_user_set_library initializes the library if needed
1949  __kmp_user_set_library((enum library_type)arg);
1950 }
1951 
1952 void kmpc_set_defaults(char const *str) {
1953  // __kmp_aux_set_defaults initializes the library if needed
1954  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1955 }
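/* For illustration only (not part of the library): the kmpc_set_* wrappers above
   are assumed to back the kmp_set_* service extensions declared in omp.h.
   Stack-size and defaults settings only affect worker threads that have not
   been created yet, so they are typically applied before the first parallel
   region:

     #include <omp.h>

     int main(void) {
       kmp_set_stacksize_s(4 * 1024 * 1024);     // worker stack size, in bytes
       kmp_set_blocktime(0);                     // don't spin before sleeping
       kmp_set_defaults("KMP_AFFINITY=compact"); // env-style setting string
     #pragma omp parallel
       {
         // ... work ...
       }
       return 0;
     }
*/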
1956 
1957 void kmpc_set_disp_num_buffers(int arg) {
1958  // ignore after initialization because some teams have already
1959  // allocated dispatch buffers
1960  if (__kmp_init_serial == 0 && arg > 0)
1961  __kmp_dispatch_num_buffers = arg;
1962 }
1963 
1964 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1965 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1966  return -1;
1967 #else
1968  if (!TCR_4(__kmp_init_middle)) {
1969  __kmp_middle_initialize();
1970  }
1971  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1972 #endif
1973 }
1974 
1975 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1976 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1977  return -1;
1978 #else
1979  if (!TCR_4(__kmp_init_middle)) {
1980  __kmp_middle_initialize();
1981  }
1982  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1983 #endif
1984 }
1985 
1986 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1987 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1988  return -1;
1989 #else
1990  if (!TCR_4(__kmp_init_middle)) {
1991  __kmp_middle_initialize();
1992  }
1993  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1994 #endif
1995 }
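/* For illustration only (not part of the library): these three entry points are
   assumed to back the low-level kmp_*_affinity_mask_proc extensions declared in
   omp.h. A minimal sketch that tries to bind the calling thread to logical
   processor 0 on builds where affinity is supported:

     #include <omp.h>

     void pin_to_proc0(void) {
       kmp_affinity_mask_t mask;
       kmp_create_affinity_mask(&mask);
       kmp_set_affinity_mask_proc(0, &mask); // add logical proc 0 to the mask
       kmp_set_affinity(&mask);              // bind the calling thread
       kmp_destroy_affinity_mask(&mask);
     }
*/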
1996 
1997 /* -------------------------------------------------------------------------- */
2042 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2043  void *cpy_data, void (*cpy_func)(void *, void *),
2044  kmp_int32 didit) {
2045  void **data_ptr;
2046  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2047  __kmp_assert_valid_gtid(gtid);
2048 
2049  KMP_MB();
2050 
2051  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2052 
2053  if (__kmp_env_consistency_check) {
2054  if (loc == 0) {
2055  KMP_WARNING(ConstructIdentInvalid);
2056  }
2057  }
2058 
2059  // ToDo: Optimize the following two barriers into some kind of split barrier
2060 
2061  if (didit)
2062  *data_ptr = cpy_data;
2063 
2064 #if OMPT_SUPPORT
2065  ompt_frame_t *ompt_frame;
2066  if (ompt_enabled.enabled) {
2067  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2068  if (ompt_frame->enter_frame.ptr == NULL)
2069  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2070  OMPT_STORE_RETURN_ADDRESS(gtid);
2071  }
2072 #endif
2073 /* This barrier is not a barrier region boundary */
2074 #if USE_ITT_NOTIFY
2075  __kmp_threads[gtid]->th.th_ident = loc;
2076 #endif
2077  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2078 
2079  if (!didit)
2080  (*cpy_func)(cpy_data, *data_ptr);
2081 
2082 // Consider next barrier a user-visible barrier for barrier region boundaries
2083 // Nesting checks are already handled by the single construct checks
2084 
2085 #if OMPT_SUPPORT
2086  if (ompt_enabled.enabled) {
2087  OMPT_STORE_RETURN_ADDRESS(gtid);
2088  }
2089 #endif
2090 #if USE_ITT_NOTIFY
2091  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2092 // tasks can overwrite the location)
2093 #endif
2094  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2095 #if OMPT_SUPPORT && OMPT_OPTIONAL
2096  if (ompt_enabled.enabled) {
2097  ompt_frame->enter_frame = ompt_data_none;
2098  }
2099 #endif
2100 }
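/* For illustration only (not part of the library): __kmpc_copyprivate implements
   the broadcast required by `#pragma omp single copyprivate(...)`. The thread
   that executed the single region passes didit != 0 and publishes a record
   describing its variables; the first barrier makes that record visible, every
   other thread then copies from it through cpy_func, and the second barrier
   keeps the record alive until all copies have completed. A hedged sketch of
   compiler-style lowering, simplified to one int variable (copy_func, did_it,
   compute_value and record are hypothetical names):

     static void copy_func(void *dst, void *src) {
       // each record is an array of pointers to the copyprivate variables
       *((int *)((void **)dst)[0]) = *((int *)((void **)src)[0]);
     }

     // ... inside the parallel region; each thread has a private copy of x:
     int x = 0;
     kmp_int32 did_it = 0;
     if (__kmpc_single(loc, gtid)) {
       x = compute_value();    // body of the single region
       __kmpc_end_single(loc, gtid);
       did_it = 1;
     }
     void *record[1] = {&x};   // every thread points the record at its own x
     __kmpc_copyprivate(loc, gtid, sizeof(record), record, copy_func, did_it);
     // afterwards every thread's x holds the value produced in the single region
*/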
2101 
2102 /* -------------------------------------------------------------------------- */
2103 
2104 #define INIT_LOCK __kmp_init_user_lock_with_checks
2105 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2106 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2107 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2108 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2109 #define ACQUIRE_NESTED_LOCK_TIMED \
2110  __kmp_acquire_nested_user_lock_with_checks_timed
2111 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2112 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2113 #define TEST_LOCK __kmp_test_user_lock_with_checks
2114 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2115 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2116 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2117 
2118 // TODO: Make check abort messages use location info & pass it into
2119 // with_checks routines
2120 
2121 #if KMP_USE_DYNAMIC_LOCK
2122 
2123 // internal lock initializer
2124 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2125  kmp_dyna_lockseq_t seq) {
2126  if (KMP_IS_D_LOCK(seq)) {
2127  KMP_INIT_D_LOCK(lock, seq);
2128 #if USE_ITT_BUILD
2129  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2130 #endif
2131  } else {
2132  KMP_INIT_I_LOCK(lock, seq);
2133 #if USE_ITT_BUILD
2134  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2135  __kmp_itt_lock_creating(ilk->lock, loc);
2136 #endif
2137  }
2138 }
2139 
2140 // internal nest lock initializer
2141 static __forceinline void
2142 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2143  kmp_dyna_lockseq_t seq) {
2144 #if KMP_USE_TSX
2145  // Don't have nested lock implementation for speculative locks
2146  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2147  seq = __kmp_user_lock_seq;
2148 #endif
2149  switch (seq) {
2150  case lockseq_tas:
2151  seq = lockseq_nested_tas;
2152  break;
2153 #if KMP_USE_FUTEX
2154  case lockseq_futex:
2155  seq = lockseq_nested_futex;
2156  break;
2157 #endif
2158  case lockseq_ticket:
2159  seq = lockseq_nested_ticket;
2160  break;
2161  case lockseq_queuing:
2162  seq = lockseq_nested_queuing;
2163  break;
2164  case lockseq_drdpa:
2165  seq = lockseq_nested_drdpa;
2166  break;
2167  default:
2168  seq = lockseq_nested_queuing;
2169  }
2170  KMP_INIT_I_LOCK(lock, seq);
2171 #if USE_ITT_BUILD
2172  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2173  __kmp_itt_lock_creating(ilk->lock, loc);
2174 #endif
2175 }
2176 
2177 /* initialize the lock with a hint */
2178 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2179  uintptr_t hint) {
2180  KMP_DEBUG_ASSERT(__kmp_init_serial);
2181  if (__kmp_env_consistency_check && user_lock == NULL) {
2182  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2183  }
2184 
2185  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2186 
2187 #if OMPT_SUPPORT && OMPT_OPTIONAL
2188  // This is the case, if called from omp_init_lock_with_hint:
2189  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2190  if (!codeptr)
2191  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2192  if (ompt_enabled.ompt_callback_lock_init) {
2193  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2194  ompt_mutex_lock, (omp_lock_hint_t)hint,
2195  __ompt_get_mutex_impl_type(user_lock),
2196  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2197  }
2198 #endif
2199 }
2200 
2201 /* initialize the nested lock with a hint */
2202 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2203  void **user_lock, uintptr_t hint) {
2204  KMP_DEBUG_ASSERT(__kmp_init_serial);
2205  if (__kmp_env_consistency_check && user_lock == NULL) {
2206  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2207  }
2208 
2209  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2210 
2211 #if OMPT_SUPPORT && OMPT_OPTIONAL
2212  // This is the case, if called from omp_init_lock_with_hint:
2213  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2214  if (!codeptr)
2215  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2216  if (ompt_enabled.ompt_callback_lock_init) {
2217  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2218  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2219  __ompt_get_mutex_impl_type(user_lock),
2220  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2221  }
2222 #endif
2223 }
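/* For illustration only (not part of the library): these entry points are
   reached when user code initializes a lock with a hint; the hint is mapped to
   a concrete lock sequence by __kmp_map_hint_to_lock(), and the runtime is free
   to ignore it. A minimal user-level sketch (omp_lock_hint_contended is the
   OpenMP 4.5 spelling; 5.0 also provides the omp_sync_hint_* aliases):

     #include <omp.h>

     omp_lock_t lk;

     void setup(void) {
       // ask for a lock optimized for high contention
       omp_init_lock_with_hint(&lk, omp_lock_hint_contended);
     }
*/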
2224 
2225 #endif // KMP_USE_DYNAMIC_LOCK
2226 
2227 /* initialize the lock */
2228 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2229 #if KMP_USE_DYNAMIC_LOCK
2230 
2231  KMP_DEBUG_ASSERT(__kmp_init_serial);
2232  if (__kmp_env_consistency_check && user_lock == NULL) {
2233  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2234  }
2235  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2236 
2237 #if OMPT_SUPPORT && OMPT_OPTIONAL
2238  // This is the case, if called from omp_init_lock_with_hint:
2239  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2240  if (!codeptr)
2241  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2242  if (ompt_enabled.ompt_callback_lock_init) {
2243  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2244  ompt_mutex_lock, omp_lock_hint_none,
2245  __ompt_get_mutex_impl_type(user_lock),
2246  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2247  }
2248 #endif
2249 
2250 #else // KMP_USE_DYNAMIC_LOCK
2251 
2252  static char const *const func = "omp_init_lock";
2253  kmp_user_lock_p lck;
2254  KMP_DEBUG_ASSERT(__kmp_init_serial);
2255 
2256  if (__kmp_env_consistency_check) {
2257  if (user_lock == NULL) {
2258  KMP_FATAL(LockIsUninitialized, func);
2259  }
2260  }
2261 
2262  KMP_CHECK_USER_LOCK_INIT();
2263 
2264  if ((__kmp_user_lock_kind == lk_tas) &&
2265  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2266  lck = (kmp_user_lock_p)user_lock;
2267  }
2268 #if KMP_USE_FUTEX
2269  else if ((__kmp_user_lock_kind == lk_futex) &&
2270  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2271  lck = (kmp_user_lock_p)user_lock;
2272  }
2273 #endif
2274  else {
2275  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2276  }
2277  INIT_LOCK(lck);
2278  __kmp_set_user_lock_location(lck, loc);
2279 
2280 #if OMPT_SUPPORT && OMPT_OPTIONAL
2281  // This is the case, if called from omp_init_lock_with_hint:
2282  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2283  if (!codeptr)
2284  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2285  if (ompt_enabled.ompt_callback_lock_init) {
2286  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2287  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2288  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2289  }
2290 #endif
2291 
2292 #if USE_ITT_BUILD
2293  __kmp_itt_lock_creating(lck);
2294 #endif /* USE_ITT_BUILD */
2295 
2296 #endif // KMP_USE_DYNAMIC_LOCK
2297 } // __kmpc_init_lock
2298 
2299 /* initialize the nested lock */
2300 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2301 #if KMP_USE_DYNAMIC_LOCK
2302 
2303  KMP_DEBUG_ASSERT(__kmp_init_serial);
2304  if (__kmp_env_consistency_check && user_lock == NULL) {
2305  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2306  }
2307  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2308 
2309 #if OMPT_SUPPORT && OMPT_OPTIONAL
2310  // This is the case, if called from omp_init_lock_with_hint:
2311  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2312  if (!codeptr)
2313  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2314  if (ompt_enabled.ompt_callback_lock_init) {
2315  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2316  ompt_mutex_nest_lock, omp_lock_hint_none,
2317  __ompt_get_mutex_impl_type(user_lock),
2318  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2319  }
2320 #endif
2321 
2322 #else // KMP_USE_DYNAMIC_LOCK
2323 
2324  static char const *const func = "omp_init_nest_lock";
2325  kmp_user_lock_p lck;
2326  KMP_DEBUG_ASSERT(__kmp_init_serial);
2327 
2328  if (__kmp_env_consistency_check) {
2329  if (user_lock == NULL) {
2330  KMP_FATAL(LockIsUninitialized, func);
2331  }
2332  }
2333 
2334  KMP_CHECK_USER_LOCK_INIT();
2335 
2336  if ((__kmp_user_lock_kind == lk_tas) &&
2337  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2338  OMP_NEST_LOCK_T_SIZE)) {
2339  lck = (kmp_user_lock_p)user_lock;
2340  }
2341 #if KMP_USE_FUTEX
2342  else if ((__kmp_user_lock_kind == lk_futex) &&
2343  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2344  OMP_NEST_LOCK_T_SIZE)) {
2345  lck = (kmp_user_lock_p)user_lock;
2346  }
2347 #endif
2348  else {
2349  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2350  }
2351 
2352  INIT_NESTED_LOCK(lck);
2353  __kmp_set_user_lock_location(lck, loc);
2354 
2355 #if OMPT_SUPPORT && OMPT_OPTIONAL
2356  // This is the case, if called from omp_init_lock_with_hint:
2357  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2358  if (!codeptr)
2359  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2360  if (ompt_enabled.ompt_callback_lock_init) {
2361  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2362  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2363  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2364  }
2365 #endif
2366 
2367 #if USE_ITT_BUILD
2368  __kmp_itt_lock_creating(lck);
2369 #endif /* USE_ITT_BUILD */
2370 
2371 #endif // KMP_USE_DYNAMIC_LOCK
2372 } // __kmpc_init_nest_lock
2373 
2374 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2375 #if KMP_USE_DYNAMIC_LOCK
2376 
2377 #if USE_ITT_BUILD
2378  kmp_user_lock_p lck;
2379  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2380  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2381  } else {
2382  lck = (kmp_user_lock_p)user_lock;
2383  }
2384  __kmp_itt_lock_destroyed(lck);
2385 #endif
2386 #if OMPT_SUPPORT && OMPT_OPTIONAL
2387  // This is the case, if called from omp_init_lock_with_hint:
2388  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2389  if (!codeptr)
2390  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2391  if (ompt_enabled.ompt_callback_lock_destroy) {
2392  kmp_user_lock_p lck;
2393  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2394  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2395  } else {
2396  lck = (kmp_user_lock_p)user_lock;
2397  }
2398  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2399  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2400  }
2401 #endif
2402  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2403 #else
2404  kmp_user_lock_p lck;
2405 
2406  if ((__kmp_user_lock_kind == lk_tas) &&
2407  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2408  lck = (kmp_user_lock_p)user_lock;
2409  }
2410 #if KMP_USE_FUTEX
2411  else if ((__kmp_user_lock_kind == lk_futex) &&
2412  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2413  lck = (kmp_user_lock_p)user_lock;
2414  }
2415 #endif
2416  else {
2417  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2418  }
2419 
2420 #if OMPT_SUPPORT && OMPT_OPTIONAL
2421  // This is the case, if called from omp_init_lock_with_hint:
2422  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2423  if (!codeptr)
2424  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2425  if (ompt_enabled.ompt_callback_lock_destroy) {
2426  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2427  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2428  }
2429 #endif
2430 
2431 #if USE_ITT_BUILD
2432  __kmp_itt_lock_destroyed(lck);
2433 #endif /* USE_ITT_BUILD */
2434  DESTROY_LOCK(lck);
2435 
2436  if ((__kmp_user_lock_kind == lk_tas) &&
2437  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2438  ;
2439  }
2440 #if KMP_USE_FUTEX
2441  else if ((__kmp_user_lock_kind == lk_futex) &&
2442  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2443  ;
2444  }
2445 #endif
2446  else {
2447  __kmp_user_lock_free(user_lock, gtid, lck);
2448  }
2449 #endif // KMP_USE_DYNAMIC_LOCK
2450 } // __kmpc_destroy_lock
2451 
2452 /* destroy the nested lock */
2453 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2454 #if KMP_USE_DYNAMIC_LOCK
2455 
2456 #if USE_ITT_BUILD
2457  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2458  __kmp_itt_lock_destroyed(ilk->lock);
2459 #endif
2460 #if OMPT_SUPPORT && OMPT_OPTIONAL
2461  // This is the case, if called from omp_init_lock_with_hint:
2462  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2463  if (!codeptr)
2464  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2465  if (ompt_enabled.ompt_callback_lock_destroy) {
2466  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2467  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2468  }
2469 #endif
2470  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2471 
2472 #else // KMP_USE_DYNAMIC_LOCK
2473 
2474  kmp_user_lock_p lck;
2475 
2476  if ((__kmp_user_lock_kind == lk_tas) &&
2477  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2478  OMP_NEST_LOCK_T_SIZE)) {
2479  lck = (kmp_user_lock_p)user_lock;
2480  }
2481 #if KMP_USE_FUTEX
2482  else if ((__kmp_user_lock_kind == lk_futex) &&
2483  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2484  OMP_NEST_LOCK_T_SIZE)) {
2485  lck = (kmp_user_lock_p)user_lock;
2486  }
2487 #endif
2488  else {
2489  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2490  }
2491 
2492 #if OMPT_SUPPORT && OMPT_OPTIONAL
2493  // This is the case, if called from omp_init_lock_with_hint:
2494  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2495  if (!codeptr)
2496  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2497  if (ompt_enabled.ompt_callback_lock_destroy) {
2498  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2499  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2500  }
2501 #endif
2502 
2503 #if USE_ITT_BUILD
2504  __kmp_itt_lock_destroyed(lck);
2505 #endif /* USE_ITT_BUILD */
2506 
2507  DESTROY_NESTED_LOCK(lck);
2508 
2509  if ((__kmp_user_lock_kind == lk_tas) &&
2510  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2511  OMP_NEST_LOCK_T_SIZE)) {
2512  ;
2513  }
2514 #if KMP_USE_FUTEX
2515  else if ((__kmp_user_lock_kind == lk_futex) &&
2516  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2517  OMP_NEST_LOCK_T_SIZE)) {
2518  ;
2519  }
2520 #endif
2521  else {
2522  __kmp_user_lock_free(user_lock, gtid, lck);
2523  }
2524 #endif // KMP_USE_DYNAMIC_LOCK
2525 } // __kmpc_destroy_nest_lock
2526 
2527 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2528  KMP_COUNT_BLOCK(OMP_set_lock);
2529 #if KMP_USE_DYNAMIC_LOCK
2530  int tag = KMP_EXTRACT_D_TAG(user_lock);
2531 #if USE_ITT_BUILD
2532  __kmp_itt_lock_acquiring(
2533  (kmp_user_lock_p)
2534  user_lock); // itt function will get to the right lock object.
2535 #endif
2536 #if OMPT_SUPPORT && OMPT_OPTIONAL
2537  // This is the case, if called from omp_init_lock_with_hint:
2538  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2539  if (!codeptr)
2540  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2541  if (ompt_enabled.ompt_callback_mutex_acquire) {
2542  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2543  ompt_mutex_lock, omp_lock_hint_none,
2544  __ompt_get_mutex_impl_type(user_lock),
2545  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2546  }
2547 #endif
2548 #if KMP_USE_INLINED_TAS
2549  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2550  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2551  } else
2552 #elif KMP_USE_INLINED_FUTEX
2553  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2554  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2555  } else
2556 #endif
2557  {
2558  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2559  }
2560 #if USE_ITT_BUILD
2561  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2562 #endif
2563 #if OMPT_SUPPORT && OMPT_OPTIONAL
2564  if (ompt_enabled.ompt_callback_mutex_acquired) {
2565  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2566  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2567  }
2568 #endif
2569 
2570 #else // KMP_USE_DYNAMIC_LOCK
2571 
2572  kmp_user_lock_p lck;
2573 
2574  if ((__kmp_user_lock_kind == lk_tas) &&
2575  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2576  lck = (kmp_user_lock_p)user_lock;
2577  }
2578 #if KMP_USE_FUTEX
2579  else if ((__kmp_user_lock_kind == lk_futex) &&
2580  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2581  lck = (kmp_user_lock_p)user_lock;
2582  }
2583 #endif
2584  else {
2585  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2586  }
2587 
2588 #if USE_ITT_BUILD
2589  __kmp_itt_lock_acquiring(lck);
2590 #endif /* USE_ITT_BUILD */
2591 #if OMPT_SUPPORT && OMPT_OPTIONAL
2592  // This is the case, if called from omp_init_lock_with_hint:
2593  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2594  if (!codeptr)
2595  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2596  if (ompt_enabled.ompt_callback_mutex_acquire) {
2597  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2598  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2599  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2600  }
2601 #endif
2602 
2603  ACQUIRE_LOCK(lck, gtid);
2604 
2605 #if USE_ITT_BUILD
2606  __kmp_itt_lock_acquired(lck);
2607 #endif /* USE_ITT_BUILD */
2608 
2609 #if OMPT_SUPPORT && OMPT_OPTIONAL
2610  if (ompt_enabled.ompt_callback_mutex_acquired) {
2611  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2612  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2613  }
2614 #endif
2615 
2616 #endif // KMP_USE_DYNAMIC_LOCK
2617 }
2618 
2619 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2620 #if KMP_USE_DYNAMIC_LOCK
2621 
2622 #if USE_ITT_BUILD
2623  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2624 #endif
2625 #if OMPT_SUPPORT && OMPT_OPTIONAL
2626  // This is the case, if called from omp_init_lock_with_hint:
2627  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2628  if (!codeptr)
2629  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2630  if (ompt_enabled.enabled) {
2631  if (ompt_enabled.ompt_callback_mutex_acquire) {
2632  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2633  ompt_mutex_nest_lock, omp_lock_hint_none,
2634  __ompt_get_mutex_impl_type(user_lock),
2635  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2636  }
2637  }
2638 #endif
2639  int acquire_status =
2640  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2641  (void)acquire_status; // only read when OMPT_SUPPORT && OMPT_OPTIONAL
2642 #if USE_ITT_BUILD
2643  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2644 #endif
2645 
2646 #if OMPT_SUPPORT && OMPT_OPTIONAL
2647  if (ompt_enabled.enabled) {
2648  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2649  if (ompt_enabled.ompt_callback_mutex_acquired) {
2650  // lock_first
2651  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2652  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2653  codeptr);
2654  }
2655  } else {
2656  if (ompt_enabled.ompt_callback_nest_lock) {
2657  // lock_next
2658  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2659  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2660  }
2661  }
2662  }
2663 #endif
2664 
2665 #else // KMP_USE_DYNAMIC_LOCK
2666  int acquire_status;
2667  kmp_user_lock_p lck;
2668 
2669  if ((__kmp_user_lock_kind == lk_tas) &&
2670  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2671  OMP_NEST_LOCK_T_SIZE)) {
2672  lck = (kmp_user_lock_p)user_lock;
2673  }
2674 #if KMP_USE_FUTEX
2675  else if ((__kmp_user_lock_kind == lk_futex) &&
2676  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2677  OMP_NEST_LOCK_T_SIZE)) {
2678  lck = (kmp_user_lock_p)user_lock;
2679  }
2680 #endif
2681  else {
2682  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2683  }
2684 
2685 #if USE_ITT_BUILD
2686  __kmp_itt_lock_acquiring(lck);
2687 #endif /* USE_ITT_BUILD */
2688 #if OMPT_SUPPORT && OMPT_OPTIONAL
2689  // This is the case, if called from omp_init_lock_with_hint:
2690  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2691  if (!codeptr)
2692  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2693  if (ompt_enabled.enabled) {
2694  if (ompt_enabled.ompt_callback_mutex_acquire) {
2695  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2696  ompt_mutex_nest_lock, omp_lock_hint_none,
2697  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2698  codeptr);
2699  }
2700  }
2701 #endif
2702 
2703  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2704 
2705 #if USE_ITT_BUILD
2706  __kmp_itt_lock_acquired(lck);
2707 #endif /* USE_ITT_BUILD */
2708 
2709 #if OMPT_SUPPORT && OMPT_OPTIONAL
2710  if (ompt_enabled.enabled) {
2711  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2712  if (ompt_enabled.ompt_callback_mutex_acquired) {
2713  // lock_first
2714  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2715  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2716  }
2717  } else {
2718  if (ompt_enabled.ompt_callback_nest_lock) {
2719  // lock_next
2720  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2721  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2722  }
2723  }
2724  }
2725 #endif
2726 
2727 #endif // KMP_USE_DYNAMIC_LOCK
2728 }
2729 
2730 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2731 #if KMP_USE_DYNAMIC_LOCK
2732 
2733  int tag = KMP_EXTRACT_D_TAG(user_lock);
2734 #if USE_ITT_BUILD
2735  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2736 #endif
2737 #if KMP_USE_INLINED_TAS
2738  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2739  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2740  } else
2741 #elif KMP_USE_INLINED_FUTEX
2742  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2743  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2744  } else
2745 #endif
2746  {
2747  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2748  }
2749 
2750 #if OMPT_SUPPORT && OMPT_OPTIONAL
2751  // This is the case, if called from omp_init_lock_with_hint:
2752  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2753  if (!codeptr)
2754  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2755  if (ompt_enabled.ompt_callback_mutex_released) {
2756  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2757  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2758  }
2759 #endif
2760 
2761 #else // KMP_USE_DYNAMIC_LOCK
2762 
2763  kmp_user_lock_p lck;
2764 
2765  /* Can't use serial interval since not block structured */
2766  /* release the lock */
2767 
2768  if ((__kmp_user_lock_kind == lk_tas) &&
2769  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2770 #if KMP_OS_LINUX && \
2771  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2772 // "fast" path implemented to fix customer performance issue
2773 #if USE_ITT_BUILD
2774  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2775 #endif /* USE_ITT_BUILD */
2776  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2777  KMP_MB();
2778 
2779 #if OMPT_SUPPORT && OMPT_OPTIONAL
2780  // This is the case, if called from omp_init_lock_with_hint:
2781  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2782  if (!codeptr)
2783  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2784  if (ompt_enabled.ompt_callback_mutex_released) {
2785  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2786  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2787  }
2788 #endif
2789 
2790  return;
2791 #else
2792  lck = (kmp_user_lock_p)user_lock;
2793 #endif
2794  }
2795 #if KMP_USE_FUTEX
2796  else if ((__kmp_user_lock_kind == lk_futex) &&
2797  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2798  lck = (kmp_user_lock_p)user_lock;
2799  }
2800 #endif
2801  else {
2802  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2803  }
2804 
2805 #if USE_ITT_BUILD
2806  __kmp_itt_lock_releasing(lck);
2807 #endif /* USE_ITT_BUILD */
2808 
2809  RELEASE_LOCK(lck, gtid);
2810 
2811 #if OMPT_SUPPORT && OMPT_OPTIONAL
2812  // This is the case, if called from omp_init_lock_with_hint:
2813  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2814  if (!codeptr)
2815  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2816  if (ompt_enabled.ompt_callback_mutex_released) {
2817  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2818  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2819  }
2820 #endif
2821 
2822 #endif // KMP_USE_DYNAMIC_LOCK
2823 }
2824 
2825 /* release the nested lock */
2826 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2827 #if KMP_USE_DYNAMIC_LOCK
2828 
2829 #if USE_ITT_BUILD
2830  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2831 #endif
2832  int release_status =
2833  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2834  (void)release_status; // only read when OMPT_SUPPORT && OMPT_OPTIONAL
2835 
2836 #if OMPT_SUPPORT && OMPT_OPTIONAL
2837  // This is the case, if called from omp_init_lock_with_hint:
2838  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2839  if (!codeptr)
2840  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2841  if (ompt_enabled.enabled) {
2842  if (release_status == KMP_LOCK_RELEASED) {
2843  if (ompt_enabled.ompt_callback_mutex_released) {
2844  // release_lock_last
2845  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2846  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2847  codeptr);
2848  }
2849  } else if (ompt_enabled.ompt_callback_nest_lock) {
2850  // release_lock_prev
2851  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2852  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2853  }
2854  }
2855 #endif
2856 
2857 #else // KMP_USE_DYNAMIC_LOCK
2858 
2859  kmp_user_lock_p lck;
2860 
2861  /* Can't use serial interval since not block structured */
2862 
2863  if ((__kmp_user_lock_kind == lk_tas) &&
2864  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2865  OMP_NEST_LOCK_T_SIZE)) {
2866 #if KMP_OS_LINUX && \
2867  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2868  // "fast" path implemented to fix customer performance issue
2869  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2870 #if USE_ITT_BUILD
2871  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2872 #endif /* USE_ITT_BUILD */
2873 
2874 #if OMPT_SUPPORT && OMPT_OPTIONAL
2875  int release_status = KMP_LOCK_STILL_HELD;
2876 #endif
2877 
2878  if (--(tl->lk.depth_locked) == 0) {
2879  TCW_4(tl->lk.poll, 0);
2880 #if OMPT_SUPPORT && OMPT_OPTIONAL
2881  release_status = KMP_LOCK_RELEASED;
2882 #endif
2883  }
2884  KMP_MB();
2885 
2886 #if OMPT_SUPPORT && OMPT_OPTIONAL
2887  // This is the case, if called from omp_init_lock_with_hint:
2888  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2889  if (!codeptr)
2890  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2891  if (ompt_enabled.enabled) {
2892  if (release_status == KMP_LOCK_RELEASED) {
2893  if (ompt_enabled.ompt_callback_mutex_released) {
2894  // release_lock_last
2895  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2896  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2897  }
2898  } else if (ompt_enabled.ompt_callback_nest_lock) {
2899  // release_lock_previous
2900  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2901  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2902  }
2903  }
2904 #endif
2905 
2906  return;
2907 #else
2908  lck = (kmp_user_lock_p)user_lock;
2909 #endif
2910  }
2911 #if KMP_USE_FUTEX
2912  else if ((__kmp_user_lock_kind == lk_futex) &&
2913  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2914  OMP_NEST_LOCK_T_SIZE)) {
2915  lck = (kmp_user_lock_p)user_lock;
2916  }
2917 #endif
2918  else {
2919  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2920  }
2921 
2922 #if USE_ITT_BUILD
2923  __kmp_itt_lock_releasing(lck);
2924 #endif /* USE_ITT_BUILD */
2925 
2926  int release_status;
2927  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2928 #if OMPT_SUPPORT && OMPT_OPTIONAL
2929  // This is the case, if called from omp_init_lock_with_hint:
2930  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2931  if (!codeptr)
2932  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2933  if (ompt_enabled.enabled) {
2934  if (release_status == KMP_LOCK_RELEASED) {
2935  if (ompt_enabled.ompt_callback_mutex_released) {
2936  // release_lock_last
2937  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2938  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2939  }
2940  } else if (ompt_enabled.ompt_callback_nest_lock) {
2941  // release_lock_previous
2942  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2943  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2944  }
2945  }
2946 #endif
2947 
2948 #endif // KMP_USE_DYNAMIC_LOCK
2949 }
2950 
2951 /* try to acquire the lock */
2952 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2953  KMP_COUNT_BLOCK(OMP_test_lock);
2954 
2955 #if KMP_USE_DYNAMIC_LOCK
2956  int rc;
2957  int tag = KMP_EXTRACT_D_TAG(user_lock);
2958 #if USE_ITT_BUILD
2959  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2960 #endif
2961 #if OMPT_SUPPORT && OMPT_OPTIONAL
2962  // This is the case, if called from omp_init_lock_with_hint:
2963  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2964  if (!codeptr)
2965  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2966  if (ompt_enabled.ompt_callback_mutex_acquire) {
2967  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2968  ompt_mutex_lock, omp_lock_hint_none,
2969  __ompt_get_mutex_impl_type(user_lock),
2970  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2971  }
2972 #endif
2973 #if KMP_USE_INLINED_TAS
2974  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2975  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2976  } else
2977 #elif KMP_USE_INLINED_FUTEX
2978  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2979  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2980  } else
2981 #endif
2982  {
2983  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2984  }
2985  if (rc) {
2986 #if USE_ITT_BUILD
2987  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2988 #endif
2989 #if OMPT_SUPPORT && OMPT_OPTIONAL
2990  if (ompt_enabled.ompt_callback_mutex_acquired) {
2991  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2992  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2993  }
2994 #endif
2995  return FTN_TRUE;
2996  } else {
2997 #if USE_ITT_BUILD
2998  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2999 #endif
3000  return FTN_FALSE;
3001  }
3002 
3003 #else // KMP_USE_DYNAMIC_LOCK
3004 
3005  kmp_user_lock_p lck;
3006  int rc;
3007 
3008  if ((__kmp_user_lock_kind == lk_tas) &&
3009  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3010  lck = (kmp_user_lock_p)user_lock;
3011  }
3012 #if KMP_USE_FUTEX
3013  else if ((__kmp_user_lock_kind == lk_futex) &&
3014  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3015  lck = (kmp_user_lock_p)user_lock;
3016  }
3017 #endif
3018  else {
3019  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3020  }
3021 
3022 #if USE_ITT_BUILD
3023  __kmp_itt_lock_acquiring(lck);
3024 #endif /* USE_ITT_BUILD */
3025 #if OMPT_SUPPORT && OMPT_OPTIONAL
3026  // This is the case, if called from omp_init_lock_with_hint:
3027  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3028  if (!codeptr)
3029  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3030  if (ompt_enabled.ompt_callback_mutex_acquire) {
3031  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3032  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3033  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3034  }
3035 #endif
3036 
3037  rc = TEST_LOCK(lck, gtid);
3038 #if USE_ITT_BUILD
3039  if (rc) {
3040  __kmp_itt_lock_acquired(lck);
3041  } else {
3042  __kmp_itt_lock_cancelled(lck);
3043  }
3044 #endif /* USE_ITT_BUILD */
3045 #if OMPT_SUPPORT && OMPT_OPTIONAL
3046  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3047  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3048  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3049  }
3050 #endif
3051 
3052  return (rc ? FTN_TRUE : FTN_FALSE);
3053 
3054 /* Can't use serial interval since not block structured */
3055 
3056 #endif // KMP_USE_DYNAMIC_LOCK
3057 }
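/* For illustration only (not part of the library): __kmpc_set_lock,
   __kmpc_unset_lock and __kmpc_test_lock are assumed to back the plain
   omp_set_lock / omp_unset_lock / omp_test_lock user API. Typical usage, with
   omp_test_lock used to avoid blocking:

     #include <omp.h>

     omp_lock_t lk; // initialized elsewhere with omp_init_lock(&lk)

     void worker(void) {
       if (omp_test_lock(&lk)) { // nonzero iff the lock was acquired
         // ... critical work ...
         omp_unset_lock(&lk);
       } else {
         omp_set_lock(&lk);      // blocks until the lock becomes available
         // ... critical work ...
         omp_unset_lock(&lk);
       }
     }
*/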
3058 
3059 /* try to acquire the nested lock */
3060 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3061 #if KMP_USE_DYNAMIC_LOCK
3062  int rc;
3063 #if USE_ITT_BUILD
3064  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3065 #endif
3066 #if OMPT_SUPPORT && OMPT_OPTIONAL
3067  // This is the case, if called from omp_init_lock_with_hint:
3068  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3069  if (!codeptr)
3070  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3071  if (ompt_enabled.ompt_callback_mutex_acquire) {
3072  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3073  ompt_mutex_nest_lock, omp_lock_hint_none,
3074  __ompt_get_mutex_impl_type(user_lock),
3075  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3076  }
3077 #endif
3078  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3079 #if USE_ITT_BUILD
3080  if (rc) {
3081  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3082  } else {
3083  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3084  }
3085 #endif
3086 #if OMPT_SUPPORT && OMPT_OPTIONAL
3087  if (ompt_enabled.enabled && rc) {
3088  if (rc == 1) {
3089  if (ompt_enabled.ompt_callback_mutex_acquired) {
3090  // lock_first
3091  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3092  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3093  codeptr);
3094  }
3095  } else {
3096  if (ompt_enabled.ompt_callback_nest_lock) {
3097  // lock_next
3098  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3099  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3100  }
3101  }
3102  }
3103 #endif
3104  return rc;
3105 
3106 #else // KMP_USE_DYNAMIC_LOCK
3107 
3108  kmp_user_lock_p lck;
3109  int rc;
3110 
3111  if ((__kmp_user_lock_kind == lk_tas) &&
3112  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3113  OMP_NEST_LOCK_T_SIZE)) {
3114  lck = (kmp_user_lock_p)user_lock;
3115  }
3116 #if KMP_USE_FUTEX
3117  else if ((__kmp_user_lock_kind == lk_futex) &&
3118  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3119  OMP_NEST_LOCK_T_SIZE)) {
3120  lck = (kmp_user_lock_p)user_lock;
3121  }
3122 #endif
3123  else {
3124  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3125  }
3126 
3127 #if USE_ITT_BUILD
3128  __kmp_itt_lock_acquiring(lck);
3129 #endif /* USE_ITT_BUILD */
3130 
3131 #if OMPT_SUPPORT && OMPT_OPTIONAL
3132  // This is the case, if called from omp_init_lock_with_hint:
3133  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3134  if (!codeptr)
3135  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3136  if (ompt_enabled.enabled &&
3137  ompt_enabled.ompt_callback_mutex_acquire) {
3138  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3139  ompt_mutex_nest_lock, omp_lock_hint_none,
3140  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3141  codeptr);
3142  }
3143 #endif
3144 
3145  rc = TEST_NESTED_LOCK(lck, gtid);
3146 #if USE_ITT_BUILD
3147  if (rc) {
3148  __kmp_itt_lock_acquired(lck);
3149  } else {
3150  __kmp_itt_lock_cancelled(lck);
3151  }
3152 #endif /* USE_ITT_BUILD */
3153 #if OMPT_SUPPORT && OMPT_OPTIONAL
3154  if (ompt_enabled.enabled && rc) {
3155  if (rc == 1) {
3156  if (ompt_enabled.ompt_callback_mutex_acquired) {
3157  // lock_first
3158  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3159  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3160  }
3161  } else {
3162  if (ompt_enabled.ompt_callback_nest_lock) {
3163  // lock_next
3164  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3165  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3166  }
3167  }
3168  }
3169 #endif
3170  return rc;
3171 
3172 /* Can't use serial interval since not block structured */
3173 
3174 #endif // KMP_USE_DYNAMIC_LOCK
3175 }
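/* For illustration only (not part of the library): the nest-lock entry points
   differ from the plain ones in that the owning thread may re-acquire the lock;
   each successful set/test increases a nesting depth and only the release that
   brings the depth back to zero makes the lock available to other threads. This
   is what the lock_first/lock_next and release_lock_last/release_lock_prev OMPT
   events above distinguish. A minimal user-level sketch:

     #include <omp.h>

     omp_nest_lock_t nlk; // initialized elsewhere with omp_init_nest_lock(&nlk)

     void helper(void) {
       omp_set_nest_lock(&nlk);   // same thread: no deadlock, depth + 1
       // ...
       omp_unset_nest_lock(&nlk); // depth - 1
     }

     void update(void) {
       omp_set_nest_lock(&nlk);   // depth 1 for this thread
       helper();                  // re-acquires the lock it already owns
       omp_unset_nest_lock(&nlk); // depth 0: lock released
     }
*/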
3176 
3177 // Interface to fast scalable reduce methods routines
3178 
3179 // keep the selected method in a thread local structure for cross-function
3180 // usage: will be used in __kmpc_end_reduce* functions;
3181 // another solution: to re-determine the method one more time in
3182 // __kmpc_end_reduce* functions (new prototype required then)
3183 // AT: which solution is better?
3184 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3185  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3186 
3187 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3188  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3189 
3190 // description of the packed_reduction_method variable: look at the macros in
3191 // kmp.h
3192 
3193 // used in a critical section reduce block
3194 static __forceinline void
3195 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3196  kmp_critical_name *crit) {
3197 
3198  // this lock was visible to a customer and to the threading profile tool as a
3199  // serial overhead span (although it's used for an internal purpose only)
3200  // why was it visible in previous implementation?
3201  // should we keep it visible in new reduce block?
3202  kmp_user_lock_p lck;
3203 
3204 #if KMP_USE_DYNAMIC_LOCK
3205 
3206  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3207  // Check if it is initialized.
3208  if (*lk == 0) {
3209  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3210  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3211  KMP_GET_D_TAG(__kmp_user_lock_seq));
3212  } else {
3213  __kmp_init_indirect_csptr(crit, loc, global_tid,
3214  KMP_GET_I_TAG(__kmp_user_lock_seq));
3215  }
3216  }
3217  // Branch for accessing the actual lock object and set operation. This
3218  // branching is inevitable since this lock initialization does not follow the
3219  // normal dispatch path (lock table is not used).
3220  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3221  lck = (kmp_user_lock_p)lk;
3222  KMP_DEBUG_ASSERT(lck != NULL);
3223  if (__kmp_env_consistency_check) {
3224  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3225  }
3226  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3227  } else {
3228  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3229  lck = ilk->lock;
3230  KMP_DEBUG_ASSERT(lck != NULL);
3231  if (__kmp_env_consistency_check) {
3232  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3233  }
3234  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3235  }
3236 
3237 #else // KMP_USE_DYNAMIC_LOCK
3238 
3239  // We know that the fast reduction code is only emitted by Intel compilers
3240  // with 32 byte critical sections. If there isn't enough space, then we
3241  // have to use a pointer.
3242  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3243  lck = (kmp_user_lock_p)crit;
3244  } else {
3245  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3246  }
3247  KMP_DEBUG_ASSERT(lck != NULL);
3248 
3249  if (__kmp_env_consistency_check)
3250  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3251 
3252  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3253 
3254 #endif // KMP_USE_DYNAMIC_LOCK
3255 }
3256 
3257 // used in a critical section reduce block
3258 static __forceinline void
3259 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3260  kmp_critical_name *crit) {
3261 
3262  kmp_user_lock_p lck;
3263 
3264 #if KMP_USE_DYNAMIC_LOCK
3265 
3266  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3267  lck = (kmp_user_lock_p)crit;
3268  if (__kmp_env_consistency_check)
3269  __kmp_pop_sync(global_tid, ct_critical, loc);
3270  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3271  } else {
3272  kmp_indirect_lock_t *ilk =
3273  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3274  if (__kmp_env_consistency_check)
3275  __kmp_pop_sync(global_tid, ct_critical, loc);
3276  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3277  }
3278 
3279 #else // KMP_USE_DYNAMIC_LOCK
3280 
3281  // We know that the fast reduction code is only emitted by Intel compilers
3282  // with 32 byte critical sections. If there isn't enough space, then we have
3283  // to use a pointer.
3284  if (__kmp_base_user_lock_size > 32) {
3285  lck = *((kmp_user_lock_p *)crit);
3286  KMP_ASSERT(lck != NULL);
3287  } else {
3288  lck = (kmp_user_lock_p)crit;
3289  }
3290 
3291  if (__kmp_env_consistency_check)
3292  __kmp_pop_sync(global_tid, ct_critical, loc);
3293 
3294  __kmp_release_user_lock_with_checks(lck, global_tid);
3295 
3296 #endif // KMP_USE_DYNAMIC_LOCK
3297 } // __kmp_end_critical_section_reduce_block
3298 
3299 static __forceinline int
3300 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3301  int *task_state) {
3302  kmp_team_t *team;
3303 
3304  // Check if we are inside the teams construct?
3305  if (th->th.th_teams_microtask) {
3306  *team_p = team = th->th.th_team;
3307  if (team->t.t_level == th->th.th_teams_level) {
3308  // This is reduction at teams construct.
3309  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3310  // Let's swap teams temporarily for the reduction.
3311  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3312  th->th.th_team = team->t.t_parent;
3313  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3314  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3315  *task_state = th->th.th_task_state;
3316  th->th.th_task_state = 0;
3317 
3318  return 1;
3319  }
3320  }
3321  return 0;
3322 }
3323 
3324 static __forceinline void
3325 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3326  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3327  th->th.th_info.ds.ds_tid = 0;
3328  th->th.th_team = team;
3329  th->th.th_team_nproc = team->t.t_nproc;
3330  th->th.th_task_team = team->t.t_task_team[task_state];
3331  th->th.th_task_state = task_state;
3332 }
3333 
3334 /* 2.a.i. Reduce Block without a terminating barrier */
3350 kmp_int32
3351 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3352  size_t reduce_size, void *reduce_data,
3353  void (*reduce_func)(void *lhs_data, void *rhs_data),
3354  kmp_critical_name *lck) {
3355 
3356  KMP_COUNT_BLOCK(REDUCE_nowait);
3357  int retval = 0;
3358  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3359  kmp_info_t *th;
3360  kmp_team_t *team;
3361  int teams_swapped = 0, task_state;
3362  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3363  __kmp_assert_valid_gtid(global_tid);
3364 
3365  // why do we need this initialization here at all?
3366  // Reduction clause can not be used as a stand-alone directive.
3367 
3368  // do not call __kmp_serial_initialize(), it will be called by
3369  // __kmp_parallel_initialize() if needed
3370  // possible detection of false-positive race by the threadchecker ???
3371  if (!TCR_4(__kmp_init_parallel))
3372  __kmp_parallel_initialize();
3373 
3374  __kmp_resume_if_soft_paused();
3375 
3376 // check correctness of reduce block nesting
3377 #if KMP_USE_DYNAMIC_LOCK
3378  if (__kmp_env_consistency_check)
3379  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3380 #else
3381  if (__kmp_env_consistency_check)
3382  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3383 #endif
3384 
3385  th = __kmp_thread_from_gtid(global_tid);
3386  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3387 
3388  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3389  // the value should be kept in a variable
3390  // the variable should be either a construct-specific or thread-specific
3391  // property, not a team specific property
3392  // (a thread can reach the next reduce block on the next construct, reduce
3393  // method may differ on the next construct)
3394  // an ident_t "loc" parameter could be used as a construct-specific property
3395  // (what if loc == 0?)
3396  // (if both construct-specific and team-specific variables were shared,
3397  // then unnecessary extra syncs would be needed)
3398  // a thread-specific variable is better regarding two issues above (next
3399  // construct and extra syncs)
3400  // a thread-specific "th_local.reduction_method" variable is used currently
3401  // each thread executes the 'determine' and 'set' lines (no need to restrict
3402  // this to one thread; doing it in every thread avoids unnecessary extra syncs)
3403 
3404  packed_reduction_method = __kmp_determine_reduction_method(
3405  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3406  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3407 
3408  OMPT_REDUCTION_DECL(th, global_tid);
3409  if (packed_reduction_method == critical_reduce_block) {
3410 
3411  OMPT_REDUCTION_BEGIN;
3412 
3413  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3414  retval = 1;
3415 
3416  } else if (packed_reduction_method == empty_reduce_block) {
3417 
3418  OMPT_REDUCTION_BEGIN;
3419 
3420  // usage: if team size == 1, no synchronization is required ( Intel
3421  // platforms only )
3422  retval = 1;
3423 
3424  } else if (packed_reduction_method == atomic_reduce_block) {
3425 
3426  retval = 2;
3427 
3428  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3429  // won't be called by the code gen)
3430  // (this is not ideal: the checking block has already been closed by
3431  // this 'pop',
3432  // but the atomic operation has not executed yet; it executes
3433  // slightly later, literally on the next instruction)
3434  if (__kmp_env_consistency_check)
3435  __kmp_pop_sync(global_tid, ct_reduce, loc);
3436 
3437  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3438  tree_reduce_block)) {
3439 
3440 // AT: performance issue: a real barrier here
3441 // AT: (if master goes slow, other threads are blocked here waiting for the
3442 // master to come and release them)
3443 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3444 // AT: (specifying NOWAIT won't result in any performance improvement; it'll
3445 // just be confusing to a customer)
3446 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3447 // might go faster and be more in line with the sense of NOWAIT
3448 // AT: TO DO: run the EPCC test and compare times
3449 
3450 // this barrier should be invisible to a customer and to the threading profile
3451 // tool (it's neither a terminating barrier nor customer's code, it's
3452 // used for an internal purpose)
3453 #if OMPT_SUPPORT
3454  // JP: can this barrier potentially lead to task scheduling?
3455  // JP: as long as there is a barrier in the implementation, OMPT should and
3456  // will provide the barrier events
3457  // so we set-up the necessary frame/return addresses.
3458  ompt_frame_t *ompt_frame;
3459  if (ompt_enabled.enabled) {
3460  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3461  if (ompt_frame->enter_frame.ptr == NULL)
3462  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3463  OMPT_STORE_RETURN_ADDRESS(global_tid);
3464  }
3465 #endif
3466 #if USE_ITT_NOTIFY
3467  __kmp_threads[global_tid]->th.th_ident = loc;
3468 #endif
3469  retval =
3470  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3471  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3472  retval = (retval != 0) ? (0) : (1);
3473 #if OMPT_SUPPORT && OMPT_OPTIONAL
3474  if (ompt_enabled.enabled) {
3475  ompt_frame->enter_frame = ompt_data_none;
3476  }
3477 #endif
3478 
3479  // all workers except the master should do this pop here
3480  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3481  if (__kmp_env_consistency_check) {
3482  if (retval == 0) {
3483  __kmp_pop_sync(global_tid, ct_reduce, loc);
3484  }
3485  }
3486 
3487  } else {
3488 
3489  // should never reach this block
3490  KMP_ASSERT(0); // "unexpected method"
3491  }
3492  if (teams_swapped) {
3493  __kmp_restore_swapped_teams(th, team, task_state);
3494  }
3495  KA_TRACE(
3496  10,
3497  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3498  global_tid, packed_reduction_method, retval));
3499 
3500  return retval;
3501 }
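/* For illustration only (not part of the library): the return value tells the
   caller how to finish the reduction: 1 means "combine here, then call
   __kmpc_end_reduce_nowait()" (critical and empty methods, and the master for
   the tree method); 2 means "combine with atomics, no end call" (hence the pop
   above); 0 means another thread already owns the combined result. A hedged
   sketch of compiler-style lowering for reduction(+:sum), simplified to a
   single int (reduce_fn, my_sum, sum and crit are hypothetical names):

     static void reduce_fn(void *lhs, void *rhs) {
       *(int *)lhs += *(int *)rhs; // combine two private copies
     }

     // ... inside the parallel region; my_sum is this thread's private copy:
     static kmp_critical_name crit;
     int ret = __kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &my_sum,
                                    reduce_fn, &crit);
     if (ret == 1) {        // combine here, then close the reduce region
       sum += my_sum;
       __kmpc_end_reduce_nowait(loc, gtid, &crit);
     } else if (ret == 2) { // atomic path: no end call is generated
     #pragma omp atomic
       sum += my_sum;
     }                      // ret == 0: nothing left for this thread to do
*/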
3502 
3511 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3512  kmp_critical_name *lck) {
3513 
3514  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3515 
3516  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3517  __kmp_assert_valid_gtid(global_tid);
3518 
3519  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3520 
3521  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3522 
3523  if (packed_reduction_method == critical_reduce_block) {
3524 
3525  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3526  OMPT_REDUCTION_END;
3527 
3528  } else if (packed_reduction_method == empty_reduce_block) {
3529 
3530  // usage: if team size == 1, no synchronization is required ( on Intel
3531  // platforms only )
3532 
3533  OMPT_REDUCTION_END;
3534 
3535  } else if (packed_reduction_method == atomic_reduce_block) {
3536 
3537  // neither master nor other workers should get here
3538  // (code gen does not generate this call in case 2: atomic reduce block)
3539  // actually it would be better to remove this 'else if' entirely;
3540  // after removal the value would be checked by the 'else' branch and would assert
3541 
3542  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3543  tree_reduce_block)) {
3544 
3545  // only master gets here
3546  // OMPT: tree reduction is annotated in the barrier code
3547 
3548  } else {
3549 
3550  // should never reach this block
3551  KMP_ASSERT(0); // "unexpected method"
3552  }
3553 
3554  if (__kmp_env_consistency_check)
3555  __kmp_pop_sync(global_tid, ct_reduce, loc);
3556 
3557  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3558  global_tid, packed_reduction_method));
3559 
3560  return;
3561 }
3562 
3563 /* 2.a.ii. Reduce Block with a terminating barrier */
3564 
3580 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3581  size_t reduce_size, void *reduce_data,
3582  void (*reduce_func)(void *lhs_data, void *rhs_data),
3583  kmp_critical_name *lck) {
3584  KMP_COUNT_BLOCK(REDUCE_wait);
3585  int retval = 0;
3586  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3587  kmp_info_t *th;
3588  kmp_team_t *team;
3589  int teams_swapped = 0, task_state;
3590 
3591  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3592  __kmp_assert_valid_gtid(global_tid);
3593 
3594  // why do we need this initialization here at all?
3595  // Reduction clause can not be a stand-alone directive.
3596 
3597  // do not call __kmp_serial_initialize(), it will be called by
3598  // __kmp_parallel_initialize() if needed
3599  // possible detection of false-positive race by the threadchecker ???
3600  if (!TCR_4(__kmp_init_parallel))
3601  __kmp_parallel_initialize();
3602 
3603  __kmp_resume_if_soft_paused();
3604 
3605 // check correctness of reduce block nesting
3606 #if KMP_USE_DYNAMIC_LOCK
3607  if (__kmp_env_consistency_check)
3608  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3609 #else
3610  if (__kmp_env_consistency_check)
3611  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3612 #endif
3613 
3614  th = __kmp_thread_from_gtid(global_tid);
3615  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3616 
3617  packed_reduction_method = __kmp_determine_reduction_method(
3618  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3619  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3620 
3621  OMPT_REDUCTION_DECL(th, global_tid);
3622 
3623  if (packed_reduction_method == critical_reduce_block) {
3624 
3625  OMPT_REDUCTION_BEGIN;
3626  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3627  retval = 1;
3628 
3629  } else if (packed_reduction_method == empty_reduce_block) {
3630 
3631  OMPT_REDUCTION_BEGIN;
3632  // usage: if team size == 1, no synchronization is required ( Intel
3633  // platforms only )
3634  retval = 1;
3635 
3636  } else if (packed_reduction_method == atomic_reduce_block) {
3637 
3638  retval = 2;
3639 
3640  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3641  tree_reduce_block)) {
3642 
3643 // case tree_reduce_block:
3644 // this barrier should be visible to a customer and to the threading profile
3645 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3646 #if OMPT_SUPPORT
3647  ompt_frame_t *ompt_frame;
3648  if (ompt_enabled.enabled) {
3649  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3650  if (ompt_frame->enter_frame.ptr == NULL)
3651  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3652  OMPT_STORE_RETURN_ADDRESS(global_tid);
3653  }
3654 #endif
3655 #if USE_ITT_NOTIFY
3656  __kmp_threads[global_tid]->th.th_ident =
3657  loc; // needed for correct notification of frames
3658 #endif
3659  retval =
3660  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3661  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3662  retval = (retval != 0) ? (0) : (1);
3663 #if OMPT_SUPPORT && OMPT_OPTIONAL
3664  if (ompt_enabled.enabled) {
3665  ompt_frame->enter_frame = ompt_data_none;
3666  }
3667 #endif
3668 
3669  // All workers except the master should do this pop here
3670  // (no worker other than the master will enter __kmpc_end_reduce()).
3671  if (__kmp_env_consistency_check) {
3672  if (retval == 0) { // 0: all other workers; 1: master
3673  __kmp_pop_sync(global_tid, ct_reduce, loc);
3674  }
3675  }
3676 
3677  } else {
3678 
3679  // should never reach this block
3680  KMP_ASSERT(0); // "unexpected method"
3681  }
3682  if (teams_swapped) {
3683  __kmp_restore_swapped_teams(th, team, task_state);
3684  }
3685 
3686  KA_TRACE(10,
3687  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3688  global_tid, packed_reduction_method, retval));
3689  return retval;
3690 }
3691 
3702 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3703  kmp_critical_name *lck) {
3704 
3705  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3706  kmp_info_t *th;
3707  kmp_team_t *team;
3708  int teams_swapped = 0, task_state;
3709 
3710  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3711  __kmp_assert_valid_gtid(global_tid);
3712 
3713  th = __kmp_thread_from_gtid(global_tid);
3714  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3715 
3716  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3717 
3718  // This barrier should be visible to the user and to the threading profile
3719  // tool (it is a terminating barrier on constructs if NOWAIT is not specified).
3720  OMPT_REDUCTION_DECL(th, global_tid);
3721 
3722  if (packed_reduction_method == critical_reduce_block) {
3723  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3724 
3725  OMPT_REDUCTION_END;
3726 
3727 // TODO: implicit barrier: should be exposed
3728 #if OMPT_SUPPORT
3729  ompt_frame_t *ompt_frame;
3730  if (ompt_enabled.enabled) {
3731  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3732  if (ompt_frame->enter_frame.ptr == NULL)
3733  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3734  OMPT_STORE_RETURN_ADDRESS(global_tid);
3735  }
3736 #endif
3737 #if USE_ITT_NOTIFY
3738  __kmp_threads[global_tid]->th.th_ident = loc;
3739 #endif
3740  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3741 #if OMPT_SUPPORT && OMPT_OPTIONAL
3742  if (ompt_enabled.enabled) {
3743  ompt_frame->enter_frame = ompt_data_none;
3744  }
3745 #endif
3746 
3747  } else if (packed_reduction_method == empty_reduce_block) {
3748 
3749  OMPT_REDUCTION_END;
3750 
3751 // usage: if team size == 1, no synchronization is required (Intel platforms only)
3752 
3753 // TODO: implicit barrier: should be exposed
3754 #if OMPT_SUPPORT
3755  ompt_frame_t *ompt_frame;
3756  if (ompt_enabled.enabled) {
3757  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3758  if (ompt_frame->enter_frame.ptr == NULL)
3759  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3760  OMPT_STORE_RETURN_ADDRESS(global_tid);
3761  }
3762 #endif
3763 #if USE_ITT_NOTIFY
3764  __kmp_threads[global_tid]->th.th_ident = loc;
3765 #endif
3766  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3767 #if OMPT_SUPPORT && OMPT_OPTIONAL
3768  if (ompt_enabled.enabled) {
3769  ompt_frame->enter_frame = ompt_data_none;
3770  }
3771 #endif
3772 
3773  } else if (packed_reduction_method == atomic_reduce_block) {
3774 
3775 #if OMPT_SUPPORT
3776  ompt_frame_t *ompt_frame;
3777  if (ompt_enabled.enabled) {
3778  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3779  if (ompt_frame->enter_frame.ptr == NULL)
3780  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3781  OMPT_STORE_RETURN_ADDRESS(global_tid);
3782  }
3783 #endif
3784 // TODO: implicit barrier: should be exposed
3785 #if USE_ITT_NOTIFY
3786  __kmp_threads[global_tid]->th.th_ident = loc;
3787 #endif
3788  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3789 #if OMPT_SUPPORT && OMPT_OPTIONAL
3790  if (ompt_enabled.enabled) {
3791  ompt_frame->enter_frame = ompt_data_none;
3792  }
3793 #endif
3794 
3795  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3796  tree_reduce_block)) {
3797 
3798  // only master executes here (master releases all other workers)
3799  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3800  global_tid);
3801 
3802  } else {
3803 
3804  // should never reach this block
3805  KMP_ASSERT(0); // "unexpected method"
3806  }
3807  if (teams_swapped) {
3808  __kmp_restore_swapped_teams(th, team, task_state);
3809  }
3810 
3811  if (__kmp_env_consistency_check)
3812  __kmp_pop_sync(global_tid, ct_reduce, loc);
3813 
3814  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3815  global_tid, packed_reduction_method));
3816 
3817  return;
3818 }
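/* Illustrative sketch (not part of the runtime): the blocking counterpart,
   using the same hypothetical names as the sketch after __kmpc_end_reduce_nowait()
   above. The return value has the same meaning as for the nowait form, but
   because the construct ends with a terminating barrier, generated code is
   expected to call __kmpc_end_reduce() on both the "1" and the "2" paths; as the
   branches above show, __kmpc_end_reduce() is what executes the closing barrier
   for the critical, empty and atomic methods, while the tree method releases its
   split barrier instead.

     switch (__kmpc_reduce(loc, gtid, 1, sizeof(red_list), red_list,
                           my_reduce_func, &my_lock)) {
     case 1: // combine non-atomically, then end (and execute the barrier)
       sum += sum_local;
       __kmpc_end_reduce(loc, gtid, &my_lock);
       break;
     case 2: // combine atomically, then still end (the barrier must run)
       __atomic_fetch_add(&sum, sum_local, __ATOMIC_RELAXED);
       __kmpc_end_reduce(loc, gtid, &my_lock);
       break;
     default: // 0: nothing to do on this thread
       break;
     }
*/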
3819 
3820 #undef __KMP_GET_REDUCTION_METHOD
3821 #undef __KMP_SET_REDUCTION_METHOD
3822 
3823 /* end of interface to fast scalable reduce routines */
3824 
3825 kmp_uint64 __kmpc_get_taskid() {
3826 
3827  kmp_int32 gtid;
3828  kmp_info_t *thread;
3829 
3830  gtid = __kmp_get_gtid();
3831  if (gtid < 0) {
3832  return 0;
3833  }
3834  thread = __kmp_thread_from_gtid(gtid);
3835  return thread->th.th_current_task->td_task_id;
3836 
3837 } // __kmpc_get_taskid
3838 
3839 kmp_uint64 __kmpc_get_parent_taskid() {
3840 
3841  kmp_int32 gtid;
3842  kmp_info_t *thread;
3843  kmp_taskdata_t *parent_task;
3844 
3845  gtid = __kmp_get_gtid();
3846  if (gtid < 0) {
3847  return 0;
3848  }
3849  thread = __kmp_thread_from_gtid(gtid);
3850  parent_task = thread->th.th_current_task->td_parent;
3851  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3852 
3853 } // __kmpc_get_parent_taskid
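/* Illustrative usage: both queries may be called from any thread. When the
   calling thread has no OpenMP global thread id (gtid < 0), or the current
   task has no parent, they return 0.

     kmp_uint64 task_id   = __kmpc_get_taskid();
     kmp_uint64 parent_id = __kmpc_get_parent_taskid();
*/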
3854 
3866 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3867  const struct kmp_dim *dims) {
3868  __kmp_assert_valid_gtid(gtid);
3869  int j, idx;
3870  kmp_int64 last, trace_count;
3871  kmp_info_t *th = __kmp_threads[gtid];
3872  kmp_team_t *team = th->th.th_team;
3873  kmp_uint32 *flags;
3874  kmp_disp_t *pr_buf = th->th.th_dispatch;
3875  dispatch_shared_info_t *sh_buf;
3876 
3877  KA_TRACE(
3878  20,
3879  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3880  gtid, num_dims, !team->t.t_serialized));
3881  KMP_DEBUG_ASSERT(dims != NULL);
3882  KMP_DEBUG_ASSERT(num_dims > 0);
3883 
3884  if (team->t.t_serialized) {
3885  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3886  return; // no dependencies if team is serialized
3887  }
3888  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3889  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3890  // the next loop
3891  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3892 
3893  // Save bounds info into allocated private buffer
3894  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3895  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3896  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3897  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3898  pr_buf->th_doacross_info[0] =
3899  (kmp_int64)num_dims; // first element is number of dimensions
3900  // Also save the address of num_done so it can be accessed later without
3901  // knowing the buffer index.
3902  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3903  pr_buf->th_doacross_info[2] = dims[0].lo;
3904  pr_buf->th_doacross_info[3] = dims[0].up;
3905  pr_buf->th_doacross_info[4] = dims[0].st;
3906  last = 5;
3907  for (j = 1; j < num_dims; ++j) {
3908  kmp_int64
3909  range_length; // To keep ranges of all dimensions but the first dims[0]
3910  if (dims[j].st == 1) { // most common case
3911  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3912  range_length = dims[j].up - dims[j].lo + 1;
3913  } else {
3914  if (dims[j].st > 0) {
3915  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3916  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3917  } else { // negative increment
3918  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3919  range_length =
3920  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3921  }
3922  }
3923  pr_buf->th_doacross_info[last++] = range_length;
3924  pr_buf->th_doacross_info[last++] = dims[j].lo;
3925  pr_buf->th_doacross_info[last++] = dims[j].up;
3926  pr_buf->th_doacross_info[last++] = dims[j].st;
3927  }
3928 
3929  // Compute total trip count.
3930  // Start with range of dims[0] which we don't need to keep in the buffer.
3931  if (dims[0].st == 1) { // most common case
3932  trace_count = dims[0].up - dims[0].lo + 1;
3933  } else if (dims[0].st > 0) {
3934  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3935  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3936  } else { // negative increment
3937  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3938  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3939  }
3940  for (j = 1; j < num_dims; ++j) {
3941  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3942  }
3943  KMP_DEBUG_ASSERT(trace_count > 0);
3944 
3945  // Check that the shared buffer is not still occupied by a previous loop
3946  // (the one that used buffer index idx - __kmp_dispatch_num_buffers).
3947  if (idx != sh_buf->doacross_buf_idx) {
3948  // Shared buffer is occupied, wait for it to be free
3949  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3950  __kmp_eq_4, NULL);
3951  }
3952 #if KMP_32_BIT_ARCH
3953  // Check if we are the first thread. After the CAS the first thread gets 0,
3954  // others get 1 if initialization is in progress, allocated pointer otherwise.
3955  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3956  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3957  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3958 #else
3959  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3960  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3961 #endif
3962  if (flags == NULL) {
3963  // we are the first thread, allocate the array of flags
3964  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3965  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3966  KMP_MB();
3967  sh_buf->doacross_flags = flags;
3968  } else if (flags == (kmp_uint32 *)1) {
3969 #if KMP_32_BIT_ARCH
3970  // initialization is still in progress, need to wait
3971  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3972 #else
3973  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3974 #endif
3975  KMP_YIELD(TRUE);
3976  KMP_MB();
3977  } else {
3978  KMP_MB();
3979  }
3980  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3981  pr_buf->th_doacross_flags =
3982  sh_buf->doacross_flags; // save private copy in order to not
3983  // touch shared buffer on each iteration
3984  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3985 }
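/* Illustrative sketch (not part of the runtime): for a doacross loop such as

     // #pragma omp for ordered(1)
     // for (i = 0; i < N; ++i) { ... }

   a compiler is expected to describe each ordered dimension with a kmp_dim
   (inclusive lower/upper bound plus stride, exactly the fields read above) and
   call __kmpc_doacross_init() on each thread before the loop body runs.
   N, loc and gtid are assumed to be available in the generated code.

     struct kmp_dim dims[1];
     dims[0].lo = 0;      // first iteration value
     dims[0].up = N - 1;  // last iteration value (inclusive)
     dims[0].st = 1;      // loop stride
     __kmpc_doacross_init(loc, gtid, 1, dims);
*/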
3986 
3987 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3988  __kmp_assert_valid_gtid(gtid);
3989  kmp_int32 shft, num_dims, i;
3990  kmp_uint32 flag;
3991  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3992  kmp_info_t *th = __kmp_threads[gtid];
3993  kmp_team_t *team = th->th.th_team;
3994  kmp_disp_t *pr_buf;
3995  kmp_int64 lo, up, st;
3996 
3997  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3998  if (team->t.t_serialized) {
3999  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4000  return; // no dependencies if team is serialized
4001  }
4002 
4003  // calculate sequential iteration number and check out-of-bounds condition
4004  pr_buf = th->th.th_dispatch;
4005  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4006  num_dims = pr_buf->th_doacross_info[0];
4007  lo = pr_buf->th_doacross_info[2];
4008  up = pr_buf->th_doacross_info[3];
4009  st = pr_buf->th_doacross_info[4];
4010 #if OMPT_SUPPORT && OMPT_OPTIONAL
4011  ompt_dependence_t deps[num_dims];
4012 #endif
4013  if (st == 1) { // most common case
4014  if (vec[0] < lo || vec[0] > up) {
4015  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4016  "bounds [%lld,%lld]\n",
4017  gtid, vec[0], lo, up));
4018  return;
4019  }
4020  iter_number = vec[0] - lo;
4021  } else if (st > 0) {
4022  if (vec[0] < lo || vec[0] > up) {
4023  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4024  "bounds [%lld,%lld]\n",
4025  gtid, vec[0], lo, up));
4026  return;
4027  }
4028  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4029  } else { // negative increment
4030  if (vec[0] > lo || vec[0] < up) {
4031  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4032  "bounds [%lld,%lld]\n",
4033  gtid, vec[0], lo, up));
4034  return;
4035  }
4036  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4037  }
4038 #if OMPT_SUPPORT && OMPT_OPTIONAL
4039  deps[0].variable.value = iter_number;
4040  deps[0].dependence_type = ompt_dependence_type_sink;
4041 #endif
4042  for (i = 1; i < num_dims; ++i) {
4043  kmp_int64 iter, ln;
4044  kmp_int32 j = i * 4;
4045  ln = pr_buf->th_doacross_info[j + 1];
4046  lo = pr_buf->th_doacross_info[j + 2];
4047  up = pr_buf->th_doacross_info[j + 3];
4048  st = pr_buf->th_doacross_info[j + 4];
4049  if (st == 1) {
4050  if (vec[i] < lo || vec[i] > up) {
4051  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4052  "bounds [%lld,%lld]\n",
4053  gtid, vec[i], lo, up));
4054  return;
4055  }
4056  iter = vec[i] - lo;
4057  } else if (st > 0) {
4058  if (vec[i] < lo || vec[i] > up) {
4059  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4060  "bounds [%lld,%lld]\n",
4061  gtid, vec[i], lo, up));
4062  return;
4063  }
4064  iter = (kmp_uint64)(vec[i] - lo) / st;
4065  } else { // st < 0
4066  if (vec[i] > lo || vec[i] < up) {
4067  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4068  "bounds [%lld,%lld]\n",
4069  gtid, vec[i], lo, up));
4070  return;
4071  }
4072  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4073  }
4074  iter_number = iter + ln * iter_number;
4075 #if OMPT_SUPPORT && OMPT_OPTIONAL
4076  deps[i].variable.value = iter;
4077  deps[i].dependence_type = ompt_dependence_type_sink;
4078 #endif
4079  }
4080  shft = iter_number % 32; // use 32-bit granularity
4081  iter_number >>= 5; // divided by 32
4082  flag = 1 << shft;
4083  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4084  KMP_YIELD(TRUE);
4085  }
4086  KMP_MB();
4087 #if OMPT_SUPPORT && OMPT_OPTIONAL
4088  if (ompt_enabled.ompt_callback_dependences) {
4089  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4090  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
4091  }
4092 #endif
4093  KA_TRACE(20,
4094  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4095  gtid, (iter_number << 5) + shft));
4096 }
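/* Worked example of the flag addressing used above: completion flags are kept
   one bit per iteration in 32-bit words. For sequential iteration number 100,
   shft = 100 % 32 = 4 and the word index is 100 >> 5 = 3, so the waiter spins
   until bit 4 of th_doacross_flags[3] is set by the matching
   __kmpc_doacross_post() call. */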
4097 
4098 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4099  __kmp_assert_valid_gtid(gtid);
4100  kmp_int32 shft, num_dims, i;
4101  kmp_uint32 flag;
4102  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4103  kmp_info_t *th = __kmp_threads[gtid];
4104  kmp_team_t *team = th->th.th_team;
4105  kmp_disp_t *pr_buf;
4106  kmp_int64 lo, st;
4107 
4108  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4109  if (team->t.t_serialized) {
4110  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4111  return; // no dependencies if team is serialized
4112  }
4113 
4114  // calculate sequential iteration number (same as in "wait" but no
4115  // out-of-bounds checks)
4116  pr_buf = th->th.th_dispatch;
4117  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4118  num_dims = pr_buf->th_doacross_info[0];
4119  lo = pr_buf->th_doacross_info[2];
4120  st = pr_buf->th_doacross_info[4];
4121 #if OMPT_SUPPORT && OMPT_OPTIONAL
4122  ompt_dependence_t deps[num_dims];
4123 #endif
4124  if (st == 1) { // most common case
4125  iter_number = vec[0] - lo;
4126  } else if (st > 0) {
4127  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4128  } else { // negative increment
4129  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4130  }
4131 #if OMPT_SUPPORT && OMPT_OPTIONAL
4132  deps[0].variable.value = iter_number;
4133  deps[0].dependence_type = ompt_dependence_type_source;
4134 #endif
4135  for (i = 1; i < num_dims; ++i) {
4136  kmp_int64 iter, ln;
4137  kmp_int32 j = i * 4;
4138  ln = pr_buf->th_doacross_info[j + 1];
4139  lo = pr_buf->th_doacross_info[j + 2];
4140  st = pr_buf->th_doacross_info[j + 4];
4141  if (st == 1) {
4142  iter = vec[i] - lo;
4143  } else if (st > 0) {
4144  iter = (kmp_uint64)(vec[i] - lo) / st;
4145  } else { // st < 0
4146  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4147  }
4148  iter_number = iter + ln * iter_number;
4149 #if OMPT_SUPPORT && OMPT_OPTIONAL
4150  deps[i].variable.value = iter;
4151  deps[i].dependence_type = ompt_dependence_type_source;
4152 #endif
4153  }
4154 #if OMPT_SUPPORT && OMPT_OPTIONAL
4155  if (ompt_enabled.ompt_callback_dependences) {
4156  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4157  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
4158  }
4159 #endif
4160  shft = iter_number % 32; // use 32-bit granularity
4161  iter_number >>= 5; // divided by 32
4162  flag = 1 << shft;
4163  KMP_MB();
4164  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4165  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4166  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4167  (iter_number << 5) + shft));
4168 }
4169 
4170 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4171  __kmp_assert_valid_gtid(gtid);
4172  kmp_int32 num_done;
4173  kmp_info_t *th = __kmp_threads[gtid];
4174  kmp_team_t *team = th->th.th_team;
4175  kmp_disp_t *pr_buf = th->th.th_dispatch;
4176 
4177  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4178  if (team->t.t_serialized) {
4179  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4180  return; // nothing to do
4181  }
4182  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4183  if (num_done == th->th.th_team_nproc) {
4184  // we are the last thread, need to free shared resources
4185  int idx = pr_buf->th_doacross_buf_idx - 1;
4186  dispatch_shared_info_t *sh_buf =
4187  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4188  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4189  (kmp_int64)&sh_buf->doacross_num_done);
4190  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4191  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4192  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4193  sh_buf->doacross_flags = NULL;
4194  sh_buf->doacross_num_done = 0;
4195  sh_buf->doacross_buf_idx +=
4196  __kmp_dispatch_num_buffers; // free buffer for future re-use
4197  }
4198  // free private resources (need to keep buffer index forever)
4199  pr_buf->th_doacross_flags = NULL;
4200  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4201  pr_buf->th_doacross_info = NULL;
4202  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4203 }
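/* Illustrative sketch (not part of the runtime): the full per-thread call
   sequence for a one-dimensional doacross loop with
   "ordered depend(sink: i - 1)" and "ordered depend(source)" clauses. Only the
   __kmpc_doacross_* entry points are real; dims, loc, gtid and the chunk bounds
   are assumptions. An out-of-range sink vector (here i - 1 when i == 0) is
   simply ignored by __kmpc_doacross_wait(), as its bounds checks show.

     __kmpc_doacross_init(loc, gtid, 1, dims);      // once, before the loop
     for (i = chunk_lo; i <= chunk_up; ++i) {       // iterations of this thread
       kmp_int64 sink_vec[1] = { i - 1 };
       __kmpc_doacross_wait(loc, gtid, sink_vec);   // wait for iteration i - 1
       // ... loop body that consumes results of iteration i - 1 ...
       kmp_int64 src_vec[1] = { i };
       __kmpc_doacross_post(loc, gtid, src_vec);    // publish iteration i
     }
     __kmpc_doacross_fini(loc, gtid);               // once, after the loop
*/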
4204 
4205 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4206 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4207  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4208 }
4209 
4210 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4211  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4212 }
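/* Illustrative usage of the user-facing entry points declared in omp.h;
   omp_default_mem_alloc is one of the predefined OpenMP 5.0 allocator handles.

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/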
4213 
4214 int __kmpc_get_target_offload(void) {
4215  if (!__kmp_init_serial) {
4216  __kmp_serial_initialize();
4217  }
4218  return __kmp_target_offload;
4219 }
4220 
4221 int __kmpc_pause_resource(kmp_pause_status_t level) {
4222  if (!__kmp_init_serial) {
4223  return 1; // Can't pause if runtime is not initialized
4224  }
4225  return __kmp_pause_resource(level);
4226 }
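/* Illustrative sketch: __kmpc_pause_resource() is an internal entry point; a
   nonzero return (including the early "return 1" above) means the pause request
   was not honored. User code normally uses the OpenMP 5.0 API from omp.h, e.g.:

     if (omp_pause_resource_all(omp_pause_soft) != 0) {
       // the runtime declined to release its resources
     }
*/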