LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  __kmp_assign_root_init_mask();
43  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44  } else if (__kmp_ignore_mppbeg() == FALSE) {
45  // By default __kmp_ignore_mppbeg() returns TRUE.
46  __kmp_internal_begin();
47  KC_TRACE(10, ("__kmpc_begin: called\n"));
48  }
49 }
50 
59 void __kmpc_end(ident_t *loc) {
60  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63  // returns FALSE and __kmpc_end() will unregister this root (it can cause
64  // library shut down).
65  if (__kmp_ignore_mppend() == FALSE) {
66  KC_TRACE(10, ("__kmpc_end: called\n"));
67  KA_TRACE(30, ("__kmpc_end\n"));
68 
69  __kmp_internal_end_thread(-1);
70  }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72  // Normal exit process on Windows does not allow worker threads of the final
73  // parallel region to finish reporting their events, so shutting down the
74  // library here fixes the issue at least for the cases where __kmpc_end() is
75  // placed properly.
76  if (ompt_enabled.enabled)
77  __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80 
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103 
104  return gtid;
105 }
106 
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122  KC_TRACE(10,
123  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 
125  return TCR_4(__kmp_all_nth);
126 }
127 
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136  return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138 
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 
147  return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149 
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158 
159  return TRUE;
160 
161 #else
162 
163  const char *semi2;
164  const char *semi3;
165  int line_no;
166 
167  if (__kmp_par_range == 0) {
168  return TRUE;
169  }
170  semi2 = loc->psource;
171  if (semi2 == NULL) {
172  return TRUE;
173  }
174  semi2 = strchr(semi2, ';');
175  if (semi2 == NULL) {
176  return TRUE;
177  }
178  semi2 = strchr(semi2 + 1, ';');
179  if (semi2 == NULL) {
180  return TRUE;
181  }
182  if (__kmp_par_range_filename[0]) {
183  const char *name = semi2 - 1;
184  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185  name--;
186  }
187  if ((*name == '/') || (*name == ';')) {
188  name++;
189  }
190  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191  return __kmp_par_range < 0;
192  }
193  }
194  semi3 = strchr(semi2 + 1, ';');
195  if (__kmp_par_range_routine[0]) {
196  if ((semi3 != NULL) && (semi3 > semi2) &&
197  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198  return __kmp_par_range < 0;
199  }
200  }
201  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203  return __kmp_par_range > 0;
204  }
205  return __kmp_par_range < 0;
206  }
207  return TRUE;
208 
209 #endif /* KMP_DEBUG */
210 }
211 
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219  return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221 
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232  kmp_int32 num_threads) {
233  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234  global_tid, num_threads));
235  __kmp_assert_valid_gtid(global_tid);
236  __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238 
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248  __kmp_assert_valid_gtid(global_tid);
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
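For context, a minimal sketch of how a compiler might use these push entry points for a parallel construct with num_threads and proc_bind clauses; loc stands for a compiler-built ident_t, outlined_fn is a hypothetical outlined body, proc_bind_close is the kmp_proc_bind_t enumerator from kmp.h, and the final call goes to __kmpc_fork_call, listed below:

    // #pragma omp parallel num_threads(4) proc_bind(close)  -- approximate lowering:
    kmp_int32 gtid = __kmpc_global_thread_num(&loc);
    __kmpc_push_num_threads(&loc, gtid, 4);
    __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
    __kmpc_fork_call(&loc, /*argc=*/0, (kmpc_micro)outlined_fn);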
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe saving thr_state is enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293  }
294  OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296 
297 #if INCLUDE_SSC_MARKS
298  SSC_MARK_FORKING();
299 #endif
300  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303  kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_JOINING();
306 #endif
307  __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309  ,
310  fork_context_intel
311 #endif
312  );
313 
314  va_end(ap);
315 
316 #if OMPT_SUPPORT
317  if (ompt_enabled.enabled) {
318  ompt_frame->enter_frame = ompt_data_none;
319  }
320 #endif
321  }
322 
323 #if KMP_STATS_ENABLED
324  if (previous_state == stats_state_e::SERIAL_REGION) {
325  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326  KMP_SET_THREAD_STATE(previous_state);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
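As an illustrative sketch (not part of this file), #pragma omp parallel is typically lowered to a call of __kmpc_fork_call with a compiler-outlined microtask; the names outlined_fn, loc, and x below are hypothetical, and real compiler output differs in detail:

    // Outlined parallel-region body; the runtime supplies the global and bound
    // thread ids, followed by the captured shared arguments.
    static void outlined_fn(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *x) {
      // ... body of the parallel region, operating on *x ...
    }

    // #pragma omp parallel shared(x)  -- approximate lowering:
    int x = 0;
    __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &x);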
332 
344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345  kmp_int32 num_teams, kmp_int32 num_threads) {
346  KA_TRACE(20,
347  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348  global_tid, num_teams, num_threads));
349  __kmp_assert_valid_gtid(global_tid);
350  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351 }
352 
369 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371  kmp_int32 num_threads) {
372  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373  " num_teams_ub=%d num_threads=%d\n",
374  global_tid, num_teams_lb, num_teams_ub, num_threads));
375  __kmp_assert_valid_gtid(global_tid);
376  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377  num_threads);
378 }
379 
390 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391  ...) {
392  int gtid = __kmp_entry_gtid();
393  kmp_info_t *this_thr = __kmp_threads[gtid];
394  va_list ap;
395  va_start(ap, microtask);
396 
397 #if KMP_STATS_ENABLED
398  KMP_COUNT_BLOCK(OMP_TEAMS);
399  stats_state_e previous_state = KMP_GET_THREAD_STATE();
400  if (previous_state == stats_state_e::SERIAL_REGION) {
401  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402  } else {
403  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404  }
405 #endif
406 
407  // remember teams entry point and nesting level
408  this_thr->th.th_teams_microtask = microtask;
409  this_thr->th.th_teams_level =
410  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411 
412 #if OMPT_SUPPORT
413  kmp_team_t *parent_team = this_thr->th.th_team;
414  int tid = __kmp_tid_from_gtid(gtid);
415  if (ompt_enabled.enabled) {
416  parent_team->t.t_implicit_task_taskdata[tid]
417  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418  }
419  OMPT_STORE_RETURN_ADDRESS(gtid);
420 #endif
421 
422  // Check whether __kmpc_push_num_teams was called; set the default number of
423  // teams otherwise.
424  if (this_thr->th.th_teams_size.nteams == 0) {
425  __kmp_push_num_teams(loc, gtid, 0, 0);
426  }
427  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430 
431  __kmp_fork_call(
432  loc, gtid, fork_context_intel, argc,
433  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435  __kmp_join_call(loc, gtid
436 #if OMPT_SUPPORT
437  ,
438  fork_context_intel
439 #endif
440  );
441 
442  // Pop current CG root off list
443  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445  this_thr->th.th_cg_roots = tmp->up;
446  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447  " to node %p. cg_nthreads was %d\n",
448  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450  int i = tmp->cg_nthreads--;
451  if (i == 1) { // check if we are the last thread in the CG (not always the case)
452  __kmp_free(tmp);
453  }
454  // Restore current task's thread_limit from CG root
455  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456  this_thr->th.th_current_task->td_icvs.thread_limit =
457  this_thr->th.th_cg_roots->cg_thread_limit;
458 
459  this_thr->th.th_teams_microtask = NULL;
460  this_thr->th.th_teams_level = 0;
461  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462  va_end(ap);
463 #if KMP_STATS_ENABLED
464  if (previous_state == stats_state_e::SERIAL_REGION) {
465  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466  KMP_SET_THREAD_STATE(previous_state);
467  } else {
468  KMP_POP_PARTITIONED_TIMER();
469  }
470 #endif // KMP_STATS_ENABLED
471 }
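A rough sketch of how a teams construct might reach this entry point; teams_outlined_fn and loc are hypothetical names, and the push call is only needed when num_teams/thread_limit clauses are present:

    // #pragma omp teams num_teams(8) thread_limit(16)  -- approximate lowering:
    kmp_int32 gtid = __kmpc_global_thread_num(&loc);
    __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/8, /*num_threads=*/16);
    __kmpc_fork_teams(&loc, /*argc=*/0, (kmpc_micro)teams_outlined_fn);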
472 
473 // I don't think this function should ever have been exported.
474 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475 // openmp code ever called it, but it's been exported from the RTL for so
476 // long that I'm afraid to remove the definition.
477 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478 
491 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492  // The implementation is now in kmp_runtime.cpp so that it can share static
493  // functions with kmp_fork_call since the tasks to be done are similar in
494  // each case.
495  __kmp_assert_valid_gtid(global_tid);
496 #if OMPT_SUPPORT
497  OMPT_STORE_RETURN_ADDRESS(global_tid);
498 #endif
499  __kmp_serialized_parallel(loc, global_tid);
500 }
501 
509 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510  kmp_internal_control_t *top;
511  kmp_info_t *this_thr;
512  kmp_team_t *serial_team;
513 
514  KC_TRACE(10,
515  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516 
517  /* skip all this code for autopar serialized loops since it results in
518  unacceptable overhead */
519  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520  return;
521 
522  // Not autopar code
523  __kmp_assert_valid_gtid(global_tid);
524  if (!TCR_4(__kmp_init_parallel))
525  __kmp_parallel_initialize();
526 
527  __kmp_resume_if_soft_paused();
528 
529  this_thr = __kmp_threads[global_tid];
530  serial_team = this_thr->th.th_serial_team;
531 
532  kmp_task_team_t *task_team = this_thr->th.th_task_team;
533  // we need to wait for the proxy tasks before finishing the thread
534  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
535  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
536 
537  KMP_MB();
538  KMP_DEBUG_ASSERT(serial_team);
539  KMP_ASSERT(serial_team->t.t_serialized);
540  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
541  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
542  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
543  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
544 
545 #if OMPT_SUPPORT
546  if (ompt_enabled.enabled &&
547  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
548  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
549  if (ompt_enabled.ompt_callback_implicit_task) {
550  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
551  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
552  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
553  }
554 
555  // reset/clear the task id only after unlinking the task
556  ompt_data_t *parent_task_data;
557  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
558 
559  if (ompt_enabled.ompt_callback_parallel_end) {
560  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
561  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
562  ompt_parallel_invoker_program | ompt_parallel_team,
563  OMPT_LOAD_RETURN_ADDRESS(global_tid));
564  }
565  __ompt_lw_taskteam_unlink(this_thr);
566  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
567  }
568 #endif
569 
570  /* If necessary, pop the internal control stack values and replace the team
571  * values */
572  top = serial_team->t.t_control_stack_top;
573  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
574  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
575  serial_team->t.t_control_stack_top = top->next;
576  __kmp_free(top);
577  }
578 
579  /* pop dispatch buffers stack */
580  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
581  {
582  dispatch_private_info_t *disp_buffer =
583  serial_team->t.t_dispatch->th_disp_buffer;
584  serial_team->t.t_dispatch->th_disp_buffer =
585  serial_team->t.t_dispatch->th_disp_buffer->next;
586  __kmp_free(disp_buffer);
587  }
588  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
589 
590  --serial_team->t.t_serialized;
591  if (serial_team->t.t_serialized == 0) {
592 
593  /* return to the parallel section */
594 
595 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
596  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
597  __kmp_clear_x87_fpu_status_word();
598  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
599  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
600  }
601 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
602 
603  __kmp_pop_current_task_from_thread(this_thr);
604 #if OMPD_SUPPORT
605  if (ompd_state & OMPD_ENABLE_BP)
606  ompd_bp_parallel_end();
607 #endif
608 
609  this_thr->th.th_team = serial_team->t.t_parent;
610  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
611 
612  /* restore values cached in the thread */
613  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
614  this_thr->th.th_team_master =
615  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
616  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
617 
618  /* TODO the below shouldn't need to be adjusted for serialized teams */
619  this_thr->th.th_dispatch =
620  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
621 
622  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
623  this_thr->th.th_current_task->td_flags.executing = 1;
624 
625  if (__kmp_tasking_mode != tskm_immediate_exec) {
626  // Copy the task team from the new child / old parent team to the thread.
627  this_thr->th.th_task_team =
628  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
629  KA_TRACE(20,
630  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
631  "team %p\n",
632  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
633  }
634  } else {
635  if (__kmp_tasking_mode != tskm_immediate_exec) {
636  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
637  "depth of serial team %p to %d\n",
638  global_tid, serial_team, serial_team->t.t_serialized));
639  }
640  }
641 
642  serial_team->t.t_level--;
643  if (__kmp_env_consistency_check)
644  __kmp_pop_parallel(global_tid, NULL);
645 #if OMPT_SUPPORT
646  if (ompt_enabled.enabled)
647  this_thr->th.ompt_thread_info.state =
648  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
649  : ompt_state_work_parallel);
650 #endif
651 }
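These two entry points bracket a parallel region that the compiler already knows will run serially (for example, an if(0) clause); a hedged sketch, with outlined_fn, loc, and x as placeholder names:

    // #pragma omp parallel if(0)  -- approximate lowering:
    kmp_int32 gtid = __kmpc_global_thread_num(&loc);
    __kmpc_serialized_parallel(&loc, gtid);
    kmp_int32 bound_tid = 0;
    outlined_fn(&gtid, &bound_tid, &x); // invoked directly on the encountering thread
    __kmpc_end_serialized_parallel(&loc, gtid);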
652 
661 void __kmpc_flush(ident_t *loc) {
662  KC_TRACE(10, ("__kmpc_flush: called\n"));
663 
664  /* need explicit __mf() here since we use volatile instead in the library */
665  KMP_MB(); /* Flush all pending memory write invalidates. */
666 
667 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
668 #if KMP_MIC
669 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
670 // We shouldn't need it, though, since the ABI rules require that
671 // * If the compiler generates NGO stores it also generates the fence
672 // * If users hand-code NGO stores they should insert the fence
673 // therefore no incomplete unordered stores should be visible.
674 #else
675  // C74404
676  // This is to address non-temporal store instructions (sfence needed).
677  // The clflush instruction is addressed as well (mfence needed).
678  // Probably the non-temporal load movntdqa instruction should also be
679  // addressed.
680  // mfence is an SSE2 instruction. Do not execute it if the CPU lacks SSE2.
681  if (!__kmp_cpuinfo.initialized) {
682  __kmp_query_cpuid(&__kmp_cpuinfo);
683  }
684  if (!__kmp_cpuinfo.flags.sse2) {
685  // CPU cannot execute SSE2 instructions.
686  } else {
687 #if KMP_COMPILER_ICC
688  _mm_mfence();
689 #elif KMP_COMPILER_MSVC
690  MemoryBarrier();
691 #else
692  __sync_synchronize();
693 #endif // KMP_COMPILER_ICC
694  }
695 #endif // KMP_MIC
696 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
697  KMP_ARCH_RISCV64)
698 // Nothing to see here; move along.
699 #elif KMP_ARCH_PPC64
700 // Nothing needed here (we have a real MB above).
701 #else
702 #error Unknown or unsupported architecture
703 #endif
704 
705 #if OMPT_SUPPORT && OMPT_OPTIONAL
706  if (ompt_enabled.ompt_callback_flush) {
707  ompt_callbacks.ompt_callback(ompt_callback_flush)(
708  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
709  }
710 #endif
711 }
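A flush directive maps directly onto this entry point; a minimal sketch, with loc standing for a compiler-built ident_t:

    // #pragma omp flush  -- approximate lowering:
    __kmpc_flush(&loc);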
712 
713 /* -------------------------------------------------------------------------- */
721 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
722  KMP_COUNT_BLOCK(OMP_BARRIER);
723  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
724  __kmp_assert_valid_gtid(global_tid);
725 
726  if (!TCR_4(__kmp_init_parallel))
727  __kmp_parallel_initialize();
728 
729  __kmp_resume_if_soft_paused();
730 
731  if (__kmp_env_consistency_check) {
732  if (loc == 0) {
733  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
734  }
735  __kmp_check_barrier(global_tid, ct_barrier, loc);
736  }
737 
738 #if OMPT_SUPPORT
739  ompt_frame_t *ompt_frame;
740  if (ompt_enabled.enabled) {
741  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
742  if (ompt_frame->enter_frame.ptr == NULL)
743  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
744  }
745  OMPT_STORE_RETURN_ADDRESS(global_tid);
746 #endif
747  __kmp_threads[global_tid]->th.th_ident = loc;
748  // TODO: explicit barrier_wait_id:
749  // this function is called when the 'barrier' directive is present or at the
750  // implicit barrier at the end of a worksharing construct.
751  // 1) better to add a per-thread barrier counter to a thread data structure
752  // 2) set to 0 when a new team is created
753  // 3) no sync is required
754 
755  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
756 #if OMPT_SUPPORT && OMPT_OPTIONAL
757  if (ompt_enabled.enabled) {
758  ompt_frame->enter_frame = ompt_data_none;
759  }
760 #endif
761 }
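For reference, an explicit barrier lowers to a single call here; a sketch, with loc a compiler-built ident_t:

    // #pragma omp barrier  -- approximate lowering:
    kmp_int32 gtid = __kmpc_global_thread_num(&loc);
    __kmpc_barrier(&loc, gtid);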
762 
763 /* The BARRIER for a MASTER section is always explicit */
770 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
771  int status = 0;
772 
773  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
774  __kmp_assert_valid_gtid(global_tid);
775 
776  if (!TCR_4(__kmp_init_parallel))
777  __kmp_parallel_initialize();
778 
779  __kmp_resume_if_soft_paused();
780 
781  if (KMP_MASTER_GTID(global_tid)) {
782  KMP_COUNT_BLOCK(OMP_MASTER);
783  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
784  status = 1;
785  }
786 
787 #if OMPT_SUPPORT && OMPT_OPTIONAL
788  if (status) {
789  if (ompt_enabled.ompt_callback_masked) {
790  kmp_info_t *this_thr = __kmp_threads[global_tid];
791  kmp_team_t *team = this_thr->th.th_team;
792 
793  int tid = __kmp_tid_from_gtid(global_tid);
794  ompt_callbacks.ompt_callback(ompt_callback_masked)(
795  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
796  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
797  OMPT_GET_RETURN_ADDRESS(0));
798  }
799  }
800 #endif
801 
802  if (__kmp_env_consistency_check) {
803 #if KMP_USE_DYNAMIC_LOCK
804  if (status)
805  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
806  else
807  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
808 #else
809  if (status)
810  __kmp_push_sync(global_tid, ct_master, loc, NULL);
811  else
812  __kmp_check_sync(global_tid, ct_master, loc, NULL);
813 #endif
814  }
815 
816  return status;
817 }
818 
827 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
828  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
829  __kmp_assert_valid_gtid(global_tid);
830  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
831  KMP_POP_PARTITIONED_TIMER();
832 
833 #if OMPT_SUPPORT && OMPT_OPTIONAL
834  kmp_info_t *this_thr = __kmp_threads[global_tid];
835  kmp_team_t *team = this_thr->th.th_team;
836  if (ompt_enabled.ompt_callback_masked) {
837  int tid = __kmp_tid_from_gtid(global_tid);
838  ompt_callbacks.ompt_callback(ompt_callback_masked)(
839  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
840  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
841  OMPT_GET_RETURN_ADDRESS(0));
842  }
843 #endif
844 
845  if (__kmp_env_consistency_check) {
846  if (KMP_MASTER_GTID(global_tid))
847  __kmp_pop_sync(global_tid, ct_master, loc);
848  }
849 }
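A master construct is guarded by the return value of __kmpc_master and closed only on the thread that entered; a sketch with loc and gtid assumed to be already set up (the masked construct below follows the same pattern with an extra filter argument):

    // #pragma omp master  -- approximate lowering (no implied barrier):
    if (__kmpc_master(&loc, gtid)) {
      // ... body executed by the primary thread only ...
      __kmpc_end_master(&loc, gtid);
    }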
850 
859 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
860  int status = 0;
861  int tid;
862  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
863  __kmp_assert_valid_gtid(global_tid);
864 
865  if (!TCR_4(__kmp_init_parallel))
866  __kmp_parallel_initialize();
867 
868  __kmp_resume_if_soft_paused();
869 
870  tid = __kmp_tid_from_gtid(global_tid);
871  if (tid == filter) {
872  KMP_COUNT_BLOCK(OMP_MASKED);
873  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
874  status = 1;
875  }
876 
877 #if OMPT_SUPPORT && OMPT_OPTIONAL
878  if (status) {
879  if (ompt_enabled.ompt_callback_masked) {
880  kmp_info_t *this_thr = __kmp_threads[global_tid];
881  kmp_team_t *team = this_thr->th.th_team;
882  ompt_callbacks.ompt_callback(ompt_callback_masked)(
883  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
884  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
885  OMPT_GET_RETURN_ADDRESS(0));
886  }
887  }
888 #endif
889 
890  if (__kmp_env_consistency_check) {
891 #if KMP_USE_DYNAMIC_LOCK
892  if (status)
893  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
894  else
895  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
896 #else
897  if (status)
898  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
899  else
900  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
901 #endif
902  }
903 
904  return status;
905 }
906 
915 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
916  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
917  __kmp_assert_valid_gtid(global_tid);
918  KMP_POP_PARTITIONED_TIMER();
919 
920 #if OMPT_SUPPORT && OMPT_OPTIONAL
921  kmp_info_t *this_thr = __kmp_threads[global_tid];
922  kmp_team_t *team = this_thr->th.th_team;
923  if (ompt_enabled.ompt_callback_masked) {
924  int tid = __kmp_tid_from_gtid(global_tid);
925  ompt_callbacks.ompt_callback(ompt_callback_masked)(
926  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
927  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
928  OMPT_GET_RETURN_ADDRESS(0));
929  }
930 #endif
931 
932  if (__kmp_env_consistency_check) {
933  __kmp_pop_sync(global_tid, ct_masked, loc);
934  }
935 }
936 
944 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
945  int cid = 0;
946  kmp_info_t *th;
947  KMP_DEBUG_ASSERT(__kmp_init_serial);
948 
949  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
950  __kmp_assert_valid_gtid(gtid);
951 
952  if (!TCR_4(__kmp_init_parallel))
953  __kmp_parallel_initialize();
954 
955  __kmp_resume_if_soft_paused();
956 
957 #if USE_ITT_BUILD
958  __kmp_itt_ordered_prep(gtid);
959 // TODO: ordered_wait_id
960 #endif /* USE_ITT_BUILD */
961 
962  th = __kmp_threads[gtid];
963 
964 #if OMPT_SUPPORT && OMPT_OPTIONAL
965  kmp_team_t *team;
966  ompt_wait_id_t lck;
967  void *codeptr_ra;
968  OMPT_STORE_RETURN_ADDRESS(gtid);
969  if (ompt_enabled.enabled) {
970  team = __kmp_team_from_gtid(gtid);
971  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
972  /* OMPT state update */
973  th->th.ompt_thread_info.wait_id = lck;
974  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
975 
976  /* OMPT event callback */
977  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
978  if (ompt_enabled.ompt_callback_mutex_acquire) {
979  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
980  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
981  codeptr_ra);
982  }
983  }
984 #endif
985 
986  if (th->th.th_dispatch->th_deo_fcn != 0)
987  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
988  else
989  __kmp_parallel_deo(&gtid, &cid, loc);
990 
991 #if OMPT_SUPPORT && OMPT_OPTIONAL
992  if (ompt_enabled.enabled) {
993  /* OMPT state update */
994  th->th.ompt_thread_info.state = ompt_state_work_parallel;
995  th->th.ompt_thread_info.wait_id = 0;
996 
997  /* OMPT event callback */
998  if (ompt_enabled.ompt_callback_mutex_acquired) {
999  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1000  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1001  }
1002  }
1003 #endif
1004 
1005 #if USE_ITT_BUILD
1006  __kmp_itt_ordered_start(gtid);
1007 #endif /* USE_ITT_BUILD */
1008 }
1009 
1017 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1018  int cid = 0;
1019  kmp_info_t *th;
1020 
1021  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1022  __kmp_assert_valid_gtid(gtid);
1023 
1024 #if USE_ITT_BUILD
1025  __kmp_itt_ordered_end(gtid);
1026 // TODO: ordered_wait_id
1027 #endif /* USE_ITT_BUILD */
1028 
1029  th = __kmp_threads[gtid];
1030 
1031  if (th->th.th_dispatch->th_dxo_fcn != 0)
1032  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1033  else
1034  __kmp_parallel_dxo(&gtid, &cid, loc);
1035 
1036 #if OMPT_SUPPORT && OMPT_OPTIONAL
1037  OMPT_STORE_RETURN_ADDRESS(gtid);
1038  if (ompt_enabled.ompt_callback_mutex_released) {
1039  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1040  ompt_mutex_ordered,
1041  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1042  ->t.t_ordered.dt.t_value,
1043  OMPT_LOAD_RETURN_ADDRESS(gtid));
1044  }
1045 #endif
1046 }
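Inside a worksharing loop with the ordered clause, each ordered block is bracketed by this pair; a sketch assuming loc and gtid are already available:

    // #pragma omp ordered (inside an ordered loop)  -- approximate lowering:
    __kmpc_ordered(&loc, gtid);
    // ... ordered body, executed in iteration order ...
    __kmpc_end_ordered(&loc, gtid);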
1047 
1048 #if KMP_USE_DYNAMIC_LOCK
1049 
1050 static __forceinline void
1051 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1052  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1053  // Pointer to the allocated indirect lock is written to crit, while indexing
1054  // is ignored.
1055  void *idx;
1056  kmp_indirect_lock_t **lck;
1057  lck = (kmp_indirect_lock_t **)crit;
1058  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1059  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1060  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1061  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1062  KA_TRACE(20,
1063  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1064 #if USE_ITT_BUILD
1065  __kmp_itt_critical_creating(ilk->lock, loc);
1066 #endif
1067  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1068  if (status == 0) {
1069 #if USE_ITT_BUILD
1070  __kmp_itt_critical_destroyed(ilk->lock);
1071 #endif
1072  // We don't really need to destroy the unclaimed lock here since it will be
1073  // cleaned up at program exit.
1074  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1075  }
1076  KMP_DEBUG_ASSERT(*lck != NULL);
1077 }
1078 
1079 // Fast-path acquire tas lock
1080 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1081  { \
1082  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1083  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1084  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1085  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1086  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1087  kmp_uint32 spins; \
1088  KMP_FSYNC_PREPARE(l); \
1089  KMP_INIT_YIELD(spins); \
1090  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1091  do { \
1092  if (TCR_4(__kmp_nth) > \
1093  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1094  KMP_YIELD(TRUE); \
1095  } else { \
1096  KMP_YIELD_SPIN(spins); \
1097  } \
1098  __kmp_spin_backoff(&backoff); \
1099  } while ( \
1100  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1101  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1102  } \
1103  KMP_FSYNC_ACQUIRED(l); \
1104  }
1105 
1106 // Fast-path test tas lock
1107 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1108  { \
1109  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1110  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1111  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1112  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1113  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1114  }
1115 
1116 // Fast-path release tas lock
1117 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1118  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1119 
1120 #if KMP_USE_FUTEX
1121 
1122 #include <sys/syscall.h>
1123 #include <unistd.h>
1124 #ifndef FUTEX_WAIT
1125 #define FUTEX_WAIT 0
1126 #endif
1127 #ifndef FUTEX_WAKE
1128 #define FUTEX_WAKE 1
1129 #endif
1130 
1131 // Fast-path acquire futex lock
1132 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1133  { \
1134  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1135  kmp_int32 gtid_code = (gtid + 1) << 1; \
1136  KMP_MB(); \
1137  KMP_FSYNC_PREPARE(ftx); \
1138  kmp_int32 poll_val; \
1139  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1140  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1141  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1142  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1143  if (!cond) { \
1144  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1145  poll_val | \
1146  KMP_LOCK_BUSY(1, futex))) { \
1147  continue; \
1148  } \
1149  poll_val |= KMP_LOCK_BUSY(1, futex); \
1150  } \
1151  kmp_int32 rc; \
1152  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1153  NULL, NULL, 0)) != 0) { \
1154  continue; \
1155  } \
1156  gtid_code |= 1; \
1157  } \
1158  KMP_FSYNC_ACQUIRED(ftx); \
1159  }
1160 
1161 // Fast-path test futex lock
1162 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1163  { \
1164  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1165  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1166  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1167  KMP_FSYNC_ACQUIRED(ftx); \
1168  rc = TRUE; \
1169  } else { \
1170  rc = FALSE; \
1171  } \
1172  }
1173 
1174 // Fast-path release futex lock
1175 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1176  { \
1177  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1178  KMP_MB(); \
1179  KMP_FSYNC_RELEASING(ftx); \
1180  kmp_int32 poll_val = \
1181  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1182  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1183  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1184  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1185  } \
1186  KMP_MB(); \
1187  KMP_YIELD_OVERSUB(); \
1188  }
1189 
1190 #endif // KMP_USE_FUTEX
1191 
1192 #else // KMP_USE_DYNAMIC_LOCK
1193 
1194 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1195  ident_t const *loc,
1196  kmp_int32 gtid) {
1197  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1198 
1199  // Because of the double-check, the following load doesn't need to be volatile
1200  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1201 
1202  if (lck == NULL) {
1203  void *idx;
1204 
1205  // Allocate & initialize the lock.
1206  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1207  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1208  __kmp_init_user_lock_with_checks(lck);
1209  __kmp_set_user_lock_location(lck, loc);
1210 #if USE_ITT_BUILD
1211  __kmp_itt_critical_creating(lck);
1212 // __kmp_itt_critical_creating() should be called *before* the first usage
1213 // of the underlying lock. It is the only place where we can guarantee it.
1214 // There is a chance the lock will be destroyed without ever being used, but
1215 // that is not a problem, because this is not a real event seen by the user;
1216 // it merely sets a name for the object (lock). See more details in kmp_itt.h.
1217 #endif /* USE_ITT_BUILD */
1218 
1219  // Use a cmpxchg instruction to slam the start of the critical section with
1220  // the lock pointer. If another thread beat us to it, deallocate the lock,
1221  // and use the lock that the other thread allocated.
1222  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1223 
1224  if (status == 0) {
1225 // Deallocate the lock and reload the value.
1226 #if USE_ITT_BUILD
1227  __kmp_itt_critical_destroyed(lck);
1228 // Let ITT know the lock is destroyed and the same memory location may be reused
1229 // for another purpose.
1230 #endif /* USE_ITT_BUILD */
1231  __kmp_destroy_user_lock_with_checks(lck);
1232  __kmp_user_lock_free(&idx, gtid, lck);
1233  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1234  KMP_DEBUG_ASSERT(lck != NULL);
1235  }
1236  }
1237  return lck;
1238 }
1239 
1240 #endif // KMP_USE_DYNAMIC_LOCK
1241 
1252 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1253  kmp_critical_name *crit) {
1254 #if KMP_USE_DYNAMIC_LOCK
1255 #if OMPT_SUPPORT && OMPT_OPTIONAL
1256  OMPT_STORE_RETURN_ADDRESS(global_tid);
1257 #endif // OMPT_SUPPORT
1258  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1259 #else
1260  KMP_COUNT_BLOCK(OMP_CRITICAL);
1261 #if OMPT_SUPPORT && OMPT_OPTIONAL
1262  ompt_state_t prev_state = ompt_state_undefined;
1263  ompt_thread_info_t ti;
1264 #endif
1265  kmp_user_lock_p lck;
1266 
1267  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1268  __kmp_assert_valid_gtid(global_tid);
1269 
1270  // TODO: add THR_OVHD_STATE
1271 
1272  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1273  KMP_CHECK_USER_LOCK_INIT();
1274 
1275  if ((__kmp_user_lock_kind == lk_tas) &&
1276  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1277  lck = (kmp_user_lock_p)crit;
1278  }
1279 #if KMP_USE_FUTEX
1280  else if ((__kmp_user_lock_kind == lk_futex) &&
1281  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1282  lck = (kmp_user_lock_p)crit;
1283  }
1284 #endif
1285  else { // ticket, queuing or drdpa
1286  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1287  }
1288 
1289  if (__kmp_env_consistency_check)
1290  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1291 
1292  // Since the critical directive binds to all threads, not just the current
1293  // team, we have to check this even if we are in a serialized team.
1294  // Also, even if we are the uber thread, we still have to acquire the lock,
1295  // as we have to contend with sibling threads.
1296 
1297 #if USE_ITT_BUILD
1298  __kmp_itt_critical_acquiring(lck);
1299 #endif /* USE_ITT_BUILD */
1300 #if OMPT_SUPPORT && OMPT_OPTIONAL
1301  OMPT_STORE_RETURN_ADDRESS(global_tid);
1302  void *codeptr_ra = NULL;
1303  if (ompt_enabled.enabled) {
1304  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1305  /* OMPT state update */
1306  prev_state = ti.state;
1307  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1308  ti.state = ompt_state_wait_critical;
1309 
1310  /* OMPT event callback */
1311  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1312  if (ompt_enabled.ompt_callback_mutex_acquire) {
1313  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1314  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1315  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1316  }
1317  }
1318 #endif
1319  // Value of 'crit' should be good for using as a critical_id of the critical
1320  // section directive.
1321  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1322 
1323 #if USE_ITT_BUILD
1324  __kmp_itt_critical_acquired(lck);
1325 #endif /* USE_ITT_BUILD */
1326 #if OMPT_SUPPORT && OMPT_OPTIONAL
1327  if (ompt_enabled.enabled) {
1328  /* OMPT state update */
1329  ti.state = prev_state;
1330  ti.wait_id = 0;
1331 
1332  /* OMPT event callback */
1333  if (ompt_enabled.ompt_callback_mutex_acquired) {
1334  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1335  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1336  }
1337  }
1338 #endif
1339  KMP_POP_PARTITIONED_TIMER();
1340 
1341  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1342  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1343 #endif // KMP_USE_DYNAMIC_LOCK
1344 }
1345 
1346 #if KMP_USE_DYNAMIC_LOCK
1347 
1348 // Converts the given hint to an internal lock implementation
1349 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1350 #if KMP_USE_TSX
1351 #define KMP_TSX_LOCK(seq) lockseq_##seq
1352 #else
1353 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1354 #endif
1355 
1356 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1357 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1358 #else
1359 #define KMP_CPUINFO_RTM 0
1360 #endif
1361 
1362  // Hints that do not require further logic
1363  if (hint & kmp_lock_hint_hle)
1364  return KMP_TSX_LOCK(hle);
1365  if (hint & kmp_lock_hint_rtm)
1366  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1367  if (hint & kmp_lock_hint_adaptive)
1368  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1369 
1370  // Rule out conflicting hints first by returning the default lock
1371  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1372  return __kmp_user_lock_seq;
1373  if ((hint & omp_lock_hint_speculative) &&
1374  (hint & omp_lock_hint_nonspeculative))
1375  return __kmp_user_lock_seq;
1376 
1377  // Do not even consider speculation when it appears to be contended
1378  if (hint & omp_lock_hint_contended)
1379  return lockseq_queuing;
1380 
1381  // Uncontended lock without speculation
1382  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1383  return lockseq_tas;
1384 
1385  // Use RTM lock for speculation
1386  if (hint & omp_lock_hint_speculative)
1387  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1388 
1389  return __kmp_user_lock_seq;
1390 }
1391 
1392 #if OMPT_SUPPORT && OMPT_OPTIONAL
1393 #if KMP_USE_DYNAMIC_LOCK
1394 static kmp_mutex_impl_t
1395 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1396  if (user_lock) {
1397  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1398  case 0:
1399  break;
1400 #if KMP_USE_FUTEX
1401  case locktag_futex:
1402  return kmp_mutex_impl_queuing;
1403 #endif
1404  case locktag_tas:
1405  return kmp_mutex_impl_spin;
1406 #if KMP_USE_TSX
1407  case locktag_hle:
1408  case locktag_rtm_spin:
1409  return kmp_mutex_impl_speculative;
1410 #endif
1411  default:
1412  return kmp_mutex_impl_none;
1413  }
1414  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1415  }
1416  KMP_ASSERT(ilock);
1417  switch (ilock->type) {
1418 #if KMP_USE_TSX
1419  case locktag_adaptive:
1420  case locktag_rtm_queuing:
1421  return kmp_mutex_impl_speculative;
1422 #endif
1423  case locktag_nested_tas:
1424  return kmp_mutex_impl_spin;
1425 #if KMP_USE_FUTEX
1426  case locktag_nested_futex:
1427 #endif
1428  case locktag_ticket:
1429  case locktag_queuing:
1430  case locktag_drdpa:
1431  case locktag_nested_ticket:
1432  case locktag_nested_queuing:
1433  case locktag_nested_drdpa:
1434  return kmp_mutex_impl_queuing;
1435  default:
1436  return kmp_mutex_impl_none;
1437  }
1438 }
1439 #else
1440 // For locks without dynamic binding
1441 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1442  switch (__kmp_user_lock_kind) {
1443  case lk_tas:
1444  return kmp_mutex_impl_spin;
1445 #if KMP_USE_FUTEX
1446  case lk_futex:
1447 #endif
1448  case lk_ticket:
1449  case lk_queuing:
1450  case lk_drdpa:
1451  return kmp_mutex_impl_queuing;
1452 #if KMP_USE_TSX
1453  case lk_hle:
1454  case lk_rtm_queuing:
1455  case lk_rtm_spin:
1456  case lk_adaptive:
1457  return kmp_mutex_impl_speculative;
1458 #endif
1459  default:
1460  return kmp_mutex_impl_none;
1461  }
1462 }
1463 #endif // KMP_USE_DYNAMIC_LOCK
1464 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1465 
1479 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1480  kmp_critical_name *crit, uint32_t hint) {
1481  KMP_COUNT_BLOCK(OMP_CRITICAL);
1482  kmp_user_lock_p lck;
1483 #if OMPT_SUPPORT && OMPT_OPTIONAL
1484  ompt_state_t prev_state = ompt_state_undefined;
1485  ompt_thread_info_t ti;
1486  // This is the case if called from __kmpc_critical:
1487  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1488  if (!codeptr)
1489  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1490 #endif
1491 
1492  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1493  __kmp_assert_valid_gtid(global_tid);
1494 
1495  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1496  // Check if it is initialized.
1497  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1498  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1499  if (*lk == 0) {
1500  if (KMP_IS_D_LOCK(lockseq)) {
1501  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1502  KMP_GET_D_TAG(lockseq));
1503  } else {
1504  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1505  }
1506  }
1507  // Branch for accessing the actual lock object and set operation. This
1508  // branching is inevitable since this lock initialization does not follow the
1509  // normal dispatch path (lock table is not used).
1510  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1511  lck = (kmp_user_lock_p)lk;
1512  if (__kmp_env_consistency_check) {
1513  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1514  __kmp_map_hint_to_lock(hint));
1515  }
1516 #if USE_ITT_BUILD
1517  __kmp_itt_critical_acquiring(lck);
1518 #endif
1519 #if OMPT_SUPPORT && OMPT_OPTIONAL
1520  if (ompt_enabled.enabled) {
1521  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1522  /* OMPT state update */
1523  prev_state = ti.state;
1524  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1525  ti.state = ompt_state_wait_critical;
1526 
1527  /* OMPT event callback */
1528  if (ompt_enabled.ompt_callback_mutex_acquire) {
1529  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1530  ompt_mutex_critical, (unsigned int)hint,
1531  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1532  codeptr);
1533  }
1534  }
1535 #endif
1536 #if KMP_USE_INLINED_TAS
1537  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1538  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1539  } else
1540 #elif KMP_USE_INLINED_FUTEX
1541  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1542  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1543  } else
1544 #endif
1545  {
1546  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1547  }
1548  } else {
1549  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1550  lck = ilk->lock;
1551  if (__kmp_env_consistency_check) {
1552  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1553  __kmp_map_hint_to_lock(hint));
1554  }
1555 #if USE_ITT_BUILD
1556  __kmp_itt_critical_acquiring(lck);
1557 #endif
1558 #if OMPT_SUPPORT && OMPT_OPTIONAL
1559  if (ompt_enabled.enabled) {
1560  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1561  /* OMPT state update */
1562  prev_state = ti.state;
1563  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1564  ti.state = ompt_state_wait_critical;
1565 
1566  /* OMPT event callback */
1567  if (ompt_enabled.ompt_callback_mutex_acquire) {
1568  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1569  ompt_mutex_critical, (unsigned int)hint,
1570  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1571  codeptr);
1572  }
1573  }
1574 #endif
1575  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1576  }
1577  KMP_POP_PARTITIONED_TIMER();
1578 
1579 #if USE_ITT_BUILD
1580  __kmp_itt_critical_acquired(lck);
1581 #endif /* USE_ITT_BUILD */
1582 #if OMPT_SUPPORT && OMPT_OPTIONAL
1583  if (ompt_enabled.enabled) {
1584  /* OMPT state update */
1585  ti.state = prev_state;
1586  ti.wait_id = 0;
1587 
1588  /* OMPT event callback */
1589  if (ompt_enabled.ompt_callback_mutex_acquired) {
1590  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1591  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1592  }
1593  }
1594 #endif
1595 
1596  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1597  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1598 } // __kmpc_critical_with_hint
1599 
1600 #endif // KMP_USE_DYNAMIC_LOCK
1601 
1611 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1612  kmp_critical_name *crit) {
1613  kmp_user_lock_p lck;
1614 
1615  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1616 
1617 #if KMP_USE_DYNAMIC_LOCK
1618  int locktag = KMP_EXTRACT_D_TAG(crit);
1619  if (locktag) {
1620  lck = (kmp_user_lock_p)crit;
1621  KMP_ASSERT(lck != NULL);
1622  if (__kmp_env_consistency_check) {
1623  __kmp_pop_sync(global_tid, ct_critical, loc);
1624  }
1625 #if USE_ITT_BUILD
1626  __kmp_itt_critical_releasing(lck);
1627 #endif
1628 #if KMP_USE_INLINED_TAS
1629  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1630  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1631  } else
1632 #elif KMP_USE_INLINED_FUTEX
1633  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1634  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1635  } else
1636 #endif
1637  {
1638  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1639  }
1640  } else {
1641  kmp_indirect_lock_t *ilk =
1642  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1643  KMP_ASSERT(ilk != NULL);
1644  lck = ilk->lock;
1645  if (__kmp_env_consistency_check) {
1646  __kmp_pop_sync(global_tid, ct_critical, loc);
1647  }
1648 #if USE_ITT_BUILD
1649  __kmp_itt_critical_releasing(lck);
1650 #endif
1651  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1652  }
1653 
1654 #else // KMP_USE_DYNAMIC_LOCK
1655 
1656  if ((__kmp_user_lock_kind == lk_tas) &&
1657  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1658  lck = (kmp_user_lock_p)crit;
1659  }
1660 #if KMP_USE_FUTEX
1661  else if ((__kmp_user_lock_kind == lk_futex) &&
1662  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1663  lck = (kmp_user_lock_p)crit;
1664  }
1665 #endif
1666  else { // ticket, queuing or drdpa
1667  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1668  }
1669 
1670  KMP_ASSERT(lck != NULL);
1671 
1672  if (__kmp_env_consistency_check)
1673  __kmp_pop_sync(global_tid, ct_critical, loc);
1674 
1675 #if USE_ITT_BUILD
1676  __kmp_itt_critical_releasing(lck);
1677 #endif /* USE_ITT_BUILD */
1678  // Value of 'crit' should be good for using as a critical_id of the critical
1679  // section directive.
1680  __kmp_release_user_lock_with_checks(lck, global_tid);
1681 
1682 #endif // KMP_USE_DYNAMIC_LOCK
1683 
1684 #if OMPT_SUPPORT && OMPT_OPTIONAL
1685  /* OMPT release event triggers after lock is released; place here to trigger
1686  * for all #if branches */
1687  OMPT_STORE_RETURN_ADDRESS(global_tid);
1688  if (ompt_enabled.ompt_callback_mutex_released) {
1689  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1690  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1691  OMPT_LOAD_RETURN_ADDRESS(0));
1692  }
1693 #endif
1694 
1695  KMP_POP_PARTITIONED_TIMER();
1696  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1697 }
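A named critical construct is bracketed by __kmpc_critical/__kmpc_end_critical around a zero-initialized kmp_critical_name that the compiler emits per name; a sketch with loc and gtid assumed:

    // #pragma omp critical (lck)  -- approximate lowering:
    static kmp_critical_name crit_lck; // zero-initialized, one per critical name
    __kmpc_critical(&loc, gtid, &crit_lck);
    // ... critical section body ...
    __kmpc_end_critical(&loc, gtid, &crit_lck);
    // With a hint clause, the entry call becomes
    // __kmpc_critical_with_hint(&loc, gtid, &crit_lck, omp_sync_hint_speculative).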
1698 
1708 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1709  int status;
1710  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1711  __kmp_assert_valid_gtid(global_tid);
1712 
1713  if (!TCR_4(__kmp_init_parallel))
1714  __kmp_parallel_initialize();
1715 
1716  __kmp_resume_if_soft_paused();
1717 
1718  if (__kmp_env_consistency_check)
1719  __kmp_check_barrier(global_tid, ct_barrier, loc);
1720 
1721 #if OMPT_SUPPORT
1722  ompt_frame_t *ompt_frame;
1723  if (ompt_enabled.enabled) {
1724  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1725  if (ompt_frame->enter_frame.ptr == NULL)
1726  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1727  }
1728  OMPT_STORE_RETURN_ADDRESS(global_tid);
1729 #endif
1730 #if USE_ITT_NOTIFY
1731  __kmp_threads[global_tid]->th.th_ident = loc;
1732 #endif
1733  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1734 #if OMPT_SUPPORT && OMPT_OPTIONAL
1735  if (ompt_enabled.enabled) {
1736  ompt_frame->enter_frame = ompt_data_none;
1737  }
1738 #endif
1739 
1740  return (status != 0) ? 0 : 1;
1741 }
1742 
1752 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1753  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1754  __kmp_assert_valid_gtid(global_tid);
1755  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1756 }
1757 
1768 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1769  kmp_int32 ret;
1770  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1771  __kmp_assert_valid_gtid(global_tid);
1772 
1773  if (!TCR_4(__kmp_init_parallel))
1774  __kmp_parallel_initialize();
1775 
1776  __kmp_resume_if_soft_paused();
1777 
1778  if (__kmp_env_consistency_check) {
1779  if (loc == 0) {
1780  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1781  }
1782  __kmp_check_barrier(global_tid, ct_barrier, loc);
1783  }
1784 
1785 #if OMPT_SUPPORT
1786  ompt_frame_t *ompt_frame;
1787  if (ompt_enabled.enabled) {
1788  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1789  if (ompt_frame->enter_frame.ptr == NULL)
1790  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1791  }
1792  OMPT_STORE_RETURN_ADDRESS(global_tid);
1793 #endif
1794 #if USE_ITT_NOTIFY
1795  __kmp_threads[global_tid]->th.th_ident = loc;
1796 #endif
1797  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1798 #if OMPT_SUPPORT && OMPT_OPTIONAL
1799  if (ompt_enabled.enabled) {
1800  ompt_frame->enter_frame = ompt_data_none;
1801  }
1802 #endif
1803 
1804  ret = __kmpc_master(loc, global_tid);
1805 
1806  if (__kmp_env_consistency_check) {
1807  /* there's no __kmpc_end_master called; so the (stats) */
1808  /* actions of __kmpc_end_master are done here */
1809  if (ret) {
1810  /* only one thread should do the pop since only */
1811  /* one did the push (see __kmpc_master()) */
1812  __kmp_pop_sync(global_tid, ct_master, loc);
1813  }
1814  }
1815 
1816  return (ret);
1817 }
1818 
1819 /* The BARRIER for a SINGLE process section is always explicit */
1831 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1832  __kmp_assert_valid_gtid(global_tid);
1833  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1834 
1835  if (rc) {
1836  // We are going to execute the single statement, so we should count it.
1837  KMP_COUNT_BLOCK(OMP_SINGLE);
1838  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1839  }
1840 
1841 #if OMPT_SUPPORT && OMPT_OPTIONAL
1842  kmp_info_t *this_thr = __kmp_threads[global_tid];
1843  kmp_team_t *team = this_thr->th.th_team;
1844  int tid = __kmp_tid_from_gtid(global_tid);
1845 
1846  if (ompt_enabled.enabled) {
1847  if (rc) {
1848  if (ompt_enabled.ompt_callback_work) {
1849  ompt_callbacks.ompt_callback(ompt_callback_work)(
1850  ompt_work_single_executor, ompt_scope_begin,
1851  &(team->t.ompt_team_info.parallel_data),
1852  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1853  1, OMPT_GET_RETURN_ADDRESS(0));
1854  }
1855  } else {
1856  if (ompt_enabled.ompt_callback_work) {
1857  ompt_callbacks.ompt_callback(ompt_callback_work)(
1858  ompt_work_single_other, ompt_scope_begin,
1859  &(team->t.ompt_team_info.parallel_data),
1860  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1861  1, OMPT_GET_RETURN_ADDRESS(0));
1862  ompt_callbacks.ompt_callback(ompt_callback_work)(
1863  ompt_work_single_other, ompt_scope_end,
1864  &(team->t.ompt_team_info.parallel_data),
1865  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1866  1, OMPT_GET_RETURN_ADDRESS(0));
1867  }
1868  }
1869  }
1870 #endif
1871 
1872  return rc;
1873 }
1874 
1884 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1885  __kmp_assert_valid_gtid(global_tid);
1886  __kmp_exit_single(global_tid);
1887  KMP_POP_PARTITIONED_TIMER();
1888 
1889 #if OMPT_SUPPORT && OMPT_OPTIONAL
1890  kmp_info_t *this_thr = __kmp_threads[global_tid];
1891  kmp_team_t *team = this_thr->th.th_team;
1892  int tid = __kmp_tid_from_gtid(global_tid);
1893 
1894  if (ompt_enabled.ompt_callback_work) {
1895  ompt_callbacks.ompt_callback(ompt_callback_work)(
1896  ompt_work_single_executor, ompt_scope_end,
1897  &(team->t.ompt_team_info.parallel_data),
1898  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1899  OMPT_GET_RETURN_ADDRESS(0));
1900  }
1901 #endif
1902 }
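A single construct tests __kmpc_single, runs the body and __kmpc_end_single only on the winning thread, and ends with a barrier unless nowait is given; a sketch assuming loc and gtid:

    // #pragma omp single  -- approximate lowering:
    if (__kmpc_single(&loc, gtid)) {
      // ... body executed by the first thread to reach the construct ...
      __kmpc_end_single(&loc, gtid);
    }
    __kmpc_barrier(&loc, gtid); // omitted when a nowait clause is present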
1903 
1911 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1912  KMP_POP_PARTITIONED_TIMER();
1913  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1914 
1915 #if OMPT_SUPPORT && OMPT_OPTIONAL
1916  if (ompt_enabled.ompt_callback_work) {
1917  ompt_work_t ompt_work_type = ompt_work_loop;
1918  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1919  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1920  // Determine workshare type
1921  if (loc != NULL) {
1922  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1923  ompt_work_type = ompt_work_loop;
1924  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1925  ompt_work_type = ompt_work_sections;
1926  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1927  ompt_work_type = ompt_work_distribute;
1928  } else {
1929  // use default set above.
1930  // a warning about this case is provided in __kmpc_for_static_init
1931  }
1932  KMP_DEBUG_ASSERT(ompt_work_type);
1933  }
1934  ompt_callbacks.ompt_callback(ompt_callback_work)(
1935  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1936  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1937  }
1938 #endif
1939  if (__kmp_env_consistency_check)
1940  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1941 }
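This fini call closes a statically scheduled worksharing loop whose bounds were computed by a matching __kmpc_for_static_init_* call (defined in kmp_sched.cpp, not in this file); a hedged sketch, where the schedule constant 34 is assumed to be kmp_sch_static and n, loc, gtid are placeholders:

    // #pragma omp for schedule(static)  -- approximate lowering:
    kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
    __kmpc_for_static_init_4(&loc, gtid, /*kmp_sch_static*/ 34, &last, &lower,
                             &upper, &stride, /*incr=*/1, /*chunk=*/1);
    for (kmp_int32 i = lower; i <= upper; ++i) {
      // ... loop body for this thread's chunk ...
    }
    __kmpc_for_static_fini(&loc, gtid);
    __kmpc_barrier(&loc, gtid); // omitted when a nowait clause is present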
1942 
1943 // User routines which take C-style arguments (call by value)
1944 // different from the Fortran equivalent routines
1945 
1946 void ompc_set_num_threads(int arg) {
1947  // !!!!! TODO: check the per-task binding
1948  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1949 }
1950 
1951 void ompc_set_dynamic(int flag) {
1952  kmp_info_t *thread;
1953 
1954  /* For the thread-private implementation of the internal controls */
1955  thread = __kmp_entry_thread();
1956 
1957  __kmp_save_internal_controls(thread);
1958 
1959  set__dynamic(thread, flag ? true : false);
1960 }
1961 
1962 void ompc_set_nested(int flag) {
1963  kmp_info_t *thread;
1964 
1965  /* For the thread-private internal controls implementation */
1966  thread = __kmp_entry_thread();
1967 
1968  __kmp_save_internal_controls(thread);
1969 
1970  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1971 }
1972 
1973 void ompc_set_max_active_levels(int max_active_levels) {
1974  /* TO DO */
1975  /* we want per-task implementation of this internal control */
1976 
1977  /* For the per-thread internal controls implementation */
1978  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1979 }
1980 
1981 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1982  // !!!!! TODO: check the per-task binding
1983  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1984 }
1985 
1986 int ompc_get_ancestor_thread_num(int level) {
1987  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1988 }
1989 
1990 int ompc_get_team_size(int level) {
1991  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1992 }
1993 
1994 /* OpenMP 5.0 Affinity Format API */
1995 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
1996  if (!__kmp_init_serial) {
1997  __kmp_serial_initialize();
1998  }
1999  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2000  format, KMP_STRLEN(format) + 1);
2001 }
2002 
2003 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2004  size_t format_size;
2005  if (!__kmp_init_serial) {
2006  __kmp_serial_initialize();
2007  }
2008  format_size = KMP_STRLEN(__kmp_affinity_format);
2009  if (buffer && size) {
2010  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2011  format_size + 1);
2012  }
2013  return format_size;
2014 }
2015 
2016 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2017  int gtid;
2018  if (!TCR_4(__kmp_init_middle)) {
2019  __kmp_middle_initialize();
2020  }
2021  __kmp_assign_root_init_mask();
2022  gtid = __kmp_get_gtid();
2023  __kmp_aux_display_affinity(gtid, format);
2024 }
2025 
2026 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2027  char const *format) {
2028  int gtid;
2029  size_t num_required;
2030  kmp_str_buf_t capture_buf;
2031  if (!TCR_4(__kmp_init_middle)) {
2032  __kmp_middle_initialize();
2033  }
2034  __kmp_assign_root_init_mask();
2035  gtid = __kmp_get_gtid();
2036  __kmp_str_buf_init(&capture_buf);
2037  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2038  if (buffer && buf_size) {
2039  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2040  capture_buf.used + 1);
2041  }
2042  __kmp_str_buf_free(&capture_buf);
2043  return num_required;
2044 }
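
// Illustrative sketch (not part of the library source): the user-level
// OpenMP 5.0 affinity-format routines from <omp.h> that funnel into the
// ompc_* entry points above. The standard omp_set_affinity_format /
// omp_capture_affinity prototypes are assumed; the format string is only an
// example of the spec's field specifiers.
#if 0
#include <omp.h>
#include <stdio.h>
int main(void) {
  omp_set_affinity_format("host %H pid %P thread %n affinity %A");
  #pragma omp parallel
  {
    char buf[256];
    // A NULL format uses the stored affinity-format; the return value is the
    // number of characters needed, not counting the terminating NUL.
    size_t need = omp_capture_affinity(buf, sizeof(buf), NULL);
    if (need < sizeof(buf))
      printf("%s\n", buf);
  }
  return 0;
}
#endif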
2045 
2046 void kmpc_set_stacksize(int arg) {
2047  // __kmp_aux_set_stacksize initializes the library if needed
2048  __kmp_aux_set_stacksize(arg);
2049 }
2050 
2051 void kmpc_set_stacksize_s(size_t arg) {
2052  // __kmp_aux_set_stacksize initializes the library if needed
2053  __kmp_aux_set_stacksize(arg);
2054 }
2055 
2056 void kmpc_set_blocktime(int arg) {
2057  int gtid, tid;
2058  kmp_info_t *thread;
2059 
2060  gtid = __kmp_entry_gtid();
2061  tid = __kmp_tid_from_gtid(gtid);
2062  thread = __kmp_thread_from_gtid(gtid);
2063 
2064  __kmp_aux_set_blocktime(arg, thread, tid);
2065 }
2066 
2067 void kmpc_set_library(int arg) {
2068  // __kmp_user_set_library initializes the library if needed
2069  __kmp_user_set_library((enum library_type)arg);
2070 }
2071 
2072 void kmpc_set_defaults(char const *str) {
2073  // __kmp_aux_set_defaults initializes the library if needed
2074  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2075 }
2076 
2077 void kmpc_set_disp_num_buffers(int arg) {
2078  // ignore after initialization because some teams have already
2079  // allocated dispatch buffers
2080  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2081  arg <= KMP_MAX_DISP_NUM_BUFF) {
2082  __kmp_dispatch_num_buffers = arg;
2083  }
2084 }
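
// Illustrative sketch (not part of the library source): the kmp_set_*
// extensions exported through <omp.h> that reach the kmpc_set_* wrappers
// above. The kmp_set_defaults / kmp_set_blocktime / kmp_set_stacksize_s
// prototypes are assumed from the library's omp.h; settings such as the stack
// size only take effect if made before the runtime initializes.
#if 0
#include <omp.h>
int main(void) {
  kmp_set_defaults("KMP_AFFINITY=compact");     // parsed like the env string
  kmp_set_blocktime(0);                         // idle workers sleep at once
  kmp_set_stacksize_s((size_t)4 * 1024 * 1024); // 4 MiB worker stacks
  #pragma omp parallel
  { /* ... */ }
  return 0;
}
#endif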
2085 
2086 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2087 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2088  return -1;
2089 #else
2090  if (!TCR_4(__kmp_init_middle)) {
2091  __kmp_middle_initialize();
2092  }
2093  __kmp_assign_root_init_mask();
2094  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2095 #endif
2096 }
2097 
2098 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2099 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2100  return -1;
2101 #else
2102  if (!TCR_4(__kmp_init_middle)) {
2103  __kmp_middle_initialize();
2104  }
2105  __kmp_assign_root_init_mask();
2106  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2107 #endif
2108 }
2109 
2110 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2111 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2112  return -1;
2113 #else
2114  if (!TCR_4(__kmp_init_middle)) {
2115  __kmp_middle_initialize();
2116  }
2117  __kmp_assign_root_init_mask();
2118  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2119 #endif
2120 }
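
// Illustrative sketch (not part of the library source): the user-level
// kmp_*_affinity_mask_proc extensions from <omp.h> that map onto the three
// entry points above. The prototypes and the 0-on-success convention are
// assumptions taken from the library's documented extension API.
#if 0
#include <omp.h>
#include <stdio.h>
void pin_current_thread_to_proc(int proc) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(proc, &mask) != 0 ||
      kmp_set_affinity(&mask) != 0)
    fprintf(stderr, "affinity unsupported or proc %d out of range\n", proc);
  kmp_destroy_affinity_mask(&mask);
}
#endif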
2121 
2122 /* -------------------------------------------------------------------------- */
2167 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2168  void *cpy_data, void (*cpy_func)(void *, void *),
2169  kmp_int32 didit) {
2170  void **data_ptr;
2171  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2172  __kmp_assert_valid_gtid(gtid);
2173 
2174  KMP_MB();
2175 
2176  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2177 
2178  if (__kmp_env_consistency_check) {
2179  if (loc == 0) {
2180  KMP_WARNING(ConstructIdentInvalid);
2181  }
2182  }
2183 
2184  // ToDo: Optimize the following two barriers into some kind of split barrier
2185 
2186  if (didit)
2187  *data_ptr = cpy_data;
2188 
2189 #if OMPT_SUPPORT
2190  ompt_frame_t *ompt_frame;
2191  if (ompt_enabled.enabled) {
2192  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2193  if (ompt_frame->enter_frame.ptr == NULL)
2194  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2195  }
2196  OMPT_STORE_RETURN_ADDRESS(gtid);
2197 #endif
2198 /* This barrier is not a barrier region boundary */
2199 #if USE_ITT_NOTIFY
2200  __kmp_threads[gtid]->th.th_ident = loc;
2201 #endif
2202  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2203 
2204  if (!didit)
2205  (*cpy_func)(cpy_data, *data_ptr);
2206 
2207  // Consider next barrier a user-visible barrier for barrier region boundaries
2208  // Nesting checks are already handled by the single construct checks
2209  {
2210 #if OMPT_SUPPORT
2211  OMPT_STORE_RETURN_ADDRESS(gtid);
2212 #endif
2213 #if USE_ITT_NOTIFY
2214  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2215 // tasks can overwrite the location)
2216 #endif
2217  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2218 #if OMPT_SUPPORT && OMPT_OPTIONAL
2219  if (ompt_enabled.enabled) {
2220  ompt_frame->enter_frame = ompt_data_none;
2221  }
2222 #endif
2223  }
2224 }
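
// Illustrative sketch (not part of the library source): roughly how a
// compiler lowers "#pragma omp single copyprivate(x)" onto the entry point
// above. The __kmpc_single()/__kmpc_end_single() prototypes are assumed from
// kmp.h; copy_x stands in for the compiler-generated copy helper and
// compute_value() is a hypothetical placeholder for the single-region body.
#if 0
static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }

static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = __kmpc_single(loc, gtid); // nonzero on the executing thread
  if (didit) {
    *x = compute_value(); // body of the single region
    __kmpc_end_single(loc, gtid);
  }
  // Every thread calls this: the executor publishes its x, the two barriers
  // noted in the ToDo above separate publication from the copies, and the
  // other threads run copy_x() to copy the value into their own x.
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, copy_x, didit);
}
#endif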
2225 
2226 /* -------------------------------------------------------------------------- */
2227 
2228 #define INIT_LOCK __kmp_init_user_lock_with_checks
2229 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2230 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2231 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2232 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2233 #define ACQUIRE_NESTED_LOCK_TIMED \
2234  __kmp_acquire_nested_user_lock_with_checks_timed
2235 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2236 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2237 #define TEST_LOCK __kmp_test_user_lock_with_checks
2238 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2239 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2240 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2241 
2242 // TODO: Make check abort messages use location info & pass it into
2243 // with_checks routines
2244 
2245 #if KMP_USE_DYNAMIC_LOCK
2246 
2247 // internal lock initializer
2248 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2249  kmp_dyna_lockseq_t seq) {
2250  if (KMP_IS_D_LOCK(seq)) {
2251  KMP_INIT_D_LOCK(lock, seq);
2252 #if USE_ITT_BUILD
2253  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2254 #endif
2255  } else {
2256  KMP_INIT_I_LOCK(lock, seq);
2257 #if USE_ITT_BUILD
2258  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2259  __kmp_itt_lock_creating(ilk->lock, loc);
2260 #endif
2261  }
2262 }
2263 
2264 // internal nest lock initializer
2265 static __forceinline void
2266 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2267  kmp_dyna_lockseq_t seq) {
2268 #if KMP_USE_TSX
2269  // Don't have nested lock implementation for speculative locks
2270  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2271  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2272  seq = __kmp_user_lock_seq;
2273 #endif
2274  switch (seq) {
2275  case lockseq_tas:
2276  seq = lockseq_nested_tas;
2277  break;
2278 #if KMP_USE_FUTEX
2279  case lockseq_futex:
2280  seq = lockseq_nested_futex;
2281  break;
2282 #endif
2283  case lockseq_ticket:
2284  seq = lockseq_nested_ticket;
2285  break;
2286  case lockseq_queuing:
2287  seq = lockseq_nested_queuing;
2288  break;
2289  case lockseq_drdpa:
2290  seq = lockseq_nested_drdpa;
2291  break;
2292  default:
2293  seq = lockseq_nested_queuing;
2294  }
2295  KMP_INIT_I_LOCK(lock, seq);
2296 #if USE_ITT_BUILD
2297  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2298  __kmp_itt_lock_creating(ilk->lock, loc);
2299 #endif
2300 }
2301 
2302 /* initialize the lock with a hint */
2303 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2304  uintptr_t hint) {
2305  KMP_DEBUG_ASSERT(__kmp_init_serial);
2306  if (__kmp_env_consistency_check && user_lock == NULL) {
2307  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2308  }
2309 
2310  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2311 
2312 #if OMPT_SUPPORT && OMPT_OPTIONAL
2313  // This is the case, if called from omp_init_lock_with_hint:
2314  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2315  if (!codeptr)
2316  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2317  if (ompt_enabled.ompt_callback_lock_init) {
2318  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2319  ompt_mutex_lock, (omp_lock_hint_t)hint,
2320  __ompt_get_mutex_impl_type(user_lock),
2321  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2322  }
2323 #endif
2324 }
2325 
2326 /* initialize the lock with a hint */
2327 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2328  void **user_lock, uintptr_t hint) {
2329  KMP_DEBUG_ASSERT(__kmp_init_serial);
2330  if (__kmp_env_consistency_check && user_lock == NULL) {
2331  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2332  }
2333 
2334  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2335 
2336 #if OMPT_SUPPORT && OMPT_OPTIONAL
2337  // This is the case, if called from omp_init_lock_with_hint:
2338  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2339  if (!codeptr)
2340  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2341  if (ompt_enabled.ompt_callback_lock_init) {
2342  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2343  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2344  __ompt_get_mutex_impl_type(user_lock),
2345  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2346  }
2347 #endif
2348 }
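
// Illustrative sketch (not part of the library source): the user-level
// omp_init_lock_with_hint() call (OpenMP 4.5) that reaches
// __kmpc_init_lock_with_hint() above. The hint is only a suggestion: it is
// mapped through __kmp_map_hint_to_lock(), and the runtime may fall back to a
// non-speculative lock when the hardware or configuration cannot honor it.
#if 0
#include <omp.h>
omp_lock_t lk;
void init_and_use(void) {
  omp_init_lock_with_hint(&lk, omp_lock_hint_speculative);
  #pragma omp parallel
  {
    omp_set_lock(&lk);
    /* short, rarely-conflicting critical work */
    omp_unset_lock(&lk);
  }
  omp_destroy_lock(&lk);
}
#endif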
2349 
2350 #endif // KMP_USE_DYNAMIC_LOCK
2351 
2352 /* initialize the lock */
2353 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2354 #if KMP_USE_DYNAMIC_LOCK
2355 
2356  KMP_DEBUG_ASSERT(__kmp_init_serial);
2357  if (__kmp_env_consistency_check && user_lock == NULL) {
2358  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2359  }
2360  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2361 
2362 #if OMPT_SUPPORT && OMPT_OPTIONAL
2363  // This is the case, if called from omp_init_lock_with_hint:
2364  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2365  if (!codeptr)
2366  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2367  if (ompt_enabled.ompt_callback_lock_init) {
2368  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2369  ompt_mutex_lock, omp_lock_hint_none,
2370  __ompt_get_mutex_impl_type(user_lock),
2371  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2372  }
2373 #endif
2374 
2375 #else // KMP_USE_DYNAMIC_LOCK
2376 
2377  static char const *const func = "omp_init_lock";
2378  kmp_user_lock_p lck;
2379  KMP_DEBUG_ASSERT(__kmp_init_serial);
2380 
2381  if (__kmp_env_consistency_check) {
2382  if (user_lock == NULL) {
2383  KMP_FATAL(LockIsUninitialized, func);
2384  }
2385  }
2386 
2387  KMP_CHECK_USER_LOCK_INIT();
2388 
2389  if ((__kmp_user_lock_kind == lk_tas) &&
2390  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2391  lck = (kmp_user_lock_p)user_lock;
2392  }
2393 #if KMP_USE_FUTEX
2394  else if ((__kmp_user_lock_kind == lk_futex) &&
2395  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2396  lck = (kmp_user_lock_p)user_lock;
2397  }
2398 #endif
2399  else {
2400  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2401  }
2402  INIT_LOCK(lck);
2403  __kmp_set_user_lock_location(lck, loc);
2404 
2405 #if OMPT_SUPPORT && OMPT_OPTIONAL
2406  // This is the case, if called from omp_init_lock_with_hint:
2407  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2408  if (!codeptr)
2409  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2410  if (ompt_enabled.ompt_callback_lock_init) {
2411  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2412  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2413  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2414  }
2415 #endif
2416 
2417 #if USE_ITT_BUILD
2418  __kmp_itt_lock_creating(lck);
2419 #endif /* USE_ITT_BUILD */
2420 
2421 #endif // KMP_USE_DYNAMIC_LOCK
2422 } // __kmpc_init_lock
2423 
2424 /* initialize the lock */
2425 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2426 #if KMP_USE_DYNAMIC_LOCK
2427 
2428  KMP_DEBUG_ASSERT(__kmp_init_serial);
2429  if (__kmp_env_consistency_check && user_lock == NULL) {
2430  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2431  }
2432  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2433 
2434 #if OMPT_SUPPORT && OMPT_OPTIONAL
2435  // This is the case, if called from omp_init_lock_with_hint:
2436  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2437  if (!codeptr)
2438  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2439  if (ompt_enabled.ompt_callback_lock_init) {
2440  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2441  ompt_mutex_nest_lock, omp_lock_hint_none,
2442  __ompt_get_mutex_impl_type(user_lock),
2443  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2444  }
2445 #endif
2446 
2447 #else // KMP_USE_DYNAMIC_LOCK
2448 
2449  static char const *const func = "omp_init_nest_lock";
2450  kmp_user_lock_p lck;
2451  KMP_DEBUG_ASSERT(__kmp_init_serial);
2452 
2453  if (__kmp_env_consistency_check) {
2454  if (user_lock == NULL) {
2455  KMP_FATAL(LockIsUninitialized, func);
2456  }
2457  }
2458 
2459  KMP_CHECK_USER_LOCK_INIT();
2460 
2461  if ((__kmp_user_lock_kind == lk_tas) &&
2462  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2463  OMP_NEST_LOCK_T_SIZE)) {
2464  lck = (kmp_user_lock_p)user_lock;
2465  }
2466 #if KMP_USE_FUTEX
2467  else if ((__kmp_user_lock_kind == lk_futex) &&
2468  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2469  OMP_NEST_LOCK_T_SIZE)) {
2470  lck = (kmp_user_lock_p)user_lock;
2471  }
2472 #endif
2473  else {
2474  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2475  }
2476 
2477  INIT_NESTED_LOCK(lck);
2478  __kmp_set_user_lock_location(lck, loc);
2479 
2480 #if OMPT_SUPPORT && OMPT_OPTIONAL
2481  // This is the case, if called from omp_init_lock_with_hint:
2482  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2483  if (!codeptr)
2484  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2485  if (ompt_enabled.ompt_callback_lock_init) {
2486  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2487  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2488  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2489  }
2490 #endif
2491 
2492 #if USE_ITT_BUILD
2493  __kmp_itt_lock_creating(lck);
2494 #endif /* USE_ITT_BUILD */
2495 
2496 #endif // KMP_USE_DYNAMIC_LOCK
2497 } // __kmpc_init_nest_lock
2498 
2499 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2500 #if KMP_USE_DYNAMIC_LOCK
2501 
2502 #if USE_ITT_BUILD
2503  kmp_user_lock_p lck;
2504  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2505  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2506  } else {
2507  lck = (kmp_user_lock_p)user_lock;
2508  }
2509  __kmp_itt_lock_destroyed(lck);
2510 #endif
2511 #if OMPT_SUPPORT && OMPT_OPTIONAL
2512  // This is the case, if called from omp_init_lock_with_hint:
2513  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2514  if (!codeptr)
2515  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2516  if (ompt_enabled.ompt_callback_lock_destroy) {
2517  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2518  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2519  }
2520 #endif
2521  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2522 #else
2523  kmp_user_lock_p lck;
2524 
2525  if ((__kmp_user_lock_kind == lk_tas) &&
2526  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2527  lck = (kmp_user_lock_p)user_lock;
2528  }
2529 #if KMP_USE_FUTEX
2530  else if ((__kmp_user_lock_kind == lk_futex) &&
2531  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2532  lck = (kmp_user_lock_p)user_lock;
2533  }
2534 #endif
2535  else {
2536  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2537  }
2538 
2539 #if OMPT_SUPPORT && OMPT_OPTIONAL
2540  // This is the case, if called from omp_init_lock_with_hint:
2541  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2542  if (!codeptr)
2543  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2544  if (ompt_enabled.ompt_callback_lock_destroy) {
2545  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2546  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2547  }
2548 #endif
2549 
2550 #if USE_ITT_BUILD
2551  __kmp_itt_lock_destroyed(lck);
2552 #endif /* USE_ITT_BUILD */
2553  DESTROY_LOCK(lck);
2554 
2555  if ((__kmp_user_lock_kind == lk_tas) &&
2556  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2557  ;
2558  }
2559 #if KMP_USE_FUTEX
2560  else if ((__kmp_user_lock_kind == lk_futex) &&
2561  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2562  ;
2563  }
2564 #endif
2565  else {
2566  __kmp_user_lock_free(user_lock, gtid, lck);
2567  }
2568 #endif // KMP_USE_DYNAMIC_LOCK
2569 } // __kmpc_destroy_lock
2570 
2571 /* destroy the lock */
2572 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2573 #if KMP_USE_DYNAMIC_LOCK
2574 
2575 #if USE_ITT_BUILD
2576  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2577  __kmp_itt_lock_destroyed(ilk->lock);
2578 #endif
2579 #if OMPT_SUPPORT && OMPT_OPTIONAL
2580  // This is the case, if called from omp_init_lock_with_hint:
2581  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2582  if (!codeptr)
2583  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2584  if (ompt_enabled.ompt_callback_lock_destroy) {
2585  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2586  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2587  }
2588 #endif
2589  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2590 
2591 #else // KMP_USE_DYNAMIC_LOCK
2592 
2593  kmp_user_lock_p lck;
2594 
2595  if ((__kmp_user_lock_kind == lk_tas) &&
2596  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2597  OMP_NEST_LOCK_T_SIZE)) {
2598  lck = (kmp_user_lock_p)user_lock;
2599  }
2600 #if KMP_USE_FUTEX
2601  else if ((__kmp_user_lock_kind == lk_futex) &&
2602  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2603  OMP_NEST_LOCK_T_SIZE)) {
2604  lck = (kmp_user_lock_p)user_lock;
2605  }
2606 #endif
2607  else {
2608  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2609  }
2610 
2611 #if OMPT_SUPPORT && OMPT_OPTIONAL
2612  // This is the case, if called from omp_init_lock_with_hint:
2613  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2614  if (!codeptr)
2615  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2616  if (ompt_enabled.ompt_callback_lock_destroy) {
2617  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2618  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2619  }
2620 #endif
2621 
2622 #if USE_ITT_BUILD
2623  __kmp_itt_lock_destroyed(lck);
2624 #endif /* USE_ITT_BUILD */
2625 
2626  DESTROY_NESTED_LOCK(lck);
2627 
2628  if ((__kmp_user_lock_kind == lk_tas) &&
2629  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2630  OMP_NEST_LOCK_T_SIZE)) {
2631  ;
2632  }
2633 #if KMP_USE_FUTEX
2634  else if ((__kmp_user_lock_kind == lk_futex) &&
2635  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2636  OMP_NEST_LOCK_T_SIZE)) {
2637  ;
2638  }
2639 #endif
2640  else {
2641  __kmp_user_lock_free(user_lock, gtid, lck);
2642  }
2643 #endif // KMP_USE_DYNAMIC_LOCK
2644 } // __kmpc_destroy_nest_lock
2645 
2646 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2647  KMP_COUNT_BLOCK(OMP_set_lock);
2648 #if KMP_USE_DYNAMIC_LOCK
2649  int tag = KMP_EXTRACT_D_TAG(user_lock);
2650 #if USE_ITT_BUILD
2651  __kmp_itt_lock_acquiring(
2652  (kmp_user_lock_p)
2653  user_lock); // itt function will get to the right lock object.
2654 #endif
2655 #if OMPT_SUPPORT && OMPT_OPTIONAL
2656  // This is the case, if called from omp_init_lock_with_hint:
2657  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2658  if (!codeptr)
2659  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2660  if (ompt_enabled.ompt_callback_mutex_acquire) {
2661  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2662  ompt_mutex_lock, omp_lock_hint_none,
2663  __ompt_get_mutex_impl_type(user_lock),
2664  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2665  }
2666 #endif
2667 #if KMP_USE_INLINED_TAS
2668  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2669  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2670  } else
2671 #elif KMP_USE_INLINED_FUTEX
2672  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2673  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2674  } else
2675 #endif
2676  {
2677  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2678  }
2679 #if USE_ITT_BUILD
2680  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2681 #endif
2682 #if OMPT_SUPPORT && OMPT_OPTIONAL
2683  if (ompt_enabled.ompt_callback_mutex_acquired) {
2684  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2685  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2686  }
2687 #endif
2688 
2689 #else // KMP_USE_DYNAMIC_LOCK
2690 
2691  kmp_user_lock_p lck;
2692 
2693  if ((__kmp_user_lock_kind == lk_tas) &&
2694  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2695  lck = (kmp_user_lock_p)user_lock;
2696  }
2697 #if KMP_USE_FUTEX
2698  else if ((__kmp_user_lock_kind == lk_futex) &&
2699  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2700  lck = (kmp_user_lock_p)user_lock;
2701  }
2702 #endif
2703  else {
2704  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2705  }
2706 
2707 #if USE_ITT_BUILD
2708  __kmp_itt_lock_acquiring(lck);
2709 #endif /* USE_ITT_BUILD */
2710 #if OMPT_SUPPORT && OMPT_OPTIONAL
2711  // This is the case, if called from omp_init_lock_with_hint:
2712  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2713  if (!codeptr)
2714  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2715  if (ompt_enabled.ompt_callback_mutex_acquire) {
2716  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2717  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2718  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2719  }
2720 #endif
2721 
2722  ACQUIRE_LOCK(lck, gtid);
2723 
2724 #if USE_ITT_BUILD
2725  __kmp_itt_lock_acquired(lck);
2726 #endif /* USE_ITT_BUILD */
2727 
2728 #if OMPT_SUPPORT && OMPT_OPTIONAL
2729  if (ompt_enabled.ompt_callback_mutex_acquired) {
2730  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2731  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2732  }
2733 #endif
2734 
2735 #endif // KMP_USE_DYNAMIC_LOCK
2736 }
2737 
2738 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2739 #if KMP_USE_DYNAMIC_LOCK
2740 
2741 #if USE_ITT_BUILD
2742  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2743 #endif
2744 #if OMPT_SUPPORT && OMPT_OPTIONAL
2745  // This is the case, if called from omp_init_lock_with_hint:
2746  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2747  if (!codeptr)
2748  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2749  if (ompt_enabled.enabled) {
2750  if (ompt_enabled.ompt_callback_mutex_acquire) {
2751  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2752  ompt_mutex_nest_lock, omp_lock_hint_none,
2753  __ompt_get_mutex_impl_type(user_lock),
2754  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2755  }
2756  }
2757 #endif
2758  int acquire_status =
2759  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2760  (void)acquire_status;
2761 #if USE_ITT_BUILD
2762  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2763 #endif
2764 
2765 #if OMPT_SUPPORT && OMPT_OPTIONAL
2766  if (ompt_enabled.enabled) {
2767  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2768  if (ompt_enabled.ompt_callback_mutex_acquired) {
2769  // lock_first
2770  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2771  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2772  codeptr);
2773  }
2774  } else {
2775  if (ompt_enabled.ompt_callback_nest_lock) {
2776  // lock_next
2777  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2778  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2779  }
2780  }
2781  }
2782 #endif
2783 
2784 #else // KMP_USE_DYNAMIC_LOCK
2785  int acquire_status;
2786  kmp_user_lock_p lck;
2787 
2788  if ((__kmp_user_lock_kind == lk_tas) &&
2789  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2790  OMP_NEST_LOCK_T_SIZE)) {
2791  lck = (kmp_user_lock_p)user_lock;
2792  }
2793 #if KMP_USE_FUTEX
2794  else if ((__kmp_user_lock_kind == lk_futex) &&
2795  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2796  OMP_NEST_LOCK_T_SIZE)) {
2797  lck = (kmp_user_lock_p)user_lock;
2798  }
2799 #endif
2800  else {
2801  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2802  }
2803 
2804 #if USE_ITT_BUILD
2805  __kmp_itt_lock_acquiring(lck);
2806 #endif /* USE_ITT_BUILD */
2807 #if OMPT_SUPPORT && OMPT_OPTIONAL
2808  // This is the case, if called from omp_init_lock_with_hint:
2809  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2810  if (!codeptr)
2811  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2812  if (ompt_enabled.enabled) {
2813  if (ompt_enabled.ompt_callback_mutex_acquire) {
2814  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2815  ompt_mutex_nest_lock, omp_lock_hint_none,
2816  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2817  codeptr);
2818  }
2819  }
2820 #endif
2821 
2822  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2823 
2824 #if USE_ITT_BUILD
2825  __kmp_itt_lock_acquired(lck);
2826 #endif /* USE_ITT_BUILD */
2827 
2828 #if OMPT_SUPPORT && OMPT_OPTIONAL
2829  if (ompt_enabled.enabled) {
2830  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2831  if (ompt_enabled.ompt_callback_mutex_acquired) {
2832  // lock_first
2833  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2834  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2835  }
2836  } else {
2837  if (ompt_enabled.ompt_callback_nest_lock) {
2838  // lock_next
2839  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2840  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2841  }
2842  }
2843  }
2844 #endif
2845 
2846 #endif // KMP_USE_DYNAMIC_LOCK
2847 }
2848 
2849 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2850 #if KMP_USE_DYNAMIC_LOCK
2851 
2852  int tag = KMP_EXTRACT_D_TAG(user_lock);
2853 #if USE_ITT_BUILD
2854  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2855 #endif
2856 #if KMP_USE_INLINED_TAS
2857  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2858  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2859  } else
2860 #elif KMP_USE_INLINED_FUTEX
2861  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2862  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2863  } else
2864 #endif
2865  {
2866  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2867  }
2868 
2869 #if OMPT_SUPPORT && OMPT_OPTIONAL
2870  // This is the case, if called from omp_init_lock_with_hint:
2871  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2872  if (!codeptr)
2873  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2874  if (ompt_enabled.ompt_callback_mutex_released) {
2875  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2876  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2877  }
2878 #endif
2879 
2880 #else // KMP_USE_DYNAMIC_LOCK
2881 
2882  kmp_user_lock_p lck;
2883 
2884  /* Can't use serial interval since not block structured */
2885  /* release the lock */
2886 
2887  if ((__kmp_user_lock_kind == lk_tas) &&
2888  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2889 #if KMP_OS_LINUX && \
2890  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2891 // "fast" path implemented to fix customer performance issue
2892 #if USE_ITT_BUILD
2893  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2894 #endif /* USE_ITT_BUILD */
2895  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2896  KMP_MB();
2897 
2898 #if OMPT_SUPPORT && OMPT_OPTIONAL
2899  // This is the case, if called from omp_init_lock_with_hint:
2900  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2901  if (!codeptr)
2902  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2903  if (ompt_enabled.ompt_callback_mutex_released) {
2904  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2905  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2906  }
2907 #endif
2908 
2909  return;
2910 #else
2911  lck = (kmp_user_lock_p)user_lock;
2912 #endif
2913  }
2914 #if KMP_USE_FUTEX
2915  else if ((__kmp_user_lock_kind == lk_futex) &&
2916  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2917  lck = (kmp_user_lock_p)user_lock;
2918  }
2919 #endif
2920  else {
2921  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2922  }
2923 
2924 #if USE_ITT_BUILD
2925  __kmp_itt_lock_releasing(lck);
2926 #endif /* USE_ITT_BUILD */
2927 
2928  RELEASE_LOCK(lck, gtid);
2929 
2930 #if OMPT_SUPPORT && OMPT_OPTIONAL
2931  // This is the case, if called from omp_init_lock_with_hint:
2932  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2933  if (!codeptr)
2934  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2935  if (ompt_enabled.ompt_callback_mutex_released) {
2936  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2937  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2938  }
2939 #endif
2940 
2941 #endif // KMP_USE_DYNAMIC_LOCK
2942 }
2943 
2944 /* release the lock */
2945 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2946 #if KMP_USE_DYNAMIC_LOCK
2947 
2948 #if USE_ITT_BUILD
2949  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2950 #endif
2951  int release_status =
2952  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2953  (void)release_status;
2954 
2955 #if OMPT_SUPPORT && OMPT_OPTIONAL
2956  // This is the case, if called from omp_init_lock_with_hint:
2957  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2958  if (!codeptr)
2959  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2960  if (ompt_enabled.enabled) {
2961  if (release_status == KMP_LOCK_RELEASED) {
2962  if (ompt_enabled.ompt_callback_mutex_released) {
2963  // release_lock_last
2964  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2965  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2966  codeptr);
2967  }
2968  } else if (ompt_enabled.ompt_callback_nest_lock) {
2969  // release_lock_prev
2970  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2971  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2972  }
2973  }
2974 #endif
2975 
2976 #else // KMP_USE_DYNAMIC_LOCK
2977 
2978  kmp_user_lock_p lck;
2979 
2980  /* Can't use serial interval since not block structured */
2981 
2982  if ((__kmp_user_lock_kind == lk_tas) &&
2983  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2984  OMP_NEST_LOCK_T_SIZE)) {
2985 #if KMP_OS_LINUX && \
2986  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2987  // "fast" path implemented to fix customer performance issue
2988  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2989 #if USE_ITT_BUILD
2990  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2991 #endif /* USE_ITT_BUILD */
2992 
2993 #if OMPT_SUPPORT && OMPT_OPTIONAL
2994  int release_status = KMP_LOCK_STILL_HELD;
2995 #endif
2996 
2997  if (--(tl->lk.depth_locked) == 0) {
2998  TCW_4(tl->lk.poll, 0);
2999 #if OMPT_SUPPORT && OMPT_OPTIONAL
3000  release_status = KMP_LOCK_RELEASED;
3001 #endif
3002  }
3003  KMP_MB();
3004 
3005 #if OMPT_SUPPORT && OMPT_OPTIONAL
3006  // This is the case, if called from omp_init_lock_with_hint:
3007  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3008  if (!codeptr)
3009  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3010  if (ompt_enabled.enabled) {
3011  if (release_status == KMP_LOCK_RELEASED) {
3012  if (ompt_enabled.ompt_callback_mutex_released) {
3013  // release_lock_last
3014  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3015  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3016  }
3017  } else if (ompt_enabled.ompt_callback_nest_lock) {
3018  // release_lock_previous
3019  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3020  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3021  }
3022  }
3023 #endif
3024 
3025  return;
3026 #else
3027  lck = (kmp_user_lock_p)user_lock;
3028 #endif
3029  }
3030 #if KMP_USE_FUTEX
3031  else if ((__kmp_user_lock_kind == lk_futex) &&
3032  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3033  OMP_NEST_LOCK_T_SIZE)) {
3034  lck = (kmp_user_lock_p)user_lock;
3035  }
3036 #endif
3037  else {
3038  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3039  }
3040 
3041 #if USE_ITT_BUILD
3042  __kmp_itt_lock_releasing(lck);
3043 #endif /* USE_ITT_BUILD */
3044 
3045  int release_status;
3046  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3047 #if OMPT_SUPPORT && OMPT_OPTIONAL
3048  // This is the case, if called from omp_init_lock_with_hint:
3049  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3050  if (!codeptr)
3051  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3052  if (ompt_enabled.enabled) {
3053  if (release_status == KMP_LOCK_RELEASED) {
3054  if (ompt_enabled.ompt_callback_mutex_released) {
3055  // release_lock_last
3056  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3057  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3058  }
3059  } else if (ompt_enabled.ompt_callback_nest_lock) {
3060  // release_lock_previous
3061  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3062  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3063  }
3064  }
3065 #endif
3066 
3067 #endif // KMP_USE_DYNAMIC_LOCK
3068 }
3069 
3070 /* try to acquire the lock */
3071 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3072  KMP_COUNT_BLOCK(OMP_test_lock);
3073 
3074 #if KMP_USE_DYNAMIC_LOCK
3075  int rc;
3076  int tag = KMP_EXTRACT_D_TAG(user_lock);
3077 #if USE_ITT_BUILD
3078  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3079 #endif
3080 #if OMPT_SUPPORT && OMPT_OPTIONAL
3081  // This is the case, if called from omp_init_lock_with_hint:
3082  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3083  if (!codeptr)
3084  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3085  if (ompt_enabled.ompt_callback_mutex_acquire) {
3086  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3087  ompt_mutex_lock, omp_lock_hint_none,
3088  __ompt_get_mutex_impl_type(user_lock),
3089  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3090  }
3091 #endif
3092 #if KMP_USE_INLINED_TAS
3093  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3094  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3095  } else
3096 #elif KMP_USE_INLINED_FUTEX
3097  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3098  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3099  } else
3100 #endif
3101  {
3102  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3103  }
3104  if (rc) {
3105 #if USE_ITT_BUILD
3106  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3107 #endif
3108 #if OMPT_SUPPORT && OMPT_OPTIONAL
3109  if (ompt_enabled.ompt_callback_mutex_acquired) {
3110  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3111  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3112  }
3113 #endif
3114  return FTN_TRUE;
3115  } else {
3116 #if USE_ITT_BUILD
3117  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3118 #endif
3119  return FTN_FALSE;
3120  }
3121 
3122 #else // KMP_USE_DYNAMIC_LOCK
3123 
3124  kmp_user_lock_p lck;
3125  int rc;
3126 
3127  if ((__kmp_user_lock_kind == lk_tas) &&
3128  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3129  lck = (kmp_user_lock_p)user_lock;
3130  }
3131 #if KMP_USE_FUTEX
3132  else if ((__kmp_user_lock_kind == lk_futex) &&
3133  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3134  lck = (kmp_user_lock_p)user_lock;
3135  }
3136 #endif
3137  else {
3138  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3139  }
3140 
3141 #if USE_ITT_BUILD
3142  __kmp_itt_lock_acquiring(lck);
3143 #endif /* USE_ITT_BUILD */
3144 #if OMPT_SUPPORT && OMPT_OPTIONAL
3145  // This is the case, if called from omp_init_lock_with_hint:
3146  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3147  if (!codeptr)
3148  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3149  if (ompt_enabled.ompt_callback_mutex_acquire) {
3150  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3151  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3152  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3153  }
3154 #endif
3155 
3156  rc = TEST_LOCK(lck, gtid);
3157 #if USE_ITT_BUILD
3158  if (rc) {
3159  __kmp_itt_lock_acquired(lck);
3160  } else {
3161  __kmp_itt_lock_cancelled(lck);
3162  }
3163 #endif /* USE_ITT_BUILD */
3164 #if OMPT_SUPPORT && OMPT_OPTIONAL
3165  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3166  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3167  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3168  }
3169 #endif
3170 
3171  return (rc ? FTN_TRUE : FTN_FALSE);
3172 
3173  /* Can't use serial interval since not block structured */
3174 
3175 #endif // KMP_USE_DYNAMIC_LOCK
3176 }
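
// Illustrative sketch (not part of the library source): user-level
// omp_test_lock() usage corresponding to the entry point above -- keep doing
// useful work instead of blocking while the lock is unavailable.
// do_local_work() is a hypothetical placeholder.
#if 0
#include <omp.h>
void drain_when_free(omp_lock_t *lk) {
  while (!omp_test_lock(lk)) // returns nonzero once the lock is acquired
    do_local_work();         // make progress instead of waiting idle
  /* lock is held here */
  omp_unset_lock(lk);
}
#endif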
3177 
3178 /* try to acquire the lock */
3179 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3180 #if KMP_USE_DYNAMIC_LOCK
3181  int rc;
3182 #if USE_ITT_BUILD
3183  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3184 #endif
3185 #if OMPT_SUPPORT && OMPT_OPTIONAL
3186  // This is the case, if called from omp_init_lock_with_hint:
3187  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3188  if (!codeptr)
3189  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3190  if (ompt_enabled.ompt_callback_mutex_acquire) {
3191  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3192  ompt_mutex_nest_lock, omp_lock_hint_none,
3193  __ompt_get_mutex_impl_type(user_lock),
3194  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3195  }
3196 #endif
3197  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3198 #if USE_ITT_BUILD
3199  if (rc) {
3200  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3201  } else {
3202  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3203  }
3204 #endif
3205 #if OMPT_SUPPORT && OMPT_OPTIONAL
3206  if (ompt_enabled.enabled && rc) {
3207  if (rc == 1) {
3208  if (ompt_enabled.ompt_callback_mutex_acquired) {
3209  // lock_first
3210  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3211  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3212  codeptr);
3213  }
3214  } else {
3215  if (ompt_enabled.ompt_callback_nest_lock) {
3216  // lock_next
3217  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3218  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3219  }
3220  }
3221  }
3222 #endif
3223  return rc;
3224 
3225 #else // KMP_USE_DYNAMIC_LOCK
3226 
3227  kmp_user_lock_p lck;
3228  int rc;
3229 
3230  if ((__kmp_user_lock_kind == lk_tas) &&
3231  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3232  OMP_NEST_LOCK_T_SIZE)) {
3233  lck = (kmp_user_lock_p)user_lock;
3234  }
3235 #if KMP_USE_FUTEX
3236  else if ((__kmp_user_lock_kind == lk_futex) &&
3237  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3238  OMP_NEST_LOCK_T_SIZE)) {
3239  lck = (kmp_user_lock_p)user_lock;
3240  }
3241 #endif
3242  else {
3243  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3244  }
3245 
3246 #if USE_ITT_BUILD
3247  __kmp_itt_lock_acquiring(lck);
3248 #endif /* USE_ITT_BUILD */
3249 
3250 #if OMPT_SUPPORT && OMPT_OPTIONAL
3251  // This is the case, if called from omp_init_lock_with_hint:
3252  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3253  if (!codeptr)
3254  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3255  if (ompt_enabled.enabled &&
3256      ompt_enabled.ompt_callback_mutex_acquire) {
3257  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3258  ompt_mutex_nest_lock, omp_lock_hint_none,
3259  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3260  codeptr);
3261  }
3262 #endif
3263 
3264  rc = TEST_NESTED_LOCK(lck, gtid);
3265 #if USE_ITT_BUILD
3266  if (rc) {
3267  __kmp_itt_lock_acquired(lck);
3268  } else {
3269  __kmp_itt_lock_cancelled(lck);
3270  }
3271 #endif /* USE_ITT_BUILD */
3272 #if OMPT_SUPPORT && OMPT_OPTIONAL
3273  if (ompt_enabled.enabled && rc) {
3274  if (rc == 1) {
3275  if (ompt_enabled.ompt_callback_mutex_acquired) {
3276  // lock_first
3277  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3278  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3279  }
3280  } else {
3281  if (ompt_enabled.ompt_callback_nest_lock) {
3282  // lock_next
3283  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3284  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3285  }
3286  }
3287  }
3288 #endif
3289  return rc;
3290 
3291  /* Can't use serial interval since not block structured */
3292 
3293 #endif // KMP_USE_DYNAMIC_LOCK
3294 }
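
// Illustrative sketch (not part of the library source): user-level nestable
// lock usage corresponding to __kmpc_test_nest_lock() above.
// omp_test_nest_lock() returns the nesting count on success (1 for the first
// acquisition, >1 when re-acquired by the owning thread) and 0 on failure.
// Assumes nlk was initialized elsewhere with omp_init_nest_lock().
#if 0
#include <omp.h>
omp_nest_lock_t nlk;
void outer(void) {
  if (omp_test_nest_lock(&nlk)) {  // count == 1: first acquisition
    if (omp_test_nest_lock(&nlk))  // count == 2: owner may re-enter
      omp_unset_nest_lock(&nlk);
    omp_unset_nest_lock(&nlk);
  }
}
#endif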
3295 
3296 // Interface to fast scalable reduce methods routines
3297 
3298 // keep the selected method in a thread local structure for cross-function
3299 // usage: will be used in __kmpc_end_reduce* functions;
3300 // another solution: to re-determine the method one more time in
3301 // __kmpc_end_reduce* functions (new prototype required then)
3302 // AT: which solution is better?
3303 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3304  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3305 
3306 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3307  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3308 
3309 // description of the packed_reduction_method variable: look at the macros in
3310 // kmp.h
3311 
3312 // used in a critical section reduce block
3313 static __forceinline void
3314 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3315  kmp_critical_name *crit) {
3316 
3317  // this lock was visible to a customer and to the threading profile tool as a
3318  // serial overhead span (although it's used for an internal purpose only)
3319  // why was it visible in previous implementation?
3320  // should we keep it visible in new reduce block?
3321  kmp_user_lock_p lck;
3322 
3323 #if KMP_USE_DYNAMIC_LOCK
3324 
3325  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3326  // Check if it is initialized.
3327  if (*lk == 0) {
3328  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3329  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3330  KMP_GET_D_TAG(__kmp_user_lock_seq));
3331  } else {
3332  __kmp_init_indirect_csptr(crit, loc, global_tid,
3333  KMP_GET_I_TAG(__kmp_user_lock_seq));
3334  }
3335  }
3336  // Branch for accessing the actual lock object and set operation. This
3337  // branching is inevitable since this lock initialization does not follow the
3338  // normal dispatch path (lock table is not used).
3339  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3340  lck = (kmp_user_lock_p)lk;
3341  KMP_DEBUG_ASSERT(lck != NULL);
3342  if (__kmp_env_consistency_check) {
3343  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3344  }
3345  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3346  } else {
3347  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3348  lck = ilk->lock;
3349  KMP_DEBUG_ASSERT(lck != NULL);
3350  if (__kmp_env_consistency_check) {
3351  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3352  }
3353  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3354  }
3355 
3356 #else // KMP_USE_DYNAMIC_LOCK
3357 
3358  // We know that the fast reduction code is only emitted by Intel compilers
3359  // with 32 byte critical sections. If there isn't enough space, then we
3360  // have to use a pointer.
3361  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3362  lck = (kmp_user_lock_p)crit;
3363  } else {
3364  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3365  }
3366  KMP_DEBUG_ASSERT(lck != NULL);
3367 
3368  if (__kmp_env_consistency_check)
3369  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3370 
3371  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3372 
3373 #endif // KMP_USE_DYNAMIC_LOCK
3374 }
3375 
3376 // used in a critical section reduce block
3377 static __forceinline void
3378 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3379  kmp_critical_name *crit) {
3380 
3381  kmp_user_lock_p lck;
3382 
3383 #if KMP_USE_DYNAMIC_LOCK
3384 
3385  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3386  lck = (kmp_user_lock_p)crit;
3387  if (__kmp_env_consistency_check)
3388  __kmp_pop_sync(global_tid, ct_critical, loc);
3389  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3390  } else {
3391  kmp_indirect_lock_t *ilk =
3392  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3393  if (__kmp_env_consistency_check)
3394  __kmp_pop_sync(global_tid, ct_critical, loc);
3395  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3396  }
3397 
3398 #else // KMP_USE_DYNAMIC_LOCK
3399 
3400  // We know that the fast reduction code is only emitted by Intel compilers
3401  // with 32 byte critical sections. If there isn't enough space, then we have
3402  // to use a pointer.
3403  if (__kmp_base_user_lock_size > 32) {
3404  lck = *((kmp_user_lock_p *)crit);
3405  KMP_ASSERT(lck != NULL);
3406  } else {
3407  lck = (kmp_user_lock_p)crit;
3408  }
3409 
3410  if (__kmp_env_consistency_check)
3411  __kmp_pop_sync(global_tid, ct_critical, loc);
3412 
3413  __kmp_release_user_lock_with_checks(lck, global_tid);
3414 
3415 #endif // KMP_USE_DYNAMIC_LOCK
3416 } // __kmp_end_critical_section_reduce_block
3417 
3418 static __forceinline int
3419 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3420  int *task_state) {
3421  kmp_team_t *team;
3422 
3423  // Check if we are inside the teams construct?
3424  if (th->th.th_teams_microtask) {
3425  *team_p = team = th->th.th_team;
3426  if (team->t.t_level == th->th.th_teams_level) {
3427  // This is reduction at teams construct.
3428  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3429  // Let's swap teams temporarily for the reduction.
3430  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3431  th->th.th_team = team->t.t_parent;
3432  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3433  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3434  *task_state = th->th.th_task_state;
3435  th->th.th_task_state = 0;
3436 
3437  return 1;
3438  }
3439  }
3440  return 0;
3441 }
3442 
3443 static __forceinline void
3444 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3445  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3446  th->th.th_info.ds.ds_tid = 0;
3447  th->th.th_team = team;
3448  th->th.th_team_nproc = team->t.t_nproc;
3449  th->th.th_task_team = team->t.t_task_team[task_state];
3450  __kmp_type_convert(task_state, &(th->th.th_task_state));
3451 }
3452 
3453 /* 2.a.i. Reduce Block without a terminating barrier */
3469 kmp_int32
3470 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3471  size_t reduce_size, void *reduce_data,
3472  void (*reduce_func)(void *lhs_data, void *rhs_data),
3473  kmp_critical_name *lck) {
3474 
3475  KMP_COUNT_BLOCK(REDUCE_nowait);
3476  int retval = 0;
3477  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3478  kmp_info_t *th;
3479  kmp_team_t *team;
3480  int teams_swapped = 0, task_state;
3481  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3482  __kmp_assert_valid_gtid(global_tid);
3483 
3484  // why do we need this initialization here at all?
3485  // Reduction clause can not be used as a stand-alone directive.
3486 
3487  // do not call __kmp_serial_initialize(), it will be called by
3488  // __kmp_parallel_initialize() if needed
3489  // possible detection of false-positive race by the threadchecker ???
3490  if (!TCR_4(__kmp_init_parallel))
3491  __kmp_parallel_initialize();
3492 
3493  __kmp_resume_if_soft_paused();
3494 
3495 // check correctness of reduce block nesting
3496 #if KMP_USE_DYNAMIC_LOCK
3497  if (__kmp_env_consistency_check)
3498  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3499 #else
3500  if (__kmp_env_consistency_check)
3501  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3502 #endif
3503 
3504  th = __kmp_thread_from_gtid(global_tid);
3505  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3506 
3507  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3508  // the value should be kept in a variable
3509  // the variable should be either a construct-specific or thread-specific
3510  // property, not a team specific property
3511  // (a thread can reach the next reduce block on the next construct, reduce
3512  // method may differ on the next construct)
3513  // an ident_t "loc" parameter could be used as a construct-specific property
3514  // (what if loc == 0?)
3515  // (if both construct-specific and team-specific variables were shared,
3516  // then unnecessary extra syncs would be needed)
3517  // a thread-specific variable is better regarding two issues above (next
3518  // construct and extra syncs)
3519  // a thread-specific "th_local.reduction_method" variable is used currently
3520  // each thread executes 'determine' and 'set' lines (no need to execute by one
3521  // thread, to avoid unnecessary extra syncs)
3522 
3523  packed_reduction_method = __kmp_determine_reduction_method(
3524  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3525  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3526 
3527  OMPT_REDUCTION_DECL(th, global_tid);
3528  if (packed_reduction_method == critical_reduce_block) {
3529 
3530  OMPT_REDUCTION_BEGIN;
3531 
3532  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3533  retval = 1;
3534 
3535  } else if (packed_reduction_method == empty_reduce_block) {
3536 
3537  OMPT_REDUCTION_BEGIN;
3538 
3539  // usage: if team size == 1, no synchronization is required ( Intel
3540  // platforms only )
3541  retval = 1;
3542 
3543  } else if (packed_reduction_method == atomic_reduce_block) {
3544 
3545  retval = 2;
3546 
3547  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3548  // won't be called by the code gen)
3549  // (this is not ideal: the checking block has already been closed by
3550  // this 'pop',
3551  // but the atomic operation has not been executed yet; it will be
3552  // executed slightly later, literally on the next instruction)
3553  if (__kmp_env_consistency_check)
3554  __kmp_pop_sync(global_tid, ct_reduce, loc);
3555 
3556  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3557  tree_reduce_block)) {
3558 
3559 // AT: performance issue: a real barrier here
3560 // AT: (if primary thread is slow, other threads are blocked here waiting for
3561 // the primary thread to come and release them)
3562 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3563 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3564 // be confusing to a customer)
3565 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3566 // might go faster and be more in line with the sense of NOWAIT
3567 // AT: TO DO: do epcc test and compare times
3568 
3569 // this barrier should be invisible to a customer and to the threading profile
3570 // tool (it's neither a terminating barrier nor customer's code, it's
3571 // used for an internal purpose)
3572 #if OMPT_SUPPORT
3573  // JP: can this barrier potentially lead to task scheduling?
3574  // JP: as long as there is a barrier in the implementation, OMPT should and
3575  // will provide the barrier events
3576  // so we set-up the necessary frame/return addresses.
3577  ompt_frame_t *ompt_frame;
3578  if (ompt_enabled.enabled) {
3579  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3580  if (ompt_frame->enter_frame.ptr == NULL)
3581  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3582  }
3583  OMPT_STORE_RETURN_ADDRESS(global_tid);
3584 #endif
3585 #if USE_ITT_NOTIFY
3586  __kmp_threads[global_tid]->th.th_ident = loc;
3587 #endif
3588  retval =
3589  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3590  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3591  retval = (retval != 0) ? (0) : (1);
3592 #if OMPT_SUPPORT && OMPT_OPTIONAL
3593  if (ompt_enabled.enabled) {
3594  ompt_frame->enter_frame = ompt_data_none;
3595  }
3596 #endif
3597 
3598  // all other workers except primary thread should do this pop here
3599  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3600  if (__kmp_env_consistency_check) {
3601  if (retval == 0) {
3602  __kmp_pop_sync(global_tid, ct_reduce, loc);
3603  }
3604  }
3605 
3606  } else {
3607 
3608  // should never reach this block
3609  KMP_ASSERT(0); // "unexpected method"
3610  }
3611  if (teams_swapped) {
3612  __kmp_restore_swapped_teams(th, team, task_state);
3613  }
3614  KA_TRACE(
3615  10,
3616  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3617  global_tid, packed_reduction_method, retval));
3618 
3619  return retval;
3620 }
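
// Illustrative sketch (not part of the library source): roughly how a
// compiler consumes the return value above when lowering
// "#pragma omp for reduction(+:sum) nowait". The critical-name global and
// red_func stand in for compiler-generated objects, and real code generation
// would call a __kmpc_atomic_* routine on the atomic path instead of the
// pragma shown here.
#if 0
static kmp_critical_name red_crit = {0};
static void red_func(void *lhs, void *rhs) { *(int *)lhs += *(int *)rhs; }

static void reduce_tail(ident_t *loc, kmp_int32 gtid, int *shared_sum,
                        int local_sum) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(local_sum),
                               &local_sum, red_func, &red_crit)) {
  case 1: // critical or tree method: combine, then close the region
    *shared_sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &red_crit);
    break;
  case 2: // atomic method: combine atomically, no end call is generated
#pragma omp atomic
    *shared_sum += local_sum;
    break;
  default: // 0: a tree-method worker; its contribution was already combined
    break;
  }
}
#endif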
3621 
3630 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3631  kmp_critical_name *lck) {
3632 
3633  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3634 
3635  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3636  __kmp_assert_valid_gtid(global_tid);
3637 
3638  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3639 
3640  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3641 
3642  if (packed_reduction_method == critical_reduce_block) {
3643 
3644  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3645  OMPT_REDUCTION_END;
3646 
3647  } else if (packed_reduction_method == empty_reduce_block) {
3648 
3649  // usage: if team size == 1, no synchronization is required ( on Intel
3650  // platforms only )
3651 
3652  OMPT_REDUCTION_END;
3653 
3654  } else if (packed_reduction_method == atomic_reduce_block) {
3655 
3656  // neither primary thread nor other workers should get here
3657  // (code gen does not generate this call in case 2: atomic reduce block)
3658  // actually it's better to remove this elseif at all;
3659  // after removal this value will checked by the 'else' and will assert
3660 
3661  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3662  tree_reduce_block)) {
3663 
3664  // only primary thread gets here
3665  // OMPT: tree reduction is annotated in the barrier code
3666 
3667  } else {
3668 
3669  // should never reach this block
3670  KMP_ASSERT(0); // "unexpected method"
3671  }
3672 
3673  if (__kmp_env_consistency_check)
3674  __kmp_pop_sync(global_tid, ct_reduce, loc);
3675 
3676  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3677  global_tid, packed_reduction_method));
3678 
3679  return;
3680 }
3681 
3682 /* 2.a.ii. Reduce Block with a terminating barrier */
3683 
3699 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3700  size_t reduce_size, void *reduce_data,
3701  void (*reduce_func)(void *lhs_data, void *rhs_data),
3702  kmp_critical_name *lck) {
3703  KMP_COUNT_BLOCK(REDUCE_wait);
3704  int retval = 0;
3705  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3706  kmp_info_t *th;
3707  kmp_team_t *team;
3708  int teams_swapped = 0, task_state;
3709 
3710  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3711  __kmp_assert_valid_gtid(global_tid);
3712 
3713  // why do we need this initialization here at all?
3714  // A reduction clause cannot appear on a stand-alone directive.
3715 
3716  // do not call __kmp_serial_initialize(); it will be called by
3717  // __kmp_parallel_initialize() if needed
3718  // possible detection of a false-positive race by the thread checker ???
3719  if (!TCR_4(__kmp_init_parallel))
3720  __kmp_parallel_initialize();
3721 
3722  __kmp_resume_if_soft_paused();
3723 
3724 // check correctness of reduce block nesting
3725 #if KMP_USE_DYNAMIC_LOCK
3726  if (__kmp_env_consistency_check)
3727  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3728 #else
3729  if (__kmp_env_consistency_check)
3730  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3731 #endif
3732 
3733  th = __kmp_thread_from_gtid(global_tid);
3734  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3735 
3736  packed_reduction_method = __kmp_determine_reduction_method(
3737  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3738  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3739 
3740  OMPT_REDUCTION_DECL(th, global_tid);
3741 
3742  if (packed_reduction_method == critical_reduce_block) {
3743 
3744  OMPT_REDUCTION_BEGIN;
3745  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3746  retval = 1;
3747 
3748  } else if (packed_reduction_method == empty_reduce_block) {
3749 
3750  OMPT_REDUCTION_BEGIN;
3751  // usage: if team size == 1, no synchronization is required (Intel
3752  // platforms only)
3753  retval = 1;
3754 
3755  } else if (packed_reduction_method == atomic_reduce_block) {
3756 
3757  retval = 2;
3758 
3759  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3760  tree_reduce_block)) {
3761 
3762 // case tree_reduce_block:
3763 // this barrier should be visible to the user and to the threading profile
3764 // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3765 #if OMPT_SUPPORT
3766  ompt_frame_t *ompt_frame;
3767  if (ompt_enabled.enabled) {
3768  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3769  if (ompt_frame->enter_frame.ptr == NULL)
3770  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3771  }
3772  OMPT_STORE_RETURN_ADDRESS(global_tid);
3773 #endif
3774 #if USE_ITT_NOTIFY
3775  __kmp_threads[global_tid]->th.th_ident =
3776  loc; // needed for correct notification of frames
3777 #endif
3778  retval =
3779  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3780  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3781  retval = (retval != 0) ? (0) : (1);
3782 #if OMPT_SUPPORT && OMPT_OPTIONAL
3783  if (ompt_enabled.enabled) {
3784  ompt_frame->enter_frame = ompt_data_none;
3785  }
3786 #endif
3787 
3788  // all workers except the primary thread should do this pop here
3789  // (none of the workers other than the primary will enter __kmpc_end_reduce())
3790  if (__kmp_env_consistency_check) {
3791  if (retval == 0) { // 0: all other workers; 1: primary thread
3792  __kmp_pop_sync(global_tid, ct_reduce, loc);
3793  }
3794  }
3795 
3796  } else {
3797 
3798  // should never reach this block
3799  KMP_ASSERT(0); // "unexpected method"
3800  }
3801  if (teams_swapped) {
3802  __kmp_restore_swapped_teams(th, team, task_state);
3803  }
3804 
3805  KA_TRACE(10,
3806  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3807  global_tid, packed_reduction_method, retval));
3808  return retval;
3809 }
3810 
3821 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3822  kmp_critical_name *lck) {
3823 
3824  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3825  kmp_info_t *th;
3826  kmp_team_t *team;
3827  int teams_swapped = 0, task_state;
3828 
3829  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3830  __kmp_assert_valid_gtid(global_tid);
3831 
3832  th = __kmp_thread_from_gtid(global_tid);
3833  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3834 
3835  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3836 
3837  // this barrier should be visible to the user and to the threading profile
3838  // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3839  OMPT_REDUCTION_DECL(th, global_tid);
3840 
3841  if (packed_reduction_method == critical_reduce_block) {
3842  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3843 
3844  OMPT_REDUCTION_END;
3845 
3846 // TODO: implicit barrier: should be exposed
3847 #if OMPT_SUPPORT
3848  ompt_frame_t *ompt_frame;
3849  if (ompt_enabled.enabled) {
3850  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3851  if (ompt_frame->enter_frame.ptr == NULL)
3852  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3853  }
3854  OMPT_STORE_RETURN_ADDRESS(global_tid);
3855 #endif
3856 #if USE_ITT_NOTIFY
3857  __kmp_threads[global_tid]->th.th_ident = loc;
3858 #endif
3859  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3860 #if OMPT_SUPPORT && OMPT_OPTIONAL
3861  if (ompt_enabled.enabled) {
3862  ompt_frame->enter_frame = ompt_data_none;
3863  }
3864 #endif
3865 
3866  } else if (packed_reduction_method == empty_reduce_block) {
3867 
3868  OMPT_REDUCTION_END;
3869 
3870 // usage: if team size == 1, no synchronization is required (Intel platforms only)
3871 
3872 // TODO: implicit barrier: should be exposed
3873 #if OMPT_SUPPORT
3874  ompt_frame_t *ompt_frame;
3875  if (ompt_enabled.enabled) {
3876  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3877  if (ompt_frame->enter_frame.ptr == NULL)
3878  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3879  }
3880  OMPT_STORE_RETURN_ADDRESS(global_tid);
3881 #endif
3882 #if USE_ITT_NOTIFY
3883  __kmp_threads[global_tid]->th.th_ident = loc;
3884 #endif
3885  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3886 #if OMPT_SUPPORT && OMPT_OPTIONAL
3887  if (ompt_enabled.enabled) {
3888  ompt_frame->enter_frame = ompt_data_none;
3889  }
3890 #endif
3891 
3892  } else if (packed_reduction_method == atomic_reduce_block) {
3893 
3894 #if OMPT_SUPPORT
3895  ompt_frame_t *ompt_frame;
3896  if (ompt_enabled.enabled) {
3897  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3898  if (ompt_frame->enter_frame.ptr == NULL)
3899  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3900  }
3901  OMPT_STORE_RETURN_ADDRESS(global_tid);
3902 #endif
3903 // TODO: implicit barrier: should be exposed
3904 #if USE_ITT_NOTIFY
3905  __kmp_threads[global_tid]->th.th_ident = loc;
3906 #endif
3907  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3908 #if OMPT_SUPPORT && OMPT_OPTIONAL
3909  if (ompt_enabled.enabled) {
3910  ompt_frame->enter_frame = ompt_data_none;
3911  }
3912 #endif
3913 
3914  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3915  tree_reduce_block)) {
3916 
3917  // only primary thread executes here (primary releases all other workers)
3918  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3919  global_tid);
3920 
3921  } else {
3922 
3923  // should never reach this block
3924  KMP_ASSERT(0); // "unexpected method"
3925  }
3926  if (teams_swapped) {
3927  __kmp_restore_swapped_teams(th, team, task_state);
3928  }
3929 
3930  if (__kmp_env_consistency_check)
3931  __kmp_pop_sync(global_tid, ct_reduce, loc);
3932 
3933  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3934  global_tid, packed_reduction_method));
3935 
3936  return;
3937 }
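
// Illustrative sketch (not part of the runtime): the blocking variant follows
// the same pattern as the nowait sketch after __kmpc_reduce_nowait() above,
// except that __kmpc_end_reduce() is also called on the atomic path, since the
// runtime handles that case above by executing a plain barrier; names are
// placeholders.
//
//   switch (__kmpc_reduce(loc, gtid, 1, sizeof(double), &sum, my_reduce_func,
//                         &my_lock)) {
//   case 1: // combine, then end the reduce block (releases waiting workers)
//     global_sum += sum;
//     __kmpc_end_reduce(loc, gtid, &my_lock);
//     break;
//   case 2: // atomic contribution, then end the reduce block (barrier inside)
//     #pragma omp atomic
//     global_sum += sum;
//     __kmpc_end_reduce(loc, gtid, &my_lock);
//     break;
//   default: // 0: already synchronized in __kmpc_reduce(), nothing to do
//     break;
//   }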
3938 
3939 #undef __KMP_GET_REDUCTION_METHOD
3940 #undef __KMP_SET_REDUCTION_METHOD
3941 
3942 /* end of interface to fast scalable reduce routines */
3943 
3944 kmp_uint64 __kmpc_get_taskid() {
3945 
3946  kmp_int32 gtid;
3947  kmp_info_t *thread;
3948 
3949  gtid = __kmp_get_gtid();
3950  if (gtid < 0) {
3951  return 0;
3952  }
3953  thread = __kmp_thread_from_gtid(gtid);
3954  return thread->th.th_current_task->td_task_id;
3955 
3956 } // __kmpc_get_taskid
3957 
3958 kmp_uint64 __kmpc_get_parent_taskid() {
3959 
3960  kmp_int32 gtid;
3961  kmp_info_t *thread;
3962  kmp_taskdata_t *parent_task;
3963 
3964  gtid = __kmp_get_gtid();
3965  if (gtid < 0) {
3966  return 0;
3967  }
3968  thread = __kmp_thread_from_gtid(gtid);
3969  parent_task = thread->th.th_current_task->td_parent;
3970  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3971 
3972 } // __kmpc_get_parent_taskid
3973 
3985 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3986  const struct kmp_dim *dims) {
3987  __kmp_assert_valid_gtid(gtid);
3988  int j, idx;
3989  kmp_int64 last, trace_count;
3990  kmp_info_t *th = __kmp_threads[gtid];
3991  kmp_team_t *team = th->th.th_team;
3992  kmp_uint32 *flags;
3993  kmp_disp_t *pr_buf = th->th.th_dispatch;
3994  dispatch_shared_info_t *sh_buf;
3995 
3996  KA_TRACE(
3997  20,
3998  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3999  gtid, num_dims, !team->t.t_serialized));
4000  KMP_DEBUG_ASSERT(dims != NULL);
4001  KMP_DEBUG_ASSERT(num_dims > 0);
4002 
4003  if (team->t.t_serialized) {
4004  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4005  return; // no dependencies if team is serialized
4006  }
4007  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4008  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4009  // the next loop
4010  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4011 
4012  // Save bounds info into allocated private buffer
4013  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4014  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4015  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4016  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4017  pr_buf->th_doacross_info[0] =
4018  (kmp_int64)num_dims; // first element is number of dimensions
4019  // Also save the address of num_done so it can be accessed later without
4020  // knowing the buffer index
4021  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4022  pr_buf->th_doacross_info[2] = dims[0].lo;
4023  pr_buf->th_doacross_info[3] = dims[0].up;
4024  pr_buf->th_doacross_info[4] = dims[0].st;
4025  last = 5;
4026  for (j = 1; j < num_dims; ++j) {
4027  kmp_int64
4028  range_length; // keeps the range length of each dimension except the first (dims[0])
4029  if (dims[j].st == 1) { // most common case
4030  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4031  range_length = dims[j].up - dims[j].lo + 1;
4032  } else {
4033  if (dims[j].st > 0) {
4034  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4035  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4036  } else { // negative increment
4037  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4038  range_length =
4039  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4040  }
4041  }
4042  pr_buf->th_doacross_info[last++] = range_length;
4043  pr_buf->th_doacross_info[last++] = dims[j].lo;
4044  pr_buf->th_doacross_info[last++] = dims[j].up;
4045  pr_buf->th_doacross_info[last++] = dims[j].st;
4046  }
4047 
4048  // Compute total trip count.
4049  // Start with range of dims[0] which we don't need to keep in the buffer.
4050  if (dims[0].st == 1) { // most common case
4051  trace_count = dims[0].up - dims[0].lo + 1;
4052  } else if (dims[0].st > 0) {
4053  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4054  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4055  } else { // negative increment
4056  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4057  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4058  }
4059  for (j = 1; j < num_dims; ++j) {
4060  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4061  }
4062  KMP_DEBUG_ASSERT(trace_count > 0);
4063 
4064  // Check whether the shared buffer is still occupied by a previous loop
4065  // (the one whose buffer index was idx - __kmp_dispatch_num_buffers)
4066  if (idx != sh_buf->doacross_buf_idx) {
4067  // Shared buffer is occupied, wait for it to be free
4068  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4069  __kmp_eq_4, NULL);
4070  }
4071 #if KMP_32_BIT_ARCH
4072  // Check if we are the first thread: after the CAS the first thread gets 0;
4073  // others get 1 while initialization is in progress, or the pointer once done.
4074  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4075  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4076  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4077 #else
4078  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4079  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4080 #endif
4081  if (flags == NULL) {
4082  // we are the first thread, allocate the array of flags
4083  size_t size =
4084  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4085  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4086  KMP_MB();
4087  sh_buf->doacross_flags = flags;
4088  } else if (flags == (kmp_uint32 *)1) {
4089 #if KMP_32_BIT_ARCH
4090  // initialization is still in progress, need to wait
4091  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4092 #else
4093  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4094 #endif
4095  KMP_YIELD(TRUE);
4096  KMP_MB();
4097  } else {
4098  KMP_MB();
4099  }
4100  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4101  pr_buf->th_doacross_flags =
4102  sh_buf->doacross_flags; // save a private copy so we do not have to
4103  // touch the shared buffer on each iteration
4104  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4105 }
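
// Illustrative sketch (assumed lowering, not part of the runtime): roughly how
// a compiler maps a two-dimensional doacross loop onto __kmpc_doacross_init()
// above and the wait/post/fini entry points below, e.g.
//
//   #pragma omp for ordered(2)
//   for (i = 0; i < N; ++i)
//     for (j = 0; j < M; ++j) {
//       #pragma omp ordered depend(sink: i - 1, j) depend(sink: i, j - 1)
//       ... // loop body that needs the two sink iterations to be finished
//       #pragma omp ordered depend(source)
//     }
//
// Each thread calls __kmpc_doacross_init(loc, gtid, 2, dims) once, where dims
// holds the lo/up/st bounds of both loops, then per iteration:
//   kmp_int64 sink1[2] = {i - 1, j}; __kmpc_doacross_wait(loc, gtid, sink1);
//   kmp_int64 sink2[2] = {i, j - 1}; __kmpc_doacross_wait(loc, gtid, sink2);
//   ... // loop body
//   kmp_int64 src[2] = {i, j};       __kmpc_doacross_post(loc, gtid, src);
// and finally __kmpc_doacross_fini(loc, gtid) once the loop is done.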
4106 
4107 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4108  __kmp_assert_valid_gtid(gtid);
4109  kmp_int64 shft;
4110  size_t num_dims, i;
4111  kmp_uint32 flag;
4112  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4113  kmp_info_t *th = __kmp_threads[gtid];
4114  kmp_team_t *team = th->th.th_team;
4115  kmp_disp_t *pr_buf;
4116  kmp_int64 lo, up, st;
4117 
4118  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4119  if (team->t.t_serialized) {
4120  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4121  return; // no dependencies if team is serialized
4122  }
4123 
4124  // calculate sequential iteration number and check out-of-bounds condition
4125  pr_buf = th->th.th_dispatch;
4126  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4127  num_dims = (size_t)pr_buf->th_doacross_info[0];
4128  lo = pr_buf->th_doacross_info[2];
4129  up = pr_buf->th_doacross_info[3];
4130  st = pr_buf->th_doacross_info[4];
4131 #if OMPT_SUPPORT && OMPT_OPTIONAL
4132  ompt_dependence_t deps[num_dims];
4133 #endif
4134  if (st == 1) { // most common case
4135  if (vec[0] < lo || vec[0] > up) {
4136  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4137  "bounds [%lld,%lld]\n",
4138  gtid, vec[0], lo, up));
4139  return;
4140  }
4141  iter_number = vec[0] - lo;
4142  } else if (st > 0) {
4143  if (vec[0] < lo || vec[0] > up) {
4144  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4145  "bounds [%lld,%lld]\n",
4146  gtid, vec[0], lo, up));
4147  return;
4148  }
4149  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4150  } else { // negative increment
4151  if (vec[0] > lo || vec[0] < up) {
4152  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4153  "bounds [%lld,%lld]\n",
4154  gtid, vec[0], lo, up));
4155  return;
4156  }
4157  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4158  }
4159 #if OMPT_SUPPORT && OMPT_OPTIONAL
4160  deps[0].variable.value = iter_number;
4161  deps[0].dependence_type = ompt_dependence_type_sink;
4162 #endif
4163  for (i = 1; i < num_dims; ++i) {
4164  kmp_int64 iter, ln;
4165  size_t j = i * 4;
4166  ln = pr_buf->th_doacross_info[j + 1];
4167  lo = pr_buf->th_doacross_info[j + 2];
4168  up = pr_buf->th_doacross_info[j + 3];
4169  st = pr_buf->th_doacross_info[j + 4];
4170  if (st == 1) {
4171  if (vec[i] < lo || vec[i] > up) {
4172  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4173  "bounds [%lld,%lld]\n",
4174  gtid, vec[i], lo, up));
4175  return;
4176  }
4177  iter = vec[i] - lo;
4178  } else if (st > 0) {
4179  if (vec[i] < lo || vec[i] > up) {
4180  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4181  "bounds [%lld,%lld]\n",
4182  gtid, vec[i], lo, up));
4183  return;
4184  }
4185  iter = (kmp_uint64)(vec[i] - lo) / st;
4186  } else { // st < 0
4187  if (vec[i] > lo || vec[i] < up) {
4188  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4189  "bounds [%lld,%lld]\n",
4190  gtid, vec[i], lo, up));
4191  return;
4192  }
4193  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4194  }
4195  iter_number = iter + ln * iter_number;
4196 #if OMPT_SUPPORT && OMPT_OPTIONAL
4197  deps[i].variable.value = iter;
4198  deps[i].dependence_type = ompt_dependence_type_sink;
4199 #endif
4200  }
4201  shft = iter_number % 32; // use 32-bit granularity
4202  iter_number >>= 5; // divided by 32
4203  flag = 1 << shft;
4204  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4205  KMP_YIELD(TRUE);
4206  }
4207  KMP_MB();
4208 #if OMPT_SUPPORT && OMPT_OPTIONAL
4209  if (ompt_enabled.ompt_callback_dependences) {
4210  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4211  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4212  }
4213 #endif
4214  KA_TRACE(20,
4215  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4216  gtid, (iter_number << 5) + shft));
4217 }
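
// Worked example of the flag addressing used here and in __kmpc_doacross_post()
// below (with the 32-bit granularity shown above): for linearized iteration 70,
//   70 >> 5  == 2          // index into th_doacross_flags[]
//   70 % 32  == 6          // bit within that word
//   flag = 1 << 6          // the waiter spins until this bit is set;
//                          // the poster ORs the same bit in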
4218 
4219 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4220  __kmp_assert_valid_gtid(gtid);
4221  kmp_int64 shft;
4222  size_t num_dims, i;
4223  kmp_uint32 flag;
4224  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4225  kmp_info_t *th = __kmp_threads[gtid];
4226  kmp_team_t *team = th->th.th_team;
4227  kmp_disp_t *pr_buf;
4228  kmp_int64 lo, st;
4229 
4230  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4231  if (team->t.t_serialized) {
4232  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4233  return; // no dependencies if team is serialized
4234  }
4235 
4236  // calculate sequential iteration number (same as in "wait" but no
4237  // out-of-bounds checks)
4238  pr_buf = th->th.th_dispatch;
4239  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4240  num_dims = (size_t)pr_buf->th_doacross_info[0];
4241  lo = pr_buf->th_doacross_info[2];
4242  st = pr_buf->th_doacross_info[4];
4243 #if OMPT_SUPPORT && OMPT_OPTIONAL
4244  ompt_dependence_t deps[num_dims];
4245 #endif
4246  if (st == 1) { // most common case
4247  iter_number = vec[0] - lo;
4248  } else if (st > 0) {
4249  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4250  } else { // negative increment
4251  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4252  }
4253 #if OMPT_SUPPORT && OMPT_OPTIONAL
4254  deps[0].variable.value = iter_number;
4255  deps[0].dependence_type = ompt_dependence_type_source;
4256 #endif
4257  for (i = 1; i < num_dims; ++i) {
4258  kmp_int64 iter, ln;
4259  size_t j = i * 4;
4260  ln = pr_buf->th_doacross_info[j + 1];
4261  lo = pr_buf->th_doacross_info[j + 2];
4262  st = pr_buf->th_doacross_info[j + 4];
4263  if (st == 1) {
4264  iter = vec[i] - lo;
4265  } else if (st > 0) {
4266  iter = (kmp_uint64)(vec[i] - lo) / st;
4267  } else { // st < 0
4268  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4269  }
4270  iter_number = iter + ln * iter_number;
4271 #if OMPT_SUPPORT && OMPT_OPTIONAL
4272  deps[i].variable.value = iter;
4273  deps[i].dependence_type = ompt_dependence_type_source;
4274 #endif
4275  }
4276 #if OMPT_SUPPORT && OMPT_OPTIONAL
4277  if (ompt_enabled.ompt_callback_dependences) {
4278  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4279  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4280  }
4281 #endif
4282  shft = iter_number % 32; // use 32-bit granularity
4283  iter_number >>= 5; // divided by 32
4284  flag = 1 << shft;
4285  KMP_MB();
4286  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4287  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4288  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4289  (iter_number << 5) + shft));
4290 }
4291 
4292 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4293  __kmp_assert_valid_gtid(gtid);
4294  kmp_int32 num_done;
4295  kmp_info_t *th = __kmp_threads[gtid];
4296  kmp_team_t *team = th->th.th_team;
4297  kmp_disp_t *pr_buf = th->th.th_dispatch;
4298 
4299  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4300  if (team->t.t_serialized) {
4301  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4302  return; // nothing to do
4303  }
4304  num_done =
4305  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4306  if (num_done == th->th.th_team_nproc) {
4307  // we are the last thread, need to free shared resources
4308  int idx = pr_buf->th_doacross_buf_idx - 1;
4309  dispatch_shared_info_t *sh_buf =
4310  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4311  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4312  (kmp_int64)&sh_buf->doacross_num_done);
4313  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4314  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4315  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4316  sh_buf->doacross_flags = NULL;
4317  sh_buf->doacross_num_done = 0;
4318  sh_buf->doacross_buf_idx +=
4319  __kmp_dispatch_num_buffers; // free buffer for future re-use
4320  }
4321  // free private resources (need to keep buffer index forever)
4322  pr_buf->th_doacross_flags = NULL;
4323  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4324  pr_buf->th_doacross_info = NULL;
4325  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4326 }
4327 
4328 /* OpenMP 5.1 Memory Management routines */
4329 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4330  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4331 }
4332 
4333 void *omp_aligned_alloc(size_t align, size_t size,
4334  omp_allocator_handle_t allocator) {
4335  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4336 }
4337 
4338 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4339  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4340 }
4341 
4342 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4343  omp_allocator_handle_t allocator) {
4344  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4345 }
4346 
4347 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4348  omp_allocator_handle_t free_allocator) {
4349  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4350  free_allocator);
4351 }
4352 
4353 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4354  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4355 }
4356 /* end of OpenMP 5.1 Memory Management routines */
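
// Illustrative usage of the allocation routines above from user code (a sketch
// using the predefined omp_default_mem_alloc allocator):
//
//   #include <omp.h>
//   double *a = (double *)omp_alloc(1024 * sizeof(double),
//                                   omp_default_mem_alloc);
//   double *b = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
//                                           omp_default_mem_alloc);
//   ... // use a and b
//   omp_free(a, omp_default_mem_alloc);
//   omp_free(b, omp_default_mem_alloc);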
4357 
4358 int __kmpc_get_target_offload(void) {
4359  if (!__kmp_init_serial) {
4360  __kmp_serial_initialize();
4361  }
4362  return __kmp_target_offload;
4363 }
4364 
4365 int __kmpc_pause_resource(kmp_pause_status_t level) {
4366  if (!__kmp_init_serial) {
4367  return 1; // Can't pause if runtime is not initialized
4368  }
4369  return __kmp_pause_resource(level);
4370 }
4371 
4372 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4373  if (!__kmp_init_serial)
4374  __kmp_serial_initialize();
4375 
4376  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4377 
4378 #if OMPT_SUPPORT
4379  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4380  ompt_callbacks.ompt_callback(ompt_callback_error)(
4381  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4382  OMPT_GET_RETURN_ADDRESS(0));
4383  }
4384 #endif // OMPT_SUPPORT
4385 
4386  char *src_loc;
4387  if (loc && loc->psource) {
4388  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4389  src_loc =
4390  __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4391  __kmp_str_loc_free(&str_loc);
4392  } else {
4393  src_loc = __kmp_str_format("unknown");
4394  }
4395 
4396  if (severity == severity_warning)
4397  KMP_WARNING(UserDirectedWarning, src_loc, message);
4398  else
4399  KMP_FATAL(UserDirectedError, src_loc, message);
4400 
4401  __kmp_str_free(&src_loc);
4402 }
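
// Illustrative sketch (assumed lowering, not mandated here): a compiler that
// supports the OpenMP error directive can translate
//   #pragma omp error at(execution) severity(warning) message("check input")
// into a runtime call such as
//   __kmpc_error(&loc, severity_warning, "check input");
// where loc describes the source location of the directive.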
4403 
4404 // Mark the beginning of a scope directive.
4405 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4406 // reserved is for future extension of the scope directive and is not used.
4407 #if OMPT_SUPPORT && OMPT_OPTIONAL
4408  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4409  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4410  int tid = __kmp_tid_from_gtid(gtid);
4411  ompt_callbacks.ompt_callback(ompt_callback_work)(
4412  ompt_work_scope, ompt_scope_begin,
4413  &(team->t.ompt_team_info.parallel_data),
4414  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4415  OMPT_GET_RETURN_ADDRESS(0));
4416  }
4417 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4418 }
4419 
4420 // Mark the end of a scope directive
4421 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4422 // reserved is for future extension of the scope directive and is not used.
4423 #if OMPT_SUPPORT && OMPT_OPTIONAL
4424  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4425  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4426  int tid = __kmp_tid_from_gtid(gtid);
4427  ompt_callbacks.ompt_callback(ompt_callback_work)(
4428  ompt_work_scope, ompt_scope_end,
4429  &(team->t.ompt_team_info.parallel_data),
4430  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4431  OMPT_GET_RETURN_ADDRESS(0));
4432  }
4433 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4434 }
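
// Illustrative sketch (assumed lowering): a compiler can bracket an OpenMP 5.1
// scope construct, e.g.
//   #pragma omp scope private(tmp)
//   { ... }
// with __kmpc_scope(&loc, gtid, NULL) / __kmpc_end_scope(&loc, gtid, NULL) so
// that OMPT tools receive work-begin/work-end events for the region; as shown
// above, the calls themselves perform no synchronization.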
4435 
4436 #ifdef KMP_USE_VERSION_SYMBOLS
4437 // For GOMP compatibility there are two versions of each omp_* API.
4438 // One is the plain C symbol and one is the Fortran symbol with an appended
4439 // underscore. When we implement a specific ompc_* version of an omp_*
4440 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4441 // instead of the Fortran versions in kmp_ftn_entry.h
4442 extern "C" {
4443 // Have to undef these from omp.h so they aren't translated into
4444 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4445 #ifdef omp_set_affinity_format
4446 #undef omp_set_affinity_format
4447 #endif
4448 #ifdef omp_get_affinity_format
4449 #undef omp_get_affinity_format
4450 #endif
4451 #ifdef omp_display_affinity
4452 #undef omp_display_affinity
4453 #endif
4454 #ifdef omp_capture_affinity
4455 #undef omp_capture_affinity
4456 #endif
4457 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4458  "OMP_5.0");
4459 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4460  "OMP_5.0");
4461 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4462  "OMP_5.0");
4463 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4464  "OMP_5.0");
4465 } // extern "C"
4466 #endif