LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "kmp_utils.h"
22 #include "ompt-specific.h"
23 
24 #define MAX_MESSAGE 512
25 
26 // flags will be used in the future, e.g. to implement openmp_strict library
27 // restrictions
28 
37 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38  // By default __kmpc_begin() is no-op.
39  char *env;
40  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
41  __kmp_str_match_true(env)) {
42  __kmp_middle_initialize();
43  __kmp_assign_root_init_mask();
44  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45  } else if (__kmp_ignore_mppbeg() == FALSE) {
46  // By default __kmp_ignore_mppbeg() returns TRUE.
47  __kmp_internal_begin();
48  KC_TRACE(10, ("__kmpc_begin: called\n"));
49  }
50 }
51 
60 void __kmpc_end(ident_t *loc) {
61  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64  // returns FALSE and __kmpc_end() will unregister this root (it can cause
65  // library shut down).
66  if (__kmp_ignore_mppend() == FALSE) {
67  KC_TRACE(10, ("__kmpc_end: called\n"));
68  KA_TRACE(30, ("__kmpc_end\n"));
69 
70  __kmp_internal_end_thread(-1);
71  }
72 #if KMP_OS_WINDOWS && OMPT_SUPPORT
73  // Normal exit process on Windows does not allow worker threads of the final
74  // parallel region to finish reporting their events, so shutting down the
75  // library here fixes the issue at least for the cases where __kmpc_end() is
76  // placed properly.
77  if (ompt_enabled.enabled)
78  __kmp_internal_end_library(__kmp_gtid_get_specific());
79 #endif
80 }
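// Editorial sketch (not from the source): when a compiler uses these entry
// points at all, it brackets the program roughly as
//
//   __kmpc_begin(&loc, 0);
//   /* ... user program ... */
//   __kmpc_end(&loc);
//
// Both calls are effectively no-ops unless the KMP_IGNORE_MPP* environment
// variables (or KMP_INITIAL_THREAD_BIND for __kmpc_begin) change the defaults
// described in the comments above.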
81 
101  kmp_int32 gtid = __kmp_entry_gtid();
102 
103  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104 
105  return gtid;
106 }
107 
123  KC_TRACE(10,
124  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125 
126  return TCR_4(__kmp_all_nth);
127 }
128 
136  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137  return __kmp_tid_from_gtid(__kmp_entry_gtid());
138 }
139 
146  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147 
148  return __kmp_entry_thread()->th.th_team->t.t_nproc;
149 }
150 
157 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158 #ifndef KMP_DEBUG
159 
160  return TRUE;
161 
162 #else
163 
164  const char *semi2;
165  const char *semi3;
166  int line_no;
167 
168  if (__kmp_par_range == 0) {
169  return TRUE;
170  }
171  semi2 = loc->psource;
172  if (semi2 == NULL) {
173  return TRUE;
174  }
175  semi2 = strchr(semi2, ';');
176  if (semi2 == NULL) {
177  return TRUE;
178  }
179  semi2 = strchr(semi2 + 1, ';');
180  if (semi2 == NULL) {
181  return TRUE;
182  }
183  if (__kmp_par_range_filename[0]) {
184  const char *name = semi2 - 1;
185  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186  name--;
187  }
188  if ((*name == '/') || (*name == ';')) {
189  name++;
190  }
191  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192  return __kmp_par_range < 0;
193  }
194  }
195  semi3 = strchr(semi2 + 1, ';');
196  if (__kmp_par_range_routine[0]) {
197  if ((semi3 != NULL) && (semi3 > semi2) &&
198  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199  return __kmp_par_range < 0;
200  }
201  }
202  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204  return __kmp_par_range > 0;
205  }
206  return __kmp_par_range < 0;
207  }
208  return TRUE;
209 
210 #endif /* KMP_DEBUG */
211 }
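// Editorial sketch (illustrative, not normative): loc->psource is expected to
// be a semicolon-separated string naming the file, the routine, and the line
// numbers delimiting the construct, e.g. (made-up names):
//
//   ";app/src/solver.c;daxpy_loop;42;45;;"
//
// With that layout, semi2 above ends up at the ';' closing the file field,
// semi3 at the ';' closing the routine field, and the sscanf reads the first
// line number, which the KMP_PAR_RANGE-style filtering is matched against.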
212 
219 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220  return __kmp_entry_thread()->th.th_root->r.r_active;
221 }
222 
232 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233  kmp_int32 num_threads) {
234  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235  global_tid, num_threads));
236  __kmp_assert_valid_gtid(global_tid);
237  __kmp_push_num_threads(loc, global_tid, num_threads);
238 }
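// Editorial sketch (hypothetical lowering, not emitted by this library): a
// num_threads clause is typically turned into a push immediately before the
// fork, e.g. for "#pragma omp parallel num_threads(4)":
//
//   __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
//   __kmpc_fork_call(&loc, /*argc=*/0, (kmpc_micro)outlined_fn);
//
// The pushed value only applies to the next parallel region encountered by
// this thread; "outlined_fn" is a made-up name for the outlined body.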
239 
240 void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
241  kmp_int32 num_threads, int severity,
242  const char *message) {
243  __kmp_push_num_threads(loc, global_tid, num_threads);
244  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
245 }
246 
260 void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
261  kmp_uint32 list_length,
262  kmp_int32 *num_threads_list) {
263  KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
264  global_tid));
265  KA_TRACE(20, ("%d", num_threads_list[0]));
266 #ifdef KMP_DEBUG
267  for (kmp_uint32 i = 1; i < list_length; ++i)
268  KA_TRACE(20, (", %d", num_threads_list[i]));
269 #endif
270  KA_TRACE(20, ("/n"));
271 
272  __kmp_assert_valid_gtid(global_tid);
273  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
274 }
275 
276 void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
277  kmp_uint32 list_length,
278  kmp_int32 *num_threads_list,
279  int severity, const char *message) {
280  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
281  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
282 }
283 
284 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
285  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
286  /* the num_threads are automatically popped */
287 }
288 
289 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
290  kmp_int32 proc_bind) {
291  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
292  proc_bind));
293  __kmp_assert_valid_gtid(global_tid);
294  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
295 }
296 
307 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
308  int gtid = __kmp_entry_gtid();
309 
310 #if (KMP_STATS_ENABLED)
311  // If we were in a serial region, then stop the serial timer, record
312  // the event, and start parallel region timer
313  stats_state_e previous_state = KMP_GET_THREAD_STATE();
314  if (previous_state == stats_state_e::SERIAL_REGION) {
315  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
316  } else {
317  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
318  }
319  int inParallel = __kmpc_in_parallel(loc);
320  if (inParallel) {
321  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
322  } else {
323  KMP_COUNT_BLOCK(OMP_PARALLEL);
324  }
325 #endif
326 
327  // maybe saving thr_state is enough here
328  {
329  va_list ap;
330  va_start(ap, microtask);
331 
332 #if OMPT_SUPPORT
333  ompt_frame_t *ompt_frame;
334  if (ompt_enabled.enabled) {
335  kmp_info_t *master_th = __kmp_threads[gtid];
336  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
337  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
338  }
339  OMPT_STORE_RETURN_ADDRESS(gtid);
340 #endif
341 
342 #if INCLUDE_SSC_MARKS
343  SSC_MARK_FORKING();
344 #endif
345  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
346  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
347  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
348  kmp_va_addr_of(ap));
349 #if INCLUDE_SSC_MARKS
350  SSC_MARK_JOINING();
351 #endif
352  __kmp_join_call(loc, gtid
353 #if OMPT_SUPPORT
354  ,
355  fork_context_intel
356 #endif
357  );
358 
359  va_end(ap);
360 
361 #if OMPT_SUPPORT
362  if (ompt_enabled.enabled) {
363  ompt_frame->enter_frame = ompt_data_none;
364  }
365 #endif
366  }
367 
368 #if KMP_STATS_ENABLED
369  if (previous_state == stats_state_e::SERIAL_REGION) {
370  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
371  KMP_SET_THREAD_STATE(previous_state);
372  } else {
373  KMP_POP_PARTITIONED_TIMER();
374  }
375 #endif // KMP_STATS_ENABLED
376 }
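// Editorial sketch (hypothetical lowering): how "#pragma omp parallel" is
// typically mapped onto this entry point. The outlined function and variable
// names are invented for illustration:
//
//   static void outlined_fn(kmp_int32 *gtid, kmp_int32 *btid, int *shared_n) {
//     /* parallel region body, executed by every thread in the team */
//   }
//   ...
//   int n = 0;
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &n);
//
// Each of the argc trailing arguments is forwarded to the microtask after the
// implicit gtid/btid pair, which is how shared variables reach the team.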
377 
388 void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
389  kmp_int32 cond, void *args) {
390  int gtid = __kmp_entry_gtid();
391  if (cond) {
392  if (args)
393  __kmpc_fork_call(loc, argc, microtask, args);
394  else
395  __kmpc_fork_call(loc, argc, microtask);
396  } else {
397  __kmpc_serialized_parallel(loc, gtid);
398 
399 #if OMPT_SUPPORT
400  void *exit_frame_ptr;
401 #endif
402 
403  if (args)
404  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
405  /*npr=*/0,
406  /*argc=*/1, &args
407 #if OMPT_SUPPORT
408  ,
409  &exit_frame_ptr
410 #endif
411  );
412  else
413  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
414  /*npr=*/0,
415  /*argc=*/0,
416  /*args=*/nullptr
417 #if OMPT_SUPPORT
418  ,
419  &exit_frame_ptr
420 #endif
421  );
422 
423  __kmpc_end_serialized_parallel(loc, gtid);
424  }
425 }
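// Editorial sketch (hypothetical lowering): __kmpc_fork_call_if lets the
// compiler defer an if(expr) clause to the runtime instead of emitting its
// own branch, e.g. for "#pragma omp parallel if(n > 1)":
//
//   __kmpc_fork_call_if(&loc, /*argc=*/1, (kmpc_micro)outlined_fn,
//                       /*cond=*/(n > 1), /*args=*/&n);
//
// When cond is false the region runs serially: the serialized-parallel state
// is set up, the microtask is invoked directly on this thread, and the
// serialized region is ended with __kmpc_end_serialized_parallel.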
426 
438 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
439  kmp_int32 num_teams, kmp_int32 num_threads) {
440  KA_TRACE(20,
441  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
442  global_tid, num_teams, num_threads));
443  __kmp_assert_valid_gtid(global_tid);
444  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
445 }
446 
457 void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
458  kmp_int32 thread_limit) {
459  __kmp_assert_valid_gtid(global_tid);
460  kmp_info_t *thread = __kmp_threads[global_tid];
461  if (thread_limit > 0)
462  thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
463 }
464 
481 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
482  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
483  kmp_int32 num_threads) {
484  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
485  " num_teams_ub=%d num_threads=%d\n",
486  global_tid, num_teams_lb, num_teams_ub, num_threads));
487  __kmp_assert_valid_gtid(global_tid);
488  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
489  num_threads);
490 }
491 
502 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
503  ...) {
504  int gtid = __kmp_entry_gtid();
505  kmp_info_t *this_thr = __kmp_threads[gtid];
506  va_list ap;
507  va_start(ap, microtask);
508 
509 #if KMP_STATS_ENABLED
510  KMP_COUNT_BLOCK(OMP_TEAMS);
511  stats_state_e previous_state = KMP_GET_THREAD_STATE();
512  if (previous_state == stats_state_e::SERIAL_REGION) {
513  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
514  } else {
515  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
516  }
517 #endif
518 
519  // remember teams entry point and nesting level
520  this_thr->th.th_teams_microtask = microtask;
521  this_thr->th.th_teams_level =
522  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
523 
524 #if OMPT_SUPPORT
525  kmp_team_t *parent_team = this_thr->th.th_team;
526  int tid = __kmp_tid_from_gtid(gtid);
527  if (ompt_enabled.enabled) {
528  parent_team->t.t_implicit_task_taskdata[tid]
529  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
530  }
531  OMPT_STORE_RETURN_ADDRESS(gtid);
532 #endif
533 
534  // check if __kmpc_push_num_teams was called; if not, set the default
535  // number of teams
536  if (this_thr->th.th_teams_size.nteams == 0) {
537  __kmp_push_num_teams(loc, gtid, 0, 0);
538  }
539  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
540  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
541  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
542 
543  __kmp_fork_call(
544  loc, gtid, fork_context_intel, argc,
545  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
546  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
547  __kmp_join_call(loc, gtid
548 #if OMPT_SUPPORT
549  ,
550  fork_context_intel
551 #endif
552  );
553 
554  // Pop current CG root off list
555  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
556  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
557  this_thr->th.th_cg_roots = tmp->up;
558  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
559  " to node %p. cg_nthreads was %d\n",
560  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
561  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
562  int i = tmp->cg_nthreads--;
563  if (i == 1) { // check if we are the last thread in CG (not always the case)
564  __kmp_free(tmp);
565  }
566  // Restore current task's thread_limit from CG root
567  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
568  this_thr->th.th_current_task->td_icvs.thread_limit =
569  this_thr->th.th_cg_roots->cg_thread_limit;
570 
571  this_thr->th.th_teams_microtask = NULL;
572  this_thr->th.th_teams_level = 0;
573  memset(&this_thr->th.th_teams_size, 0, sizeof(kmp_teams_size_t));
574  va_end(ap);
575 #if KMP_STATS_ENABLED
576  if (previous_state == stats_state_e::SERIAL_REGION) {
577  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
578  KMP_SET_THREAD_STATE(previous_state);
579  } else {
580  KMP_POP_PARTITIONED_TIMER();
581  }
582 #endif // KMP_STATS_ENABLED
583 }
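// Editorial sketch (hypothetical lowering): a teams construct such as
// "#pragma omp teams num_teams(2) thread_limit(8)" is typically lowered as a
// push followed by the teams fork:
//
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/2, /*num_threads=*/8);
//   __kmpc_fork_teams(&loc, /*argc=*/0, (kmpc_micro)teams_outlined_fn);
//
// If the push is omitted, the nteams == 0 check above supplies defaults.
// "teams_outlined_fn" is a made-up name for the outlined teams region.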
584 
585 // I don't think this function should ever have been exported.
586 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
587 // openmp code ever called it, but it's been exported from the RTL for so
588 // long that I'm afraid to remove the definition.
589 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
590 
603 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
604  // The implementation is now in kmp_runtime.cpp so that it can share static
605  // functions with kmp_fork_call since the tasks to be done are similar in
606  // each case.
607  __kmp_assert_valid_gtid(global_tid);
608 #if OMPT_SUPPORT
609  OMPT_STORE_RETURN_ADDRESS(global_tid);
610 #endif
611  __kmp_serialized_parallel(loc, global_tid);
612 }
613 
621 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
622  kmp_internal_control_t *top;
623  kmp_info_t *this_thr;
624  kmp_team_t *serial_team;
625 
626  KC_TRACE(10,
627  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
628 
629  /* skip all this code for autopar serialized loops since it results in
630  unacceptable overhead */
631  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
632  return;
633 
634  // Not autopar code
635  __kmp_assert_valid_gtid(global_tid);
636  if (!TCR_4(__kmp_init_parallel))
637  __kmp_parallel_initialize();
638 
639  __kmp_resume_if_soft_paused();
640 
641  this_thr = __kmp_threads[global_tid];
642  serial_team = this_thr->th.th_serial_team;
643 
644  kmp_task_team_t *task_team = this_thr->th.th_task_team;
645  // we need to wait for the proxy tasks before finishing the thread
646  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
647  task_team->tt.tt_hidden_helper_task_encountered))
648  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
649 
650  KMP_MB();
651  KMP_DEBUG_ASSERT(serial_team);
652  KMP_ASSERT(serial_team->t.t_serialized);
653  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
654  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
655  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
656  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
657 
658 #if OMPT_SUPPORT
659  if (ompt_enabled.enabled &&
660  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
661  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
662  if (ompt_enabled.ompt_callback_implicit_task) {
663  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
664  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
665  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
666  }
667 
668  // clear the task id only after unlinking the task
669  ompt_data_t *parent_task_data;
670  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
671 
672  if (ompt_enabled.ompt_callback_parallel_end) {
673  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
674  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
675  ompt_parallel_invoker_program | ompt_parallel_team,
676  OMPT_LOAD_RETURN_ADDRESS(global_tid));
677  }
678  __ompt_lw_taskteam_unlink(this_thr);
679  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
680  }
681 #endif
682 
683  /* If necessary, pop the internal control stack values and replace the team
684  * values */
685  top = serial_team->t.t_control_stack_top;
686  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
687  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
688  serial_team->t.t_control_stack_top = top->next;
689  __kmp_free(top);
690  }
691 
692  /* pop dispatch buffers stack */
693  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
694  {
695  dispatch_private_info_t *disp_buffer =
696  serial_team->t.t_dispatch->th_disp_buffer;
697  serial_team->t.t_dispatch->th_disp_buffer =
698  serial_team->t.t_dispatch->th_disp_buffer->next;
699  __kmp_free(disp_buffer);
700  }
701 
702  /* pop the task team stack */
703  if (serial_team->t.t_serialized > 1) {
704  __kmp_pop_task_team_node(this_thr, serial_team);
705  }
706 
707  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
708 
709  --serial_team->t.t_serialized;
710  if (serial_team->t.t_serialized == 0) {
711 
712  /* return to the parallel section */
713 
714 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
715  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
716  __kmp_clear_x87_fpu_status_word();
717  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
718  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
719  }
720 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
721 
722  __kmp_pop_current_task_from_thread(this_thr);
723 #if OMPD_SUPPORT
724  if (ompd_state & OMPD_ENABLE_BP)
725  ompd_bp_parallel_end();
726 #endif
727 
728  this_thr->th.th_team = serial_team->t.t_parent;
729  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
730 
731  /* restore values cached in the thread */
732  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
733  this_thr->th.th_team_master =
734  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
735  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
736 
737  /* TODO the below shouldn't need to be adjusted for serialized teams */
738  this_thr->th.th_dispatch =
739  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
740 
741  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
742  this_thr->th.th_current_task->td_flags.executing = 1;
743 
744  if (__kmp_tasking_mode != tskm_immediate_exec) {
745  // Restore task state from serial team structure
746  KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
747  serial_team->t.t_primary_task_state == 1);
748  this_thr->th.th_task_state =
749  (kmp_uint8)serial_team->t.t_primary_task_state;
750  // Copy the task team from the new child / old parent team to the thread.
751  this_thr->th.th_task_team =
752  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
753  KA_TRACE(20,
754  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
755  "team %p\n",
756  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
757  }
758 #if KMP_AFFINITY_SUPPORTED
759  if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
760  __kmp_reset_root_init_mask(global_tid);
761  }
762 #endif
763  } else {
764  if (__kmp_tasking_mode != tskm_immediate_exec) {
765  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
766  "depth of serial team %p to %d\n",
767  global_tid, serial_team, serial_team->t.t_serialized));
768  }
769  }
770 
771  serial_team->t.t_level--;
772  if (__kmp_env_consistency_check)
773  __kmp_pop_parallel(global_tid, NULL);
774 #if OMPT_SUPPORT
775  if (ompt_enabled.enabled)
776  this_thr->th.ompt_thread_info.state =
777  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
778  : ompt_state_work_parallel);
779 #endif
780 }
781 
790 void __kmpc_flush(ident_t *loc) {
791  KC_TRACE(10, ("__kmpc_flush: called\n"));
792 
793  /* need an explicit __mf() here since the library uses volatile instead */
794  KMP_MFENCE(); /* Flush all pending memory write invalidates. */
795 
796 #if OMPT_SUPPORT && OMPT_OPTIONAL
797  if (ompt_enabled.ompt_callback_flush) {
798  ompt_callbacks.ompt_callback(ompt_callback_flush)(
799  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
800  }
801 #endif
802 }
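// Editorial sketch (hypothetical lowering): "#pragma omp flush" reduces to a
// single call,
//
//   __kmpc_flush(&loc);
//
// and the runtime issues a full memory fence regardless of any flush list in
// the directive.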
803 
804 /* -------------------------------------------------------------------------- */
812 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
813  KMP_COUNT_BLOCK(OMP_BARRIER);
814  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
815  __kmp_assert_valid_gtid(global_tid);
816 
817  if (!TCR_4(__kmp_init_parallel))
818  __kmp_parallel_initialize();
819 
820  __kmp_resume_if_soft_paused();
821 
822  if (__kmp_env_consistency_check) {
823  if (loc == 0) {
824  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
825  }
826  __kmp_check_barrier(global_tid, ct_barrier, loc);
827  }
828 
829 #if OMPT_SUPPORT
830  ompt_frame_t *ompt_frame;
831  if (ompt_enabled.enabled) {
832  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
833  if (ompt_frame->enter_frame.ptr == NULL)
834  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
835  }
836  OMPT_STORE_RETURN_ADDRESS(global_tid);
837 #endif
838  __kmp_threads[global_tid]->th.th_ident = loc;
839  // TODO: explicit barrier_wait_id:
840  // this function is called when 'barrier' directive is present or
841  // implicit barrier at the end of a worksharing construct.
842  // 1) better to add a per-thread barrier counter to a thread data structure
843  // 2) set to 0 when a new team is created
844  // 3) no sync is required
845 
846  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
847 #if OMPT_SUPPORT && OMPT_OPTIONAL
848  if (ompt_enabled.enabled) {
849  ompt_frame->enter_frame = ompt_data_none;
850  }
851 #endif
852 }
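// Editorial sketch (hypothetical lowering): both an explicit
// "#pragma omp barrier" and the implicit barrier at the end of a worksharing
// construct without nowait reduce to
//
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
//
// with the ident_t flags distinguishing explicit from implicit barriers for
// tools and consistency checks.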
853 
854 /* The BARRIER for a MASTER section is always explicit */
861 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
862  int status = 0;
863 
864  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
865  __kmp_assert_valid_gtid(global_tid);
866 
867  if (!TCR_4(__kmp_init_parallel))
868  __kmp_parallel_initialize();
869 
870  __kmp_resume_if_soft_paused();
871 
872  if (KMP_MASTER_GTID(global_tid)) {
873  KMP_COUNT_BLOCK(OMP_MASTER);
874  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
875  status = 1;
876  }
877 
878 #if OMPT_SUPPORT && OMPT_OPTIONAL
879  if (status) {
880  if (ompt_enabled.ompt_callback_masked) {
881  kmp_info_t *this_thr = __kmp_threads[global_tid];
882  kmp_team_t *team = this_thr->th.th_team;
883 
884  int tid = __kmp_tid_from_gtid(global_tid);
885  ompt_callbacks.ompt_callback(ompt_callback_masked)(
886  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
887  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
888  OMPT_GET_RETURN_ADDRESS(0));
889  }
890  }
891 #endif
892 
893  if (__kmp_env_consistency_check) {
894 #if KMP_USE_DYNAMIC_LOCK
895  if (status)
896  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
897  else
898  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
899 #else
900  if (status)
901  __kmp_push_sync(global_tid, ct_master, loc, NULL);
902  else
903  __kmp_check_sync(global_tid, ct_master, loc, NULL);
904 #endif
905  }
906 
907  return status;
908 }
909 
918 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
919  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
920  __kmp_assert_valid_gtid(global_tid);
921  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
922  KMP_POP_PARTITIONED_TIMER();
923 
924 #if OMPT_SUPPORT && OMPT_OPTIONAL
925  kmp_info_t *this_thr = __kmp_threads[global_tid];
926  kmp_team_t *team = this_thr->th.th_team;
927  if (ompt_enabled.ompt_callback_masked) {
928  int tid = __kmp_tid_from_gtid(global_tid);
929  ompt_callbacks.ompt_callback(ompt_callback_masked)(
930  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
931  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
932  OMPT_GET_RETURN_ADDRESS(0));
933  }
934 #endif
935 
936  if (__kmp_env_consistency_check) {
937  if (KMP_MASTER_GTID(global_tid))
938  __kmp_pop_sync(global_tid, ct_master, loc);
939  }
940 }
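// Editorial sketch (hypothetical lowering): "#pragma omp master" becomes a
// guarded region around the pair above,
//
//   if (__kmpc_master(&loc, gtid)) {
//     /* master-only code */
//     __kmpc_end_master(&loc, gtid);
//   }
//
// so __kmpc_end_master is reached only by the thread for which __kmpc_master
// returned 1.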
941 
950 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
951  int status = 0;
952  int tid;
953  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
954  __kmp_assert_valid_gtid(global_tid);
955 
956  if (!TCR_4(__kmp_init_parallel))
957  __kmp_parallel_initialize();
958 
959  __kmp_resume_if_soft_paused();
960 
961  tid = __kmp_tid_from_gtid(global_tid);
962  if (tid == filter) {
963  KMP_COUNT_BLOCK(OMP_MASKED);
964  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
965  status = 1;
966  }
967 
968 #if OMPT_SUPPORT && OMPT_OPTIONAL
969  if (status) {
970  if (ompt_enabled.ompt_callback_masked) {
971  kmp_info_t *this_thr = __kmp_threads[global_tid];
972  kmp_team_t *team = this_thr->th.th_team;
973  ompt_callbacks.ompt_callback(ompt_callback_masked)(
974  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
975  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
976  OMPT_GET_RETURN_ADDRESS(0));
977  }
978  }
979 #endif
980 
981  if (__kmp_env_consistency_check) {
982 #if KMP_USE_DYNAMIC_LOCK
983  if (status)
984  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
985  else
986  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
987 #else
988  if (status)
989  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
990  else
991  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
992 #endif
993  }
994 
995  return status;
996 }
997 
1006 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
1007  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
1008  __kmp_assert_valid_gtid(global_tid);
1009  KMP_POP_PARTITIONED_TIMER();
1010 
1011 #if OMPT_SUPPORT && OMPT_OPTIONAL
1012  kmp_info_t *this_thr = __kmp_threads[global_tid];
1013  kmp_team_t *team = this_thr->th.th_team;
1014  if (ompt_enabled.ompt_callback_masked) {
1015  int tid = __kmp_tid_from_gtid(global_tid);
1016  ompt_callbacks.ompt_callback(ompt_callback_masked)(
1017  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
1018  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1019  OMPT_GET_RETURN_ADDRESS(0));
1020  }
1021 #endif
1022 
1023  if (__kmp_env_consistency_check) {
1024  __kmp_pop_sync(global_tid, ct_masked, loc);
1025  }
1026 }
1027 
1035 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
1036  int cid = 0;
1037  kmp_info_t *th;
1038  KMP_DEBUG_ASSERT(__kmp_init_serial);
1039 
1040  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
1041  __kmp_assert_valid_gtid(gtid);
1042 
1043  if (!TCR_4(__kmp_init_parallel))
1044  __kmp_parallel_initialize();
1045 
1046  __kmp_resume_if_soft_paused();
1047 
1048 #if USE_ITT_BUILD
1049  __kmp_itt_ordered_prep(gtid);
1050 // TODO: ordered_wait_id
1051 #endif /* USE_ITT_BUILD */
1052 
1053  th = __kmp_threads[gtid];
1054 
1055 #if OMPT_SUPPORT && OMPT_OPTIONAL
1056  kmp_team_t *team;
1057  ompt_wait_id_t lck;
1058  void *codeptr_ra;
1059  OMPT_STORE_RETURN_ADDRESS(gtid);
1060  if (ompt_enabled.enabled) {
1061  team = __kmp_team_from_gtid(gtid);
1062  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1063  /* OMPT state update */
1064  th->th.ompt_thread_info.wait_id = lck;
1065  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1066 
1067  /* OMPT event callback */
1068  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1069  if (ompt_enabled.ompt_callback_mutex_acquire) {
1070  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1071  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1072  codeptr_ra);
1073  }
1074  }
1075 #endif
1076 
1077  if (th->th.th_dispatch->th_deo_fcn != 0)
1078  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1079  else
1080  __kmp_parallel_deo(&gtid, &cid, loc);
1081 
1082 #if OMPT_SUPPORT && OMPT_OPTIONAL
1083  if (ompt_enabled.enabled) {
1084  /* OMPT state update */
1085  th->th.ompt_thread_info.state = ompt_state_work_parallel;
1086  th->th.ompt_thread_info.wait_id = 0;
1087 
1088  /* OMPT event callback */
1089  if (ompt_enabled.ompt_callback_mutex_acquired) {
1090  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1091  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1092  }
1093  }
1094 #endif
1095 
1096 #if USE_ITT_BUILD
1097  __kmp_itt_ordered_start(gtid);
1098 #endif /* USE_ITT_BUILD */
1099 }
1100 
1108 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1109  int cid = 0;
1110  kmp_info_t *th;
1111 
1112  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1113  __kmp_assert_valid_gtid(gtid);
1114 
1115 #if USE_ITT_BUILD
1116  __kmp_itt_ordered_end(gtid);
1117 // TODO: ordered_wait_id
1118 #endif /* USE_ITT_BUILD */
1119 
1120  th = __kmp_threads[gtid];
1121 
1122  if (th->th.th_dispatch->th_dxo_fcn != 0)
1123  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1124  else
1125  __kmp_parallel_dxo(&gtid, &cid, loc);
1126 
1127 #if OMPT_SUPPORT && OMPT_OPTIONAL
1128  OMPT_STORE_RETURN_ADDRESS(gtid);
1129  if (ompt_enabled.ompt_callback_mutex_released) {
1130  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1131  ompt_mutex_ordered,
1132  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1133  ->t.t_ordered.dt.t_value,
1134  OMPT_LOAD_RETURN_ADDRESS(gtid));
1135  }
1136 #endif
1137 }
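// Editorial sketch (hypothetical lowering): inside a worksharing loop with an
// ordered clause, each "#pragma omp ordered" block is bracketed by the pair
// above:
//
//   /* per chunk of an "ordered" loop */
//   __kmpc_ordered(&loc, gtid);
//   /* code that must execute in iteration order */
//   __kmpc_end_ordered(&loc, gtid);
//
// The dispatcher's th_deo/th_dxo hooks enforce the iteration ordering.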
1138 
1139 #if KMP_USE_DYNAMIC_LOCK
1140 
1141 static __forceinline void
1142 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1143  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1144  // Pointer to the allocated indirect lock is written to crit, while indexing
1145  // is ignored.
1146  void *idx;
1147  kmp_indirect_lock_t **lck;
1148  lck = (kmp_indirect_lock_t **)crit;
1149  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1150  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1151  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1152  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1153  KA_TRACE(20,
1154  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1155 #if USE_ITT_BUILD
1156  __kmp_itt_critical_creating(ilk->lock, loc);
1157 #endif
1158  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1159  if (status == 0) {
1160 #if USE_ITT_BUILD
1161  __kmp_itt_critical_destroyed(ilk->lock);
1162 #endif
1163  // We don't really need to destroy the unclaimed lock here since it will be
1164  // cleaned up at program exit.
1165  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1166  }
1167  KMP_DEBUG_ASSERT(*lck != NULL);
1168 }
1169 
1170 // Fast-path acquire tas lock
1171 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1172  { \
1173  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1174  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1175  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1176  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1177  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1178  kmp_uint32 spins; \
1179  KMP_FSYNC_PREPARE(l); \
1180  KMP_INIT_YIELD(spins); \
1181  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1182  do { \
1183  if (TCR_4(__kmp_nth) > \
1184  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1185  KMP_YIELD(TRUE); \
1186  } else { \
1187  KMP_YIELD_SPIN(spins); \
1188  } \
1189  __kmp_spin_backoff(&backoff); \
1190  } while ( \
1191  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1192  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1193  } \
1194  KMP_FSYNC_ACQUIRED(l); \
1195  }
1196 
1197 // Fast-path test tas lock
1198 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1199  { \
1200  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1201  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1202  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1203  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1204  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1205  }
1206 
1207 // Fast-path release tas lock
1208 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1209  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1210 
1211 #if KMP_USE_FUTEX
1212 
1213 #include <sys/syscall.h>
1214 #include <unistd.h>
1215 #ifndef FUTEX_WAIT
1216 #define FUTEX_WAIT 0
1217 #endif
1218 #ifndef FUTEX_WAKE
1219 #define FUTEX_WAKE 1
1220 #endif
1221 
1222 // Fast-path acquire futex lock
1223 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1224  { \
1225  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1226  kmp_int32 gtid_code = (gtid + 1) << 1; \
1227  KMP_MB(); \
1228  KMP_FSYNC_PREPARE(ftx); \
1229  kmp_int32 poll_val; \
1230  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1231  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1232  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1233  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1234  if (!cond) { \
1235  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1236  poll_val | \
1237  KMP_LOCK_BUSY(1, futex))) { \
1238  continue; \
1239  } \
1240  poll_val |= KMP_LOCK_BUSY(1, futex); \
1241  } \
1242  kmp_int32 rc; \
1243  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1244  NULL, NULL, 0)) != 0) { \
1245  continue; \
1246  } \
1247  gtid_code |= 1; \
1248  } \
1249  KMP_FSYNC_ACQUIRED(ftx); \
1250  }
1251 
1252 // Fast-path test futex lock
1253 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1254  { \
1255  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1256  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1257  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1258  KMP_FSYNC_ACQUIRED(ftx); \
1259  rc = TRUE; \
1260  } else { \
1261  rc = FALSE; \
1262  } \
1263  }
1264 
1265 // Fast-path release futex lock
1266 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1267  { \
1268  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1269  KMP_MB(); \
1270  KMP_FSYNC_RELEASING(ftx); \
1271  kmp_int32 poll_val = \
1272  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1273  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1274  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1275  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1276  } \
1277  KMP_MB(); \
1278  KMP_YIELD_OVERSUB(); \
1279  }
1280 
1281 #endif // KMP_USE_FUTEX
1282 
1283 #else // KMP_USE_DYNAMIC_LOCK
1284 
1285 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1286  ident_t const *loc,
1287  kmp_int32 gtid) {
1288  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1289 
1290  // Because of the double-check, the following load doesn't need to be volatile
1291  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1292 
1293  if (lck == NULL) {
1294  void *idx;
1295 
1296  // Allocate & initialize the lock.
1297  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1298  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1299  __kmp_init_user_lock_with_checks(lck);
1300  __kmp_set_user_lock_location(lck, loc);
1301 #if USE_ITT_BUILD
1302  __kmp_itt_critical_creating(lck);
1303 // __kmp_itt_critical_creating() should be called *before* the first use of
1304 // the underlying lock. This is the only place where we can guarantee it.
1305 // There is a chance the lock will be destroyed without ever being used, but
1306 // that is not a problem: this is not a real event seen by the user, just the
1307 // assignment of a name to an object (the lock). See kmp_itt.h for details.
1308 #endif /* USE_ITT_BUILD */
1309 
1310  // Use a cmpxchg instruction to slam the start of the critical section with
1311  // the lock pointer. If another thread beat us to it, deallocate the lock,
1312  // and use the lock that the other thread allocated.
1313  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1314 
1315  if (status == 0) {
1316 // Deallocate the lock and reload the value.
1317 #if USE_ITT_BUILD
1318  __kmp_itt_critical_destroyed(lck);
1319 // Let ITT know the lock is destroyed and the same memory location may be reused
1320 // for another purpose.
1321 #endif /* USE_ITT_BUILD */
1322  __kmp_destroy_user_lock_with_checks(lck);
1323  __kmp_user_lock_free(&idx, gtid, lck);
1324  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1325  KMP_DEBUG_ASSERT(lck != NULL);
1326  }
1327  }
1328  return lck;
1329 }
1330 
1331 #endif // KMP_USE_DYNAMIC_LOCK
1332 
1343 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1344  kmp_critical_name *crit) {
1345 #if KMP_USE_DYNAMIC_LOCK
1346 #if OMPT_SUPPORT && OMPT_OPTIONAL
1347  OMPT_STORE_RETURN_ADDRESS(global_tid);
1348 #endif // OMPT_SUPPORT
1349  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1350 #else
1351  KMP_COUNT_BLOCK(OMP_CRITICAL);
1352 #if OMPT_SUPPORT && OMPT_OPTIONAL
1353  ompt_state_t prev_state = ompt_state_undefined;
1354  ompt_thread_info_t ti;
1355 #endif
1356  kmp_user_lock_p lck;
1357 
1358  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1359  __kmp_assert_valid_gtid(global_tid);
1360 
1361  // TODO: add THR_OVHD_STATE
1362 
1363  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1364  KMP_CHECK_USER_LOCK_INIT();
1365 
1366  if ((__kmp_user_lock_kind == lk_tas) &&
1367  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1368  lck = (kmp_user_lock_p)crit;
1369  }
1370 #if KMP_USE_FUTEX
1371  else if ((__kmp_user_lock_kind == lk_futex) &&
1372  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1373  lck = (kmp_user_lock_p)crit;
1374  }
1375 #endif
1376  else { // ticket, queuing or drdpa
1377  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1378  }
1379 
1380  if (__kmp_env_consistency_check)
1381  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1382 
1383  // since the critical directive binds to all threads, not just the current
1384  // team, we have to check this even if we are in a serialized team.
1385  // also, even if we are the uber thread, we still have to acquire the lock,
1386  // as we have to contend with sibling threads.
1387 
1388 #if USE_ITT_BUILD
1389  __kmp_itt_critical_acquiring(lck);
1390 #endif /* USE_ITT_BUILD */
1391 #if OMPT_SUPPORT && OMPT_OPTIONAL
1392  OMPT_STORE_RETURN_ADDRESS(global_tid);
1393  void *codeptr_ra = NULL;
1394  if (ompt_enabled.enabled) {
1395  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1396  /* OMPT state update */
1397  prev_state = ti.state;
1398  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1399  ti.state = ompt_state_wait_critical;
1400 
1401  /* OMPT event callback */
1402  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1403  if (ompt_enabled.ompt_callback_mutex_acquire) {
1404  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1405  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1406  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1407  }
1408  }
1409 #endif
1410  // The value of 'crit' should be usable as the critical_id of the critical
1411  // section directive.
1412  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1413 
1414 #if USE_ITT_BUILD
1415  __kmp_itt_critical_acquired(lck);
1416 #endif /* USE_ITT_BUILD */
1417 #if OMPT_SUPPORT && OMPT_OPTIONAL
1418  if (ompt_enabled.enabled) {
1419  /* OMPT state update */
1420  ti.state = prev_state;
1421  ti.wait_id = 0;
1422 
1423  /* OMPT event callback */
1424  if (ompt_enabled.ompt_callback_mutex_acquired) {
1425  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1426  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1427  }
1428  }
1429 #endif
1430  KMP_POP_PARTITIONED_TIMER();
1431 
1432  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1433  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1434 #endif // KMP_USE_DYNAMIC_LOCK
1435 }
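// Editorial sketch (hypothetical lowering): "#pragma omp critical(name)"
// brackets the protected code with __kmpc_critical and __kmpc_end_critical
// (defined later in this file), sharing one lock word per critical name that
// the compiler emits; the symbol name below is invented:
//
//   static kmp_critical_name lock_for_name; // zero-initialized lock storage
//   ...
//   __kmpc_critical(&loc, gtid, &lock_for_name);
//   /* protected code */
//   __kmpc_end_critical(&loc, gtid, &lock_for_name);
//
// The first thread to arrive initializes the underlying lock in place, or via
// the indirect-lock table, as handled above.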
1436 
1437 #if KMP_USE_DYNAMIC_LOCK
1438 
1439 // Converts the given hint to an internal lock implementation
1440 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1441 #if KMP_USE_TSX
1442 #define KMP_TSX_LOCK(seq) lockseq_##seq
1443 #else
1444 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1445 #endif
1446 
1447 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1448 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1449 #else
1450 #define KMP_CPUINFO_RTM 0
1451 #endif
1452 
1453  // Hints that do not require further logic
1454  if (hint & kmp_lock_hint_hle)
1455  return KMP_TSX_LOCK(hle);
1456  if (hint & kmp_lock_hint_rtm)
1457  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1458  if (hint & kmp_lock_hint_adaptive)
1459  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1460 
1461  // Rule out conflicting hints first by returning the default lock
1462  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1463  return __kmp_user_lock_seq;
1464  if ((hint & omp_lock_hint_speculative) &&
1465  (hint & omp_lock_hint_nonspeculative))
1466  return __kmp_user_lock_seq;
1467 
1468  // Do not even consider speculation when it appears to be contended
1469  if (hint & omp_lock_hint_contended)
1470  return lockseq_queuing;
1471 
1472  // Uncontended lock without speculation
1473  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1474  return lockseq_tas;
1475 
1476  // Use RTM lock for speculation
1477  if (hint & omp_lock_hint_speculative)
1478  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1479 
1480  return __kmp_user_lock_seq;
1481 }
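// Editorial examples (illustrative) of the mapping above, assuming KMP_USE_TSX
// and a CPU where RTM was detected (KMP_CPUINFO_RTM != 0):
//
//   __kmp_map_hint_to_lock(omp_lock_hint_uncontended)  -> lockseq_tas
//   __kmp_map_hint_to_lock(omp_lock_hint_contended)    -> lockseq_queuing
//   __kmp_map_hint_to_lock(omp_lock_hint_speculative)  -> lockseq_rtm_spin
//   __kmp_map_hint_to_lock(omp_lock_hint_contended |
//                          omp_lock_hint_uncontended)  -> __kmp_user_lock_seq
//
// Without RTM support, the speculative hints fall back to __kmp_user_lock_seq.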
1482 
1483 #if OMPT_SUPPORT && OMPT_OPTIONAL
1484 #if KMP_USE_DYNAMIC_LOCK
1485 static kmp_mutex_impl_t
1486 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1487  if (user_lock) {
1488  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1489  case 0:
1490  break;
1491 #if KMP_USE_FUTEX
1492  case locktag_futex:
1493  return kmp_mutex_impl_queuing;
1494 #endif
1495  case locktag_tas:
1496  return kmp_mutex_impl_spin;
1497 #if KMP_USE_TSX
1498  case locktag_hle:
1499  case locktag_rtm_spin:
1500  return kmp_mutex_impl_speculative;
1501 #endif
1502  default:
1503  return kmp_mutex_impl_none;
1504  }
1505  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1506  }
1507  KMP_ASSERT(ilock);
1508  switch (ilock->type) {
1509 #if KMP_USE_TSX
1510  case locktag_adaptive:
1511  case locktag_rtm_queuing:
1512  return kmp_mutex_impl_speculative;
1513 #endif
1514  case locktag_nested_tas:
1515  return kmp_mutex_impl_spin;
1516 #if KMP_USE_FUTEX
1517  case locktag_nested_futex:
1518 #endif
1519  case locktag_ticket:
1520  case locktag_queuing:
1521  case locktag_drdpa:
1522  case locktag_nested_ticket:
1523  case locktag_nested_queuing:
1524  case locktag_nested_drdpa:
1525  return kmp_mutex_impl_queuing;
1526  default:
1527  return kmp_mutex_impl_none;
1528  }
1529 }
1530 #else
1531 // For locks without dynamic binding
1532 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1533  switch (__kmp_user_lock_kind) {
1534  case lk_tas:
1535  return kmp_mutex_impl_spin;
1536 #if KMP_USE_FUTEX
1537  case lk_futex:
1538 #endif
1539  case lk_ticket:
1540  case lk_queuing:
1541  case lk_drdpa:
1542  return kmp_mutex_impl_queuing;
1543 #if KMP_USE_TSX
1544  case lk_hle:
1545  case lk_rtm_queuing:
1546  case lk_rtm_spin:
1547  case lk_adaptive:
1548  return kmp_mutex_impl_speculative;
1549 #endif
1550  default:
1551  return kmp_mutex_impl_none;
1552  }
1553 }
1554 #endif // KMP_USE_DYNAMIC_LOCK
1555 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1556 
1570 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1571  kmp_critical_name *crit, uint32_t hint) {
1572  KMP_COUNT_BLOCK(OMP_CRITICAL);
1573  kmp_user_lock_p lck;
1574 #if OMPT_SUPPORT && OMPT_OPTIONAL
1575  ompt_state_t prev_state = ompt_state_undefined;
1576  ompt_thread_info_t ti;
1577  // This is the case if called from __kmpc_critical:
1578  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1579  if (!codeptr)
1580  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1581 #endif
1582 
1583  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1584  __kmp_assert_valid_gtid(global_tid);
1585 
1586  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1587  // Check if it is initialized.
1588  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1589  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1590  if (*lk == 0) {
1591  if (KMP_IS_D_LOCK(lockseq)) {
1592  (void)KMP_COMPARE_AND_STORE_ACQ32(
1593  (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1594  KMP_GET_D_TAG(lockseq));
1595  } else {
1596  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1597  }
1598  }
1599  // Branch for accessing the actual lock object and set operation. This
1600  // branching is inevitable since this lock initialization does not follow the
1601  // normal dispatch path (lock table is not used).
1602  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1603  lck = (kmp_user_lock_p)lk;
1604  if (__kmp_env_consistency_check) {
1605  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1606  __kmp_map_hint_to_lock(hint));
1607  }
1608 #if USE_ITT_BUILD
1609  __kmp_itt_critical_acquiring(lck);
1610 #endif
1611 #if OMPT_SUPPORT && OMPT_OPTIONAL
1612  if (ompt_enabled.enabled) {
1613  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1614  /* OMPT state update */
1615  prev_state = ti.state;
1616  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1617  ti.state = ompt_state_wait_critical;
1618 
1619  /* OMPT event callback */
1620  if (ompt_enabled.ompt_callback_mutex_acquire) {
1621  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1622  ompt_mutex_critical, (unsigned int)hint,
1623  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1624  codeptr);
1625  }
1626  }
1627 #endif
1628 #if KMP_USE_INLINED_TAS
1629  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1630  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1631  } else
1632 #elif KMP_USE_INLINED_FUTEX
1633  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1634  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1635  } else
1636 #endif
1637  {
1638  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1639  }
1640  } else {
1641  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1642  lck = ilk->lock;
1643  if (__kmp_env_consistency_check) {
1644  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1645  __kmp_map_hint_to_lock(hint));
1646  }
1647 #if USE_ITT_BUILD
1648  __kmp_itt_critical_acquiring(lck);
1649 #endif
1650 #if OMPT_SUPPORT && OMPT_OPTIONAL
1651  if (ompt_enabled.enabled) {
1652  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1653  /* OMPT state update */
1654  prev_state = ti.state;
1655  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1656  ti.state = ompt_state_wait_critical;
1657 
1658  /* OMPT event callback */
1659  if (ompt_enabled.ompt_callback_mutex_acquire) {
1660  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1661  ompt_mutex_critical, (unsigned int)hint,
1662  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1663  codeptr);
1664  }
1665  }
1666 #endif
1667  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1668  }
1669  KMP_POP_PARTITIONED_TIMER();
1670 
1671 #if USE_ITT_BUILD
1672  __kmp_itt_critical_acquired(lck);
1673 #endif /* USE_ITT_BUILD */
1674 #if OMPT_SUPPORT && OMPT_OPTIONAL
1675  if (ompt_enabled.enabled) {
1676  /* OMPT state update */
1677  ti.state = prev_state;
1678  ti.wait_id = 0;
1679 
1680  /* OMPT event callback */
1681  if (ompt_enabled.ompt_callback_mutex_acquired) {
1682  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1683  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1684  }
1685  }
1686 #endif
1687 
1688  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1689  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1690 } // __kmpc_critical_with_hint
1691 
1692 #endif // KMP_USE_DYNAMIC_LOCK
1693 
1703 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1704  kmp_critical_name *crit) {
1705  kmp_user_lock_p lck;
1706 
1707  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1708 
1709 #if KMP_USE_DYNAMIC_LOCK
1710  int locktag = KMP_EXTRACT_D_TAG(crit);
1711  if (locktag) {
1712  lck = (kmp_user_lock_p)crit;
1713  KMP_ASSERT(lck != NULL);
1714  if (__kmp_env_consistency_check) {
1715  __kmp_pop_sync(global_tid, ct_critical, loc);
1716  }
1717 #if USE_ITT_BUILD
1718  __kmp_itt_critical_releasing(lck);
1719 #endif
1720 #if KMP_USE_INLINED_TAS
1721  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1722  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1723  } else
1724 #elif KMP_USE_INLINED_FUTEX
1725  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1726  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1727  } else
1728 #endif
1729  {
1730  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1731  }
1732  } else {
1733  kmp_indirect_lock_t *ilk =
1734  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1735  KMP_ASSERT(ilk != NULL);
1736  lck = ilk->lock;
1737  if (__kmp_env_consistency_check) {
1738  __kmp_pop_sync(global_tid, ct_critical, loc);
1739  }
1740 #if USE_ITT_BUILD
1741  __kmp_itt_critical_releasing(lck);
1742 #endif
1743  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1744  }
1745 
1746 #else // KMP_USE_DYNAMIC_LOCK
1747 
1748  if ((__kmp_user_lock_kind == lk_tas) &&
1749  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1750  lck = (kmp_user_lock_p)crit;
1751  }
1752 #if KMP_USE_FUTEX
1753  else if ((__kmp_user_lock_kind == lk_futex) &&
1754  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1755  lck = (kmp_user_lock_p)crit;
1756  }
1757 #endif
1758  else { // ticket, queuing or drdpa
1759  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1760  }
1761 
1762  KMP_ASSERT(lck != NULL);
1763 
1764  if (__kmp_env_consistency_check)
1765  __kmp_pop_sync(global_tid, ct_critical, loc);
1766 
1767 #if USE_ITT_BUILD
1768  __kmp_itt_critical_releasing(lck);
1769 #endif /* USE_ITT_BUILD */
1770  // The value of 'crit' should be usable as the critical_id of the critical
1771  // section directive.
1772  __kmp_release_user_lock_with_checks(lck, global_tid);
1773 
1774 #endif // KMP_USE_DYNAMIC_LOCK
1775 
1776 #if OMPT_SUPPORT && OMPT_OPTIONAL
1777  /* OMPT release event triggers after lock is released; place here to trigger
1778  * for all #if branches */
1779  OMPT_STORE_RETURN_ADDRESS(global_tid);
1780  if (ompt_enabled.ompt_callback_mutex_released) {
1781  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1782  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1783  OMPT_LOAD_RETURN_ADDRESS(0));
1784  }
1785 #endif
1786 
1787  KMP_POP_PARTITIONED_TIMER();
1788  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1789 }
1790 
1800 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1801  int status;
1802  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1803  __kmp_assert_valid_gtid(global_tid);
1804 
1805  if (!TCR_4(__kmp_init_parallel))
1806  __kmp_parallel_initialize();
1807 
1808  __kmp_resume_if_soft_paused();
1809 
1810  if (__kmp_env_consistency_check)
1811  __kmp_check_barrier(global_tid, ct_barrier, loc);
1812 
1813 #if OMPT_SUPPORT
1814  ompt_frame_t *ompt_frame;
1815  if (ompt_enabled.enabled) {
1816  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1817  if (ompt_frame->enter_frame.ptr == NULL)
1818  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1819  }
1820  OMPT_STORE_RETURN_ADDRESS(global_tid);
1821 #endif
1822 #if USE_ITT_NOTIFY
1823  __kmp_threads[global_tid]->th.th_ident = loc;
1824 #endif
1825  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1826 #if OMPT_SUPPORT && OMPT_OPTIONAL
1827  if (ompt_enabled.enabled) {
1828  ompt_frame->enter_frame = ompt_data_none;
1829  }
1830 #endif
1831 
1832  return (status != 0) ? 0 : 1;
1833 }
1834 
1844 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1845  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1846  __kmp_assert_valid_gtid(global_tid);
1847  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1848 }
1849 
1860 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1861  kmp_int32 ret;
1862  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1863  __kmp_assert_valid_gtid(global_tid);
1864 
1865  if (!TCR_4(__kmp_init_parallel))
1866  __kmp_parallel_initialize();
1867 
1868  __kmp_resume_if_soft_paused();
1869 
1870  if (__kmp_env_consistency_check) {
1871  if (loc == 0) {
1872  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1873  }
1874  __kmp_check_barrier(global_tid, ct_barrier, loc);
1875  }
1876 
1877 #if OMPT_SUPPORT
1878  ompt_frame_t *ompt_frame;
1879  if (ompt_enabled.enabled) {
1880  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1881  if (ompt_frame->enter_frame.ptr == NULL)
1882  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1883  }
1884  OMPT_STORE_RETURN_ADDRESS(global_tid);
1885 #endif
1886 #if USE_ITT_NOTIFY
1887  __kmp_threads[global_tid]->th.th_ident = loc;
1888 #endif
1889  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1890 #if OMPT_SUPPORT && OMPT_OPTIONAL
1891  if (ompt_enabled.enabled) {
1892  ompt_frame->enter_frame = ompt_data_none;
1893  }
1894 #endif
1895 
1896  ret = __kmpc_master(loc, global_tid);
1897 
1898  if (__kmp_env_consistency_check) {
1899  /* there's no __kmpc_end_master called; so the (stats) */
1900  /* actions of __kmpc_end_master are done here */
1901  if (ret) {
1902  /* only one thread should do the pop since only */
1903  /* one did the push (see __kmpc_master()) */
1904  __kmp_pop_sync(global_tid, ct_master, loc);
1905  }
1906  }
1907 
1908  return (ret);
1909 }
1910 
1911 /* The BARRIER for a SINGLE process section is always explicit */
1923 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1924  __kmp_assert_valid_gtid(global_tid);
1925  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1926 
1927  if (rc) {
1928  // We are going to execute the single statement, so we should count it.
1929  KMP_COUNT_BLOCK(OMP_SINGLE);
1930  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1931  }
1932 
1933 #if OMPT_SUPPORT && OMPT_OPTIONAL
1934  kmp_info_t *this_thr = __kmp_threads[global_tid];
1935  kmp_team_t *team = this_thr->th.th_team;
1936  int tid = __kmp_tid_from_gtid(global_tid);
1937 
1938  if (ompt_enabled.enabled) {
1939  if (rc) {
1940  if (ompt_enabled.ompt_callback_work) {
1941  ompt_callbacks.ompt_callback(ompt_callback_work)(
1942  ompt_work_single_executor, ompt_scope_begin,
1943  &(team->t.ompt_team_info.parallel_data),
1944  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1945  1, OMPT_GET_RETURN_ADDRESS(0));
1946  }
1947  } else {
1948  if (ompt_enabled.ompt_callback_work) {
1949  ompt_callbacks.ompt_callback(ompt_callback_work)(
1950  ompt_work_single_other, ompt_scope_begin,
1951  &(team->t.ompt_team_info.parallel_data),
1952  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1953  1, OMPT_GET_RETURN_ADDRESS(0));
1954  ompt_callbacks.ompt_callback(ompt_callback_work)(
1955  ompt_work_single_other, ompt_scope_end,
1956  &(team->t.ompt_team_info.parallel_data),
1957  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1958  1, OMPT_GET_RETURN_ADDRESS(0));
1959  }
1960  }
1961  }
1962 #endif
1963 
1964  return rc;
1965 }
1966 
1976 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1977  __kmp_assert_valid_gtid(global_tid);
1978  __kmp_exit_single(global_tid);
1979  KMP_POP_PARTITIONED_TIMER();
1980 
1981 #if OMPT_SUPPORT && OMPT_OPTIONAL
1982  kmp_info_t *this_thr = __kmp_threads[global_tid];
1983  kmp_team_t *team = this_thr->th.th_team;
1984  int tid = __kmp_tid_from_gtid(global_tid);
1985 
1986  if (ompt_enabled.ompt_callback_work) {
1987  ompt_callbacks.ompt_callback(ompt_callback_work)(
1988  ompt_work_single_executor, ompt_scope_end,
1989  &(team->t.ompt_team_info.parallel_data),
1990  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1991  OMPT_GET_RETURN_ADDRESS(0));
1992  }
1993 #endif
1994 }
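// Editorial sketch (hypothetical lowering): "#pragma omp single" without
// nowait is typically lowered as
//
//   if (__kmpc_single(&loc, gtid)) {
//     /* single region body, executed by one thread only */
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid); // implicit barrier, dropped for nowait
//
// so __kmpc_end_single is executed only by the thread that won the race in
// __kmp_enter_single.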
1995 
2003 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
2004  KMP_POP_PARTITIONED_TIMER();
2005  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
2006 
2007 #if OMPT_SUPPORT && OMPT_OPTIONAL
2008  if (ompt_enabled.ompt_callback_work) {
2009  ompt_work_t ompt_work_type = ompt_work_loop_static;
2010  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2011  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2012  // Determine workshare type
2013  if (loc != NULL) {
2014  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
2015  ompt_work_type = ompt_work_loop_static;
2016  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
2017  ompt_work_type = ompt_work_sections;
2018  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
2019  ompt_work_type = ompt_work_distribute;
2020  } else {
2021  // use default set above.
2022  // a warning about this case is provided in __kmpc_for_static_init
2023  }
2024  KMP_DEBUG_ASSERT(ompt_work_type);
2025  }
2026  ompt_callbacks.ompt_callback(ompt_callback_work)(
2027  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
2028  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2029  }
2030 #endif
2031  if (__kmp_env_consistency_check)
2032  __kmp_pop_workshare(global_tid, ct_pdo, loc);
2033 }
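
/* Illustrative sketch (assumption: typical lowering of a statically scheduled
   "#pragma omp for"): __kmpc_for_static_fini() is the closing call paired with
   one of the __kmpc_for_static_init_* entry points defined elsewhere in the
   runtime. Roughly, for a loop over 0..N-1:

       kmp_int32 last = 0, lb = 0, ub = N - 1, stride = 1;
       __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last,
                                &lb, &ub, &stride, 1, 0);
       for (kmp_int32 i = lb; i <= ub; ++i)
         <loop body>
       __kmpc_for_static_fini(&loc, gtid);
       __kmpc_barrier(&loc, gtid);       // omitted when 'nowait' is present
*/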
2034 
2035 // User routines which take C-style arguments (call by value)
2036 // different from the Fortran equivalent routines
2037 
2038 void ompc_set_num_threads(int arg) {
2039  // !!!!! TODO: check the per-task binding
2040  __kmp_set_num_threads(arg, __kmp_entry_gtid());
2041 }
2042 
2043 void ompc_set_dynamic(int flag) {
2044  kmp_info_t *thread;
2045 
2046  /* For the thread-private implementation of the internal controls */
2047  thread = __kmp_entry_thread();
2048 
2049  __kmp_save_internal_controls(thread);
2050 
2051  set__dynamic(thread, flag != 0);
2052 }
2053 
2054 void ompc_set_nested(int flag) {
2055  kmp_info_t *thread;
2056 
2057  /* For the thread-private internal controls implementation */
2058  thread = __kmp_entry_thread();
2059 
2060  __kmp_save_internal_controls(thread);
2061 
2062  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2063 }
2064 
2065 void ompc_set_max_active_levels(int max_active_levels) {
2066  /* TO DO */
2067  /* we want per-task implementation of this internal control */
2068 
2069  /* For the per-thread internal controls implementation */
2070  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2071 }
2072 
2073 void ompc_set_schedule(omp_sched_t kind, int modifier) {
2074  // !!!!! TODO: check the per-task binding
2075  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2076 }
2077 
2078 int ompc_get_ancestor_thread_num(int level) {
2079  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2080 }
2081 
2082 int ompc_get_team_size(int level) {
2083  return __kmp_get_team_size(__kmp_entry_gtid(), level);
2084 }
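
/* Illustrative note: these two entries back omp_get_ancestor_thread_num() and
   omp_get_team_size(). Level 0 refers to the initial (sequential) level, and
   the current nesting depth is omp_get_level(). For example (user code,
   assuming nested parallelism is enabled, e.g. via
   omp_set_max_active_levels(2)):

       #pragma omp parallel num_threads(2)
       #pragma omp parallel num_threads(3)
       {
         int lvl   = omp_get_level();                  // 2
         int outer = omp_get_ancestor_thread_num(1);   // 0 or 1
         int inner = omp_get_team_size(2);             // 3
       }
*/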
2085 
2086 /* OpenMP 5.0 Affinity Format API */
2087 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2088  if (!__kmp_init_serial) {
2089  __kmp_serial_initialize();
2090  }
2091  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2092  format, KMP_STRLEN(format) + 1);
2093 }
2094 
2095 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2096  size_t format_size;
2097  if (!__kmp_init_serial) {
2098  __kmp_serial_initialize();
2099  }
2100  format_size = KMP_STRLEN(__kmp_affinity_format);
2101  if (buffer && size) {
2102  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2103  format_size + 1);
2104  }
2105  return format_size;
2106 }
2107 
2108 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2109  int gtid;
2110  if (!TCR_4(__kmp_init_middle)) {
2111  __kmp_middle_initialize();
2112  }
2113  __kmp_assign_root_init_mask();
2114  gtid = __kmp_get_gtid();
2115 #if KMP_AFFINITY_SUPPORTED
2116  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2117  __kmp_affinity.flags.reset) {
2118  __kmp_reset_root_init_mask(gtid);
2119  }
2120 #endif
2121  __kmp_aux_display_affinity(gtid, format);
2122 }
2123 
2124 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2125  char const *format) {
2126  int gtid;
2127  size_t num_required;
2128  kmp_str_buf_t capture_buf;
2129  if (!TCR_4(__kmp_init_middle)) {
2130  __kmp_middle_initialize();
2131  }
2132  __kmp_assign_root_init_mask();
2133  gtid = __kmp_get_gtid();
2134 #if KMP_AFFINITY_SUPPORTED
2135  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2136  __kmp_affinity.flags.reset) {
2137  __kmp_reset_root_init_mask(gtid);
2138  }
2139 #endif
2140  __kmp_str_buf_init(&capture_buf);
2141  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2142  if (buffer && buf_size) {
2143  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2144  capture_buf.used + 1);
2145  }
2146  __kmp_str_buf_free(&capture_buf);
2147  return num_required;
2148 }
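
// Illustrative sketch (not part of the runtime): a user-level caller of
// omp_capture_affinity(), which is backed by ompc_capture_affinity() above.
// The return value is the number of characters needed to hold the full string
// (excluding the terminating null), so a result >= the buffer size means the
// output was truncated. Passing a NULL format selects the current
// affinity-format setting. Uses only the standard OpenMP 5.0 API from omp.h.
static inline void example_capture_affinity(void) {
  char buf[64];
  size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
  if (needed >= sizeof(buf)) {
    // buf holds a truncated copy; a buffer of needed + 1 bytes would fit it.
  }
}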
2149 
2150 void kmpc_set_stacksize(int arg) {
2151  // __kmp_aux_set_stacksize initializes the library if needed
2152  __kmp_aux_set_stacksize(arg);
2153 }
2154 
2155 void kmpc_set_stacksize_s(size_t arg) {
2156  // __kmp_aux_set_stacksize initializes the library if needed
2157  __kmp_aux_set_stacksize(arg);
2158 }
2159 
2160 void kmpc_set_blocktime(int arg) {
2161  int gtid, tid, bt = arg;
2162  kmp_info_t *thread;
2163 
2164  gtid = __kmp_entry_gtid();
2165  tid = __kmp_tid_from_gtid(gtid);
2166  thread = __kmp_thread_from_gtid(gtid);
2167 
2168  __kmp_aux_convert_blocktime(&bt);
2169  __kmp_aux_set_blocktime(bt, thread, tid);
2170 }
2171 
2172 void kmpc_set_library(int arg) {
2173  // __kmp_user_set_library initializes the library if needed
2174  __kmp_user_set_library((enum library_type)arg);
2175 }
2176 
2177 void kmpc_set_defaults(char const *str) {
2178  // __kmp_aux_set_defaults initializes the library if needed
2179  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2180 }
2181 
2182 void kmpc_set_disp_num_buffers(int arg) {
2183  // ignore after initialization because some teams have already
2184  // allocated dispatch buffers
2185  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2186  arg <= KMP_MAX_DISP_NUM_BUFF) {
2187  __kmp_dispatch_num_buffers = arg;
2188  }
2189 }
2190 
2191 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2192 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2193  return -1;
2194 #else
2195  if (!TCR_4(__kmp_init_middle)) {
2196  __kmp_middle_initialize();
2197  }
2198  __kmp_assign_root_init_mask();
2199  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2200 #endif
2201 }
2202 
2203 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2204 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2205  return -1;
2206 #else
2207  if (!TCR_4(__kmp_init_middle)) {
2208  __kmp_middle_initialize();
2209  }
2210  __kmp_assign_root_init_mask();
2211  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2212 #endif
2213 }
2214 
2215 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2216 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2217  return -1;
2218 #else
2219  if (!TCR_4(__kmp_init_middle)) {
2220  __kmp_middle_initialize();
2221  }
2222  __kmp_assign_root_init_mask();
2223  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2224 #endif
2225 }
2226 
2227 /* -------------------------------------------------------------------------- */
2272 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2273  void *cpy_data, void (*cpy_func)(void *, void *),
2274  kmp_int32 didit) {
2275  void **data_ptr;
2276  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2277  __kmp_assert_valid_gtid(gtid);
2278 
2279  KMP_MB();
2280 
2281  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2282 
2283  if (__kmp_env_consistency_check) {
2284  if (loc == 0) {
2285  KMP_WARNING(ConstructIdentInvalid);
2286  }
2287  }
2288 
2289  // ToDo: Optimize the following two barriers into some kind of split barrier
2290 
2291  if (didit)
2292  *data_ptr = cpy_data;
2293 
2294 #if OMPT_SUPPORT
2295  ompt_frame_t *ompt_frame;
2296  if (ompt_enabled.enabled) {
2297  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2298  if (ompt_frame->enter_frame.ptr == NULL)
2299  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2300  }
2301  OMPT_STORE_RETURN_ADDRESS(gtid);
2302 #endif
2303 /* This barrier is not a barrier region boundary */
2304 #if USE_ITT_NOTIFY
2305  __kmp_threads[gtid]->th.th_ident = loc;
2306 #endif
2307  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2308 
2309  if (!didit)
2310  (*cpy_func)(cpy_data, *data_ptr);
2311 
2312  // Consider next barrier a user-visible barrier for barrier region boundaries
2313  // Nesting checks are already handled by the single construct checks
2314  {
2315 #if OMPT_SUPPORT
2316  OMPT_STORE_RETURN_ADDRESS(gtid);
2317 #endif
2318 #if USE_ITT_NOTIFY
2319  // TODO: check if it is needed (e.g. tasks can overwrite the location)
2320  __kmp_threads[gtid]->th.th_ident = loc;
2321 #endif
2322  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2323 #if OMPT_SUPPORT && OMPT_OPTIONAL
2324  if (ompt_enabled.enabled) {
2325  ompt_frame->enter_frame = ompt_data_none;
2326  }
2327 #endif
2328  }
2329 }
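
/* Illustrative sketch (assumption: typical lowering of
   "#pragma omp single copyprivate(x)", simplified to a single scalar x): the
   thread that executed the single region passes didit = 1 together with the
   address of its copy, and the copy routine is used by every other thread to
   fill in its own private copy. Roughly:

       static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
       ...
       kmp_int32 didit = __kmpc_single(&loc, gtid);
       if (didit) {
         x = <value produced inside the single region>;
         __kmpc_end_single(&loc, gtid);
       }
       __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x, didit);
*/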
2330 
2331 /* --------------------------------------------------------------------------*/
2348 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2349  void **data_ptr;
2350 
2351  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2352 
2353  KMP_MB();
2354 
2355  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2356 
2357  if (__kmp_env_consistency_check) {
2358  if (loc == 0) {
2359  KMP_WARNING(ConstructIdentInvalid);
2360  }
2361  }
2362 
2363  // ToDo: Optimize the following barrier
2364 
2365  if (cpy_data)
2366  *data_ptr = cpy_data;
2367 
2368 #if OMPT_SUPPORT
2369  ompt_frame_t *ompt_frame;
2370  if (ompt_enabled.enabled) {
2371  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2372  if (ompt_frame->enter_frame.ptr == NULL)
2373  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2374  OMPT_STORE_RETURN_ADDRESS(gtid);
2375  }
2376 #endif
2377 /* This barrier is not a barrier region boundary */
2378 #if USE_ITT_NOTIFY
2379  __kmp_threads[gtid]->th.th_ident = loc;
2380 #endif
2381  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2382 
2383  return *data_ptr;
2384 }
2385 
2386 /* -------------------------------------------------------------------------- */
2387 
2388 #define INIT_LOCK __kmp_init_user_lock_with_checks
2389 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2390 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2391 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2392 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2393 #define ACQUIRE_NESTED_LOCK_TIMED \
2394  __kmp_acquire_nested_user_lock_with_checks_timed
2395 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2396 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2397 #define TEST_LOCK __kmp_test_user_lock_with_checks
2398 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2399 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2400 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2401 
2402 // TODO: Make check abort messages use location info & pass it into
2403 // with_checks routines
2404 
2405 #if KMP_USE_DYNAMIC_LOCK
2406 
2407 // internal lock initializer
2408 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2409  kmp_dyna_lockseq_t seq) {
2410  if (KMP_IS_D_LOCK(seq)) {
2411  KMP_INIT_D_LOCK(lock, seq);
2412 #if USE_ITT_BUILD
2413  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2414 #endif
2415  } else {
2416  KMP_INIT_I_LOCK(lock, seq);
2417 #if USE_ITT_BUILD
2418  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2419  __kmp_itt_lock_creating(ilk->lock, loc);
2420 #endif
2421  }
2422 }
2423 
2424 // internal nest lock initializer
2425 static __forceinline void
2426 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2427  kmp_dyna_lockseq_t seq) {
2428 #if KMP_USE_TSX
2429  // Don't have nested lock implementation for speculative locks
2430  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2431  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2432  seq = __kmp_user_lock_seq;
2433 #endif
2434  switch (seq) {
2435  case lockseq_tas:
2436  seq = lockseq_nested_tas;
2437  break;
2438 #if KMP_USE_FUTEX
2439  case lockseq_futex:
2440  seq = lockseq_nested_futex;
2441  break;
2442 #endif
2443  case lockseq_ticket:
2444  seq = lockseq_nested_ticket;
2445  break;
2446  case lockseq_queuing:
2447  seq = lockseq_nested_queuing;
2448  break;
2449  case lockseq_drdpa:
2450  seq = lockseq_nested_drdpa;
2451  break;
2452  default:
2453  seq = lockseq_nested_queuing;
2454  }
2455  KMP_INIT_I_LOCK(lock, seq);
2456 #if USE_ITT_BUILD
2457  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2458  __kmp_itt_lock_creating(ilk->lock, loc);
2459 #endif
2460 }
2461 
2462 /* initialize the lock with a hint */
2463 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2464  uintptr_t hint) {
2465  KMP_DEBUG_ASSERT(__kmp_init_serial);
2466  if (__kmp_env_consistency_check && user_lock == NULL) {
2467  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2468  }
2469 
2470  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2471 
2472 #if OMPT_SUPPORT && OMPT_OPTIONAL
2473  // This is the case, if called from omp_init_lock_with_hint:
2474  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2475  if (!codeptr)
2476  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2477  if (ompt_enabled.ompt_callback_lock_init) {
2478  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2479  ompt_mutex_lock, (omp_lock_hint_t)hint,
2480  __ompt_get_mutex_impl_type(user_lock),
2481  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2482  }
2483 #endif
2484 }
2485 
2486 /* initialize the nested lock with a hint */
2487 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2488  void **user_lock, uintptr_t hint) {
2489  KMP_DEBUG_ASSERT(__kmp_init_serial);
2490  if (__kmp_env_consistency_check && user_lock == NULL) {
2491  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2492  }
2493 
2494  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2495 
2496 #if OMPT_SUPPORT && OMPT_OPTIONAL
2497  // This is the case, if called from omp_init_lock_with_hint:
2498  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2499  if (!codeptr)
2500  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501  if (ompt_enabled.ompt_callback_lock_init) {
2502  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2503  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2504  __ompt_get_mutex_impl_type(user_lock),
2505  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2506  }
2507 #endif
2508 }
2509 
2510 #endif // KMP_USE_DYNAMIC_LOCK
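
// Illustrative sketch (not part of the runtime): the user-level API served by
// the *_with_hint entry points above. The hint is advisory and is mapped onto
// one of the available lock implementations by __kmp_map_hint_to_lock(); the
// wrappers record the call site so the OMPT lock_init callback can report it.
// Uses only the standard lock API declared in omp.h.
static inline void example_lock_with_hint(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_contended);
  omp_set_lock(&l);
  // work protected by the lock would go here
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}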
2511 
2512 /* initialize the lock */
2513 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2514 #if KMP_USE_DYNAMIC_LOCK
2515 
2516  KMP_DEBUG_ASSERT(__kmp_init_serial);
2517  if (__kmp_env_consistency_check && user_lock == NULL) {
2518  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2519  }
2520  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2521 
2522 #if OMPT_SUPPORT && OMPT_OPTIONAL
2523  // This is the case, if called from omp_init_lock_with_hint:
2524  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2525  if (!codeptr)
2526  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2527  if (ompt_enabled.ompt_callback_lock_init) {
2528  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2529  ompt_mutex_lock, omp_lock_hint_none,
2530  __ompt_get_mutex_impl_type(user_lock),
2531  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2532  }
2533 #endif
2534 
2535 #else // KMP_USE_DYNAMIC_LOCK
2536 
2537  static char const *const func = "omp_init_lock";
2538  kmp_user_lock_p lck;
2539  KMP_DEBUG_ASSERT(__kmp_init_serial);
2540 
2541  if (__kmp_env_consistency_check) {
2542  if (user_lock == NULL) {
2543  KMP_FATAL(LockIsUninitialized, func);
2544  }
2545  }
2546 
2547  KMP_CHECK_USER_LOCK_INIT();
2548 
2549  if ((__kmp_user_lock_kind == lk_tas) &&
2550  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2551  lck = (kmp_user_lock_p)user_lock;
2552  }
2553 #if KMP_USE_FUTEX
2554  else if ((__kmp_user_lock_kind == lk_futex) &&
2555  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2556  lck = (kmp_user_lock_p)user_lock;
2557  }
2558 #endif
2559  else {
2560  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2561  }
2562  INIT_LOCK(lck);
2563  __kmp_set_user_lock_location(lck, loc);
2564 
2565 #if OMPT_SUPPORT && OMPT_OPTIONAL
2566  // This is the case, if called from omp_init_lock_with_hint:
2567  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2568  if (!codeptr)
2569  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2570  if (ompt_enabled.ompt_callback_lock_init) {
2571  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2572  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2573  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2574  }
2575 #endif
2576 
2577 #if USE_ITT_BUILD
2578  __kmp_itt_lock_creating(lck);
2579 #endif /* USE_ITT_BUILD */
2580 
2581 #endif // KMP_USE_DYNAMIC_LOCK
2582 } // __kmpc_init_lock
2583 
2584 /* initialize the nested lock */
2585 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2586 #if KMP_USE_DYNAMIC_LOCK
2587 
2588  KMP_DEBUG_ASSERT(__kmp_init_serial);
2589  if (__kmp_env_consistency_check && user_lock == NULL) {
2590  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2591  }
2592  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2593 
2594 #if OMPT_SUPPORT && OMPT_OPTIONAL
2595  // This is the case, if called from omp_init_lock_with_hint:
2596  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2597  if (!codeptr)
2598  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2599  if (ompt_enabled.ompt_callback_lock_init) {
2600  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2601  ompt_mutex_nest_lock, omp_lock_hint_none,
2602  __ompt_get_mutex_impl_type(user_lock),
2603  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2604  }
2605 #endif
2606 
2607 #else // KMP_USE_DYNAMIC_LOCK
2608 
2609  static char const *const func = "omp_init_nest_lock";
2610  kmp_user_lock_p lck;
2611  KMP_DEBUG_ASSERT(__kmp_init_serial);
2612 
2613  if (__kmp_env_consistency_check) {
2614  if (user_lock == NULL) {
2615  KMP_FATAL(LockIsUninitialized, func);
2616  }
2617  }
2618 
2619  KMP_CHECK_USER_LOCK_INIT();
2620 
2621  if ((__kmp_user_lock_kind == lk_tas) &&
2622  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2623  OMP_NEST_LOCK_T_SIZE)) {
2624  lck = (kmp_user_lock_p)user_lock;
2625  }
2626 #if KMP_USE_FUTEX
2627  else if ((__kmp_user_lock_kind == lk_futex) &&
2628  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2629  OMP_NEST_LOCK_T_SIZE)) {
2630  lck = (kmp_user_lock_p)user_lock;
2631  }
2632 #endif
2633  else {
2634  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2635  }
2636 
2637  INIT_NESTED_LOCK(lck);
2638  __kmp_set_user_lock_location(lck, loc);
2639 
2640 #if OMPT_SUPPORT && OMPT_OPTIONAL
2641  // This is the case, if called from omp_init_lock_with_hint:
2642  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2643  if (!codeptr)
2644  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2645  if (ompt_enabled.ompt_callback_lock_init) {
2646  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2647  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2648  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2649  }
2650 #endif
2651 
2652 #if USE_ITT_BUILD
2653  __kmp_itt_lock_creating(lck);
2654 #endif /* USE_ITT_BUILD */
2655 
2656 #endif // KMP_USE_DYNAMIC_LOCK
2657 } // __kmpc_init_nest_lock
2658 
2659 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2660 #if KMP_USE_DYNAMIC_LOCK
2661 
2662 #if USE_ITT_BUILD
2663  kmp_user_lock_p lck;
2664  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2665  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2666  } else {
2667  lck = (kmp_user_lock_p)user_lock;
2668  }
2669  __kmp_itt_lock_destroyed(lck);
2670 #endif
2671 #if OMPT_SUPPORT && OMPT_OPTIONAL
2672  // This is the case, if called from omp_init_lock_with_hint:
2673  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2674  if (!codeptr)
2675  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2676  if (ompt_enabled.ompt_callback_lock_destroy) {
2677  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2678  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2679  }
2680 #endif
2681  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2682 #else
2683  kmp_user_lock_p lck;
2684 
2685  if ((__kmp_user_lock_kind == lk_tas) &&
2686  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2687  lck = (kmp_user_lock_p)user_lock;
2688  }
2689 #if KMP_USE_FUTEX
2690  else if ((__kmp_user_lock_kind == lk_futex) &&
2691  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2692  lck = (kmp_user_lock_p)user_lock;
2693  }
2694 #endif
2695  else {
2696  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2697  }
2698 
2699 #if OMPT_SUPPORT && OMPT_OPTIONAL
2700  // This is the case, if called from omp_init_lock_with_hint:
2701  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2702  if (!codeptr)
2703  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2704  if (ompt_enabled.ompt_callback_lock_destroy) {
2705  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2706  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2707  }
2708 #endif
2709 
2710 #if USE_ITT_BUILD
2711  __kmp_itt_lock_destroyed(lck);
2712 #endif /* USE_ITT_BUILD */
2713  DESTROY_LOCK(lck);
2714 
2715  if ((__kmp_user_lock_kind == lk_tas) &&
2716  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2717  ;
2718  }
2719 #if KMP_USE_FUTEX
2720  else if ((__kmp_user_lock_kind == lk_futex) &&
2721  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2722  ;
2723  }
2724 #endif
2725  else {
2726  __kmp_user_lock_free(user_lock, gtid, lck);
2727  }
2728 #endif // KMP_USE_DYNAMIC_LOCK
2729 } // __kmpc_destroy_lock
2730 
2731 /* destroy the nested lock */
2732 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2733 #if KMP_USE_DYNAMIC_LOCK
2734 
2735 #if USE_ITT_BUILD
2736  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2737  __kmp_itt_lock_destroyed(ilk->lock);
2738 #endif
2739 #if OMPT_SUPPORT && OMPT_OPTIONAL
2740  // This is the case, if called from omp_init_lock_with_hint:
2741  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2742  if (!codeptr)
2743  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2744  if (ompt_enabled.ompt_callback_lock_destroy) {
2745  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2746  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2747  }
2748 #endif
2749  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2750 
2751 #else // KMP_USE_DYNAMIC_LOCK
2752 
2753  kmp_user_lock_p lck;
2754 
2755  if ((__kmp_user_lock_kind == lk_tas) &&
2756  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2757  OMP_NEST_LOCK_T_SIZE)) {
2758  lck = (kmp_user_lock_p)user_lock;
2759  }
2760 #if KMP_USE_FUTEX
2761  else if ((__kmp_user_lock_kind == lk_futex) &&
2762  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2763  OMP_NEST_LOCK_T_SIZE)) {
2764  lck = (kmp_user_lock_p)user_lock;
2765  }
2766 #endif
2767  else {
2768  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2769  }
2770 
2771 #if OMPT_SUPPORT && OMPT_OPTIONAL
2772  // This is the case, if called from omp_init_lock_with_hint:
2773  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2774  if (!codeptr)
2775  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2776  if (ompt_enabled.ompt_callback_lock_destroy) {
2777  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2778  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2779  }
2780 #endif
2781 
2782 #if USE_ITT_BUILD
2783  __kmp_itt_lock_destroyed(lck);
2784 #endif /* USE_ITT_BUILD */
2785 
2786  DESTROY_NESTED_LOCK(lck);
2787 
2788  if ((__kmp_user_lock_kind == lk_tas) &&
2789  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2790  OMP_NEST_LOCK_T_SIZE)) {
2791  ;
2792  }
2793 #if KMP_USE_FUTEX
2794  else if ((__kmp_user_lock_kind == lk_futex) &&
2795  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2796  OMP_NEST_LOCK_T_SIZE)) {
2797  ;
2798  }
2799 #endif
2800  else {
2801  __kmp_user_lock_free(user_lock, gtid, lck);
2802  }
2803 #endif // KMP_USE_DYNAMIC_LOCK
2804 } // __kmpc_destroy_nest_lock
2805 
2806 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2807  KMP_COUNT_BLOCK(OMP_set_lock);
2808 #if KMP_USE_DYNAMIC_LOCK
2809  int tag = KMP_EXTRACT_D_TAG(user_lock);
2810 #if USE_ITT_BUILD
2811  __kmp_itt_lock_acquiring(
2812  (kmp_user_lock_p)
2813  user_lock); // itt function will get to the right lock object.
2814 #endif
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2816  // This is the case, if called from omp_init_lock_with_hint:
2817  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818  if (!codeptr)
2819  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820  if (ompt_enabled.ompt_callback_mutex_acquire) {
2821  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822  ompt_mutex_lock, omp_lock_hint_none,
2823  __ompt_get_mutex_impl_type(user_lock),
2824  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2825  }
2826 #endif
2827 #if KMP_USE_INLINED_TAS
2828  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2829  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2830  } else
2831 #elif KMP_USE_INLINED_FUTEX
2832  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2833  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2834  } else
2835 #endif
2836  {
2837  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2838  }
2839 #if USE_ITT_BUILD
2840  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2841 #endif
2842 #if OMPT_SUPPORT && OMPT_OPTIONAL
2843  if (ompt_enabled.ompt_callback_mutex_acquired) {
2844  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2845  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2846  }
2847 #endif
2848 
2849 #else // KMP_USE_DYNAMIC_LOCK
2850 
2851  kmp_user_lock_p lck;
2852 
2853  if ((__kmp_user_lock_kind == lk_tas) &&
2854  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2855  lck = (kmp_user_lock_p)user_lock;
2856  }
2857 #if KMP_USE_FUTEX
2858  else if ((__kmp_user_lock_kind == lk_futex) &&
2859  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2860  lck = (kmp_user_lock_p)user_lock;
2861  }
2862 #endif
2863  else {
2864  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2865  }
2866 
2867 #if USE_ITT_BUILD
2868  __kmp_itt_lock_acquiring(lck);
2869 #endif /* USE_ITT_BUILD */
2870 #if OMPT_SUPPORT && OMPT_OPTIONAL
2871  // This is the case, if called from omp_init_lock_with_hint:
2872  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873  if (!codeptr)
2874  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875  if (ompt_enabled.ompt_callback_mutex_acquire) {
2876  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2877  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2878  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2879  }
2880 #endif
2881 
2882  ACQUIRE_LOCK(lck, gtid);
2883 
2884 #if USE_ITT_BUILD
2885  __kmp_itt_lock_acquired(lck);
2886 #endif /* USE_ITT_BUILD */
2887 
2888 #if OMPT_SUPPORT && OMPT_OPTIONAL
2889  if (ompt_enabled.ompt_callback_mutex_acquired) {
2890  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2891  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2892  }
2893 #endif
2894 
2895 #endif // KMP_USE_DYNAMIC_LOCK
2896 }
2897 
2898 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2899 #if KMP_USE_DYNAMIC_LOCK
2900 
2901 #if USE_ITT_BUILD
2902  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2903 #endif
2904 #if OMPT_SUPPORT && OMPT_OPTIONAL
2905  // This is the case, if called from omp_init_lock_with_hint:
2906  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907  if (!codeptr)
2908  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909  if (ompt_enabled.enabled) {
2910  if (ompt_enabled.ompt_callback_mutex_acquire) {
2911  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2912  ompt_mutex_nest_lock, omp_lock_hint_none,
2913  __ompt_get_mutex_impl_type(user_lock),
2914  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2915  }
2916  }
2917 #endif
2918  int acquire_status =
2919  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2920  (void)acquire_status;
2921 #if USE_ITT_BUILD
2922  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2923 #endif
2924 
2925 #if OMPT_SUPPORT && OMPT_OPTIONAL
2926  if (ompt_enabled.enabled) {
2927  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2928  if (ompt_enabled.ompt_callback_mutex_acquired) {
2929  // lock_first
2930  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2931  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2932  codeptr);
2933  }
2934  } else {
2935  if (ompt_enabled.ompt_callback_nest_lock) {
2936  // lock_next
2937  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2938  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2939  }
2940  }
2941  }
2942 #endif
2943 
2944 #else // KMP_USE_DYNAMIC_LOCK
2945  int acquire_status;
2946  kmp_user_lock_p lck;
2947 
2948  if ((__kmp_user_lock_kind == lk_tas) &&
2949  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2950  OMP_NEST_LOCK_T_SIZE)) {
2951  lck = (kmp_user_lock_p)user_lock;
2952  }
2953 #if KMP_USE_FUTEX
2954  else if ((__kmp_user_lock_kind == lk_futex) &&
2955  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2956  OMP_NEST_LOCK_T_SIZE)) {
2957  lck = (kmp_user_lock_p)user_lock;
2958  }
2959 #endif
2960  else {
2961  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2962  }
2963 
2964 #if USE_ITT_BUILD
2965  __kmp_itt_lock_acquiring(lck);
2966 #endif /* USE_ITT_BUILD */
2967 #if OMPT_SUPPORT && OMPT_OPTIONAL
2968  // This is the case, if called from omp_init_lock_with_hint:
2969  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2970  if (!codeptr)
2971  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2972  if (ompt_enabled.enabled) {
2973  if (ompt_enabled.ompt_callback_mutex_acquire) {
2974  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2975  ompt_mutex_nest_lock, omp_lock_hint_none,
2976  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2977  codeptr);
2978  }
2979  }
2980 #endif
2981 
2982  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2983 
2984 #if USE_ITT_BUILD
2985  __kmp_itt_lock_acquired(lck);
2986 #endif /* USE_ITT_BUILD */
2987 
2988 #if OMPT_SUPPORT && OMPT_OPTIONAL
2989  if (ompt_enabled.enabled) {
2990  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2991  if (ompt_enabled.ompt_callback_mutex_acquired) {
2992  // lock_first
2993  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2994  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2995  }
2996  } else {
2997  if (ompt_enabled.ompt_callback_nest_lock) {
2998  // lock_next
2999  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3000  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3001  }
3002  }
3003  }
3004 #endif
3005 
3006 #endif // KMP_USE_DYNAMIC_LOCK
3007 }
3008 
3009 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3010 #if KMP_USE_DYNAMIC_LOCK
3011 
3012  int tag = KMP_EXTRACT_D_TAG(user_lock);
3013 #if USE_ITT_BUILD
3014  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3015 #endif
3016 #if KMP_USE_INLINED_TAS
3017  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3018  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
3019  } else
3020 #elif KMP_USE_INLINED_FUTEX
3021  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3022  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
3023  } else
3024 #endif
3025  {
3026  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3027  }
3028 
3029 #if OMPT_SUPPORT && OMPT_OPTIONAL
3030  // This is the case, if called from omp_init_lock_with_hint:
3031  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3032  if (!codeptr)
3033  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3034  if (ompt_enabled.ompt_callback_mutex_released) {
3035  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3037  }
3038 #endif
3039 
3040 #else // KMP_USE_DYNAMIC_LOCK
3041 
3042  kmp_user_lock_p lck;
3043 
3044  /* Can't use serial interval since not block structured */
3045  /* release the lock */
3046 
3047  if ((__kmp_user_lock_kind == lk_tas) &&
3048  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3049 #if KMP_OS_LINUX && \
3050  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3051 // "fast" path implemented to fix customer performance issue
3052 #if USE_ITT_BUILD
3053  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054 #endif /* USE_ITT_BUILD */
3055  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3056  KMP_MB();
3057 
3058 #if OMPT_SUPPORT && OMPT_OPTIONAL
3059  // This is the case, if called from omp_init_lock_with_hint:
3060  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061  if (!codeptr)
3062  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063  if (ompt_enabled.ompt_callback_mutex_released) {
3064  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3065  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3066  }
3067 #endif
3068 
3069  return;
3070 #else
3071  lck = (kmp_user_lock_p)user_lock;
3072 #endif
3073  }
3074 #if KMP_USE_FUTEX
3075  else if ((__kmp_user_lock_kind == lk_futex) &&
3076  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3077  lck = (kmp_user_lock_p)user_lock;
3078  }
3079 #endif
3080  else {
3081  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3082  }
3083 
3084 #if USE_ITT_BUILD
3085  __kmp_itt_lock_releasing(lck);
3086 #endif /* USE_ITT_BUILD */
3087 
3088  RELEASE_LOCK(lck, gtid);
3089 
3090 #if OMPT_SUPPORT && OMPT_OPTIONAL
3091  // This is the case, if called from omp_init_lock_with_hint:
3092  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3093  if (!codeptr)
3094  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3095  if (ompt_enabled.ompt_callback_mutex_released) {
3096  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3097  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098  }
3099 #endif
3100 
3101 #endif // KMP_USE_DYNAMIC_LOCK
3102 }
3103 
3104 /* release the nested lock */
3105 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3106 #if KMP_USE_DYNAMIC_LOCK
3107 
3108 #if USE_ITT_BUILD
3109  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3110 #endif
3111  int release_status =
3112  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3113  (void)release_status;
3114 
3115 #if OMPT_SUPPORT && OMPT_OPTIONAL
3116  // This is the case, if called from omp_init_lock_with_hint:
3117  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3118  if (!codeptr)
3119  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3120  if (ompt_enabled.enabled) {
3121  if (release_status == KMP_LOCK_RELEASED) {
3122  if (ompt_enabled.ompt_callback_mutex_released) {
3123  // release_lock_last
3124  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3125  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3126  codeptr);
3127  }
3128  } else if (ompt_enabled.ompt_callback_nest_lock) {
3129  // release_lock_prev
3130  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3131  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3132  }
3133  }
3134 #endif
3135 
3136 #else // KMP_USE_DYNAMIC_LOCK
3137 
3138  kmp_user_lock_p lck;
3139 
3140  /* Can't use serial interval since not block structured */
3141 
3142  if ((__kmp_user_lock_kind == lk_tas) &&
3143  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3144  OMP_NEST_LOCK_T_SIZE)) {
3145 #if KMP_OS_LINUX && \
3146  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3147  // "fast" path implemented to fix customer performance issue
3148  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3149 #if USE_ITT_BUILD
3150  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3151 #endif /* USE_ITT_BUILD */
3152 
3153 #if OMPT_SUPPORT && OMPT_OPTIONAL
3154  int release_status = KMP_LOCK_STILL_HELD;
3155 #endif
3156 
3157  if (--(tl->lk.depth_locked) == 0) {
3158  TCW_4(tl->lk.poll, 0);
3159 #if OMPT_SUPPORT && OMPT_OPTIONAL
3160  release_status = KMP_LOCK_RELEASED;
3161 #endif
3162  }
3163  KMP_MB();
3164 
3165 #if OMPT_SUPPORT && OMPT_OPTIONAL
3166  // This is the case, if called from omp_init_lock_with_hint:
3167  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3168  if (!codeptr)
3169  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3170  if (ompt_enabled.enabled) {
3171  if (release_status == KMP_LOCK_RELEASED) {
3172  if (ompt_enabled.ompt_callback_mutex_released) {
3173  // release_lock_last
3174  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3175  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3176  }
3177  } else if (ompt_enabled.ompt_callback_nest_lock) {
3178  // release_lock_previous
3179  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3180  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3181  }
3182  }
3183 #endif
3184 
3185  return;
3186 #else
3187  lck = (kmp_user_lock_p)user_lock;
3188 #endif
3189  }
3190 #if KMP_USE_FUTEX
3191  else if ((__kmp_user_lock_kind == lk_futex) &&
3192  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3193  OMP_NEST_LOCK_T_SIZE)) {
3194  lck = (kmp_user_lock_p)user_lock;
3195  }
3196 #endif
3197  else {
3198  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3199  }
3200 
3201 #if USE_ITT_BUILD
3202  __kmp_itt_lock_releasing(lck);
3203 #endif /* USE_ITT_BUILD */
3204 
3205  int release_status;
3206  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3207 #if OMPT_SUPPORT && OMPT_OPTIONAL
3208  // This is the case, if called from omp_init_lock_with_hint:
3209  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3210  if (!codeptr)
3211  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3212  if (ompt_enabled.enabled) {
3213  if (release_status == KMP_LOCK_RELEASED) {
3214  if (ompt_enabled.ompt_callback_mutex_released) {
3215  // release_lock_last
3216  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3217  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3218  }
3219  } else if (ompt_enabled.ompt_callback_nest_lock) {
3220  // release_lock_previous
3221  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3222  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3223  }
3224  }
3225 #endif
3226 
3227 #endif // KMP_USE_DYNAMIC_LOCK
3228 }
3229 
3230 /* try to acquire the lock */
3231 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3232  KMP_COUNT_BLOCK(OMP_test_lock);
3233 
3234 #if KMP_USE_DYNAMIC_LOCK
3235  int rc;
3236  int tag = KMP_EXTRACT_D_TAG(user_lock);
3237 #if USE_ITT_BUILD
3238  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3239 #endif
3240 #if OMPT_SUPPORT && OMPT_OPTIONAL
3241  // This is the case, if called from omp_init_lock_with_hint:
3242  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3243  if (!codeptr)
3244  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3245  if (ompt_enabled.ompt_callback_mutex_acquire) {
3246  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3247  ompt_mutex_test_lock, omp_lock_hint_none,
3248  __ompt_get_mutex_impl_type(user_lock),
3249  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3250  }
3251 #endif
3252 #if KMP_USE_INLINED_TAS
3253  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3254  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3255  } else
3256 #elif KMP_USE_INLINED_FUTEX
3257  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3258  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3259  } else
3260 #endif
3261  {
3262  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3263  }
3264  if (rc) {
3265 #if USE_ITT_BUILD
3266  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267 #endif
3268 #if OMPT_SUPPORT && OMPT_OPTIONAL
3269  if (ompt_enabled.ompt_callback_mutex_acquired) {
3270  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3272  }
3273 #endif
3274  return FTN_TRUE;
3275  } else {
3276 #if USE_ITT_BUILD
3277  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3278 #endif
3279  return FTN_FALSE;
3280  }
3281 
3282 #else // KMP_USE_DYNAMIC_LOCK
3283 
3284  kmp_user_lock_p lck;
3285  int rc;
3286 
3287  if ((__kmp_user_lock_kind == lk_tas) &&
3288  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3289  lck = (kmp_user_lock_p)user_lock;
3290  }
3291 #if KMP_USE_FUTEX
3292  else if ((__kmp_user_lock_kind == lk_futex) &&
3293  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3294  lck = (kmp_user_lock_p)user_lock;
3295  }
3296 #endif
3297  else {
3298  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3299  }
3300 
3301 #if USE_ITT_BUILD
3302  __kmp_itt_lock_acquiring(lck);
3303 #endif /* USE_ITT_BUILD */
3304 #if OMPT_SUPPORT && OMPT_OPTIONAL
3305  // This is the case, if called from omp_init_lock_with_hint:
3306  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3307  if (!codeptr)
3308  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3309  if (ompt_enabled.ompt_callback_mutex_acquire) {
3310  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3311  ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3312  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3313  }
3314 #endif
3315 
3316  rc = TEST_LOCK(lck, gtid);
3317 #if USE_ITT_BUILD
3318  if (rc) {
3319  __kmp_itt_lock_acquired(lck);
3320  } else {
3321  __kmp_itt_lock_cancelled(lck);
3322  }
3323 #endif /* USE_ITT_BUILD */
3324 #if OMPT_SUPPORT && OMPT_OPTIONAL
3325  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3326  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3327  ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3328  }
3329 #endif
3330 
3331  return (rc ? FTN_TRUE : FTN_FALSE);
3332 
3333  /* Can't use serial interval since not block structured */
3334 
3335 #endif // KMP_USE_DYNAMIC_LOCK
3336 }
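
// Illustrative sketch (not part of the runtime): the usual user-level pattern
// behind __kmpc_test_lock(): try the lock and, on failure, do other work
// instead of blocking. Uses only the standard lock API declared in omp.h.
static inline void example_test_lock(omp_lock_t *l) {
  while (!omp_test_lock(l)) {
    // lock is busy: do some other useful work, then retry
  }
  // lock acquired; protected work goes here
  omp_unset_lock(l);
}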
3337 
3338 /* try to acquire the nested lock */
3339 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3340 #if KMP_USE_DYNAMIC_LOCK
3341  int rc;
3342 #if USE_ITT_BUILD
3343  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3344 #endif
3345 #if OMPT_SUPPORT && OMPT_OPTIONAL
3346  // This is the case, if called from omp_init_lock_with_hint:
3347  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3348  if (!codeptr)
3349  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3350  if (ompt_enabled.ompt_callback_mutex_acquire) {
3351  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3352  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3353  __ompt_get_mutex_impl_type(user_lock),
3354  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3355  }
3356 #endif
3357  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3358 #if USE_ITT_BUILD
3359  if (rc) {
3360  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3361  } else {
3362  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3363  }
3364 #endif
3365 #if OMPT_SUPPORT && OMPT_OPTIONAL
3366  if (ompt_enabled.enabled && rc) {
3367  if (rc == 1) {
3368  if (ompt_enabled.ompt_callback_mutex_acquired) {
3369  // lock_first
3370  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3371  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3372  codeptr);
3373  }
3374  } else {
3375  if (ompt_enabled.ompt_callback_nest_lock) {
3376  // lock_next
3377  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3378  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3379  }
3380  }
3381  }
3382 #endif
3383  return rc;
3384 
3385 #else // KMP_USE_DYNAMIC_LOCK
3386 
3387  kmp_user_lock_p lck;
3388  int rc;
3389 
3390  if ((__kmp_user_lock_kind == lk_tas) &&
3391  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3392  OMP_NEST_LOCK_T_SIZE)) {
3393  lck = (kmp_user_lock_p)user_lock;
3394  }
3395 #if KMP_USE_FUTEX
3396  else if ((__kmp_user_lock_kind == lk_futex) &&
3397  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3398  OMP_NEST_LOCK_T_SIZE)) {
3399  lck = (kmp_user_lock_p)user_lock;
3400  }
3401 #endif
3402  else {
3403  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3404  }
3405 
3406 #if USE_ITT_BUILD
3407  __kmp_itt_lock_acquiring(lck);
3408 #endif /* USE_ITT_BUILD */
3409 
3410 #if OMPT_SUPPORT && OMPT_OPTIONAL
3411  // This is the case, if called from omp_init_lock_with_hint:
3412  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3413  if (!codeptr)
3414  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3415  if (ompt_enabled.enabled &&
3416      ompt_enabled.ompt_callback_mutex_acquire) {
3417  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3418  ompt_mutex_test_nest_lock, omp_lock_hint_none,
3419  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3420  codeptr);
3421  }
3422 #endif
3423 
3424  rc = TEST_NESTED_LOCK(lck, gtid);
3425 #if USE_ITT_BUILD
3426  if (rc) {
3427  __kmp_itt_lock_acquired(lck);
3428  } else {
3429  __kmp_itt_lock_cancelled(lck);
3430  }
3431 #endif /* USE_ITT_BUILD */
3432 #if OMPT_SUPPORT && OMPT_OPTIONAL
3433  if (ompt_enabled.enabled && rc) {
3434  if (rc == 1) {
3435  if (ompt_enabled.ompt_callback_mutex_acquired) {
3436  // lock_first
3437  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3438  ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3439  }
3440  } else {
3441  if (ompt_enabled.ompt_callback_nest_lock) {
3442  // lock_next
3443  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3444  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3445  }
3446  }
3447  }
3448 #endif
3449  return rc;
3450 
3451  /* Can't use serial interval since not block structured */
3452 
3453 #endif // KMP_USE_DYNAMIC_LOCK
3454 }
3455 
3456 // Interface to fast scalable reduce methods routines
3457 
3458 // keep the selected method in a thread-local structure for cross-function
3459 // usage: it will be used in the __kmpc_end_reduce* functions;
3460 // another solution would be to re-determine the method in the
3461 // __kmpc_end_reduce* functions (a new prototype would be required then)
3462 // AT: which solution is better?
3463 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3464  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3465 
3466 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3467  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3468 
3469 // description of the packed_reduction_method variable: look at the macros in
3470 // kmp.h
3471 
3472 // used in a critical section reduce block
3473 static __forceinline void
3474 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3475  kmp_critical_name *crit) {
3476 
3477  // this lock was visible to a customer and to the threading profile tool as a
3478  // serial overhead span (although it's used for an internal purpose only)
3479  // why was it visible in previous implementation?
3480  // should we keep it visible in new reduce block?
3481  kmp_user_lock_p lck;
3482 
3483 #if KMP_USE_DYNAMIC_LOCK
3484 
3485  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3486  // Check if it is initialized.
3487  if (*lk == 0) {
3488  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489  (void)KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3490  KMP_GET_D_TAG(__kmp_user_lock_seq));
3491  } else {
3492  __kmp_init_indirect_csptr(crit, loc, global_tid,
3493  KMP_GET_I_TAG(__kmp_user_lock_seq));
3494  }
3495  }
3496  // Branch for accessing the actual lock object and set operation. This
3497  // branching is inevitable since this lock initialization does not follow the
3498  // normal dispatch path (lock table is not used).
3499  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3500  lck = (kmp_user_lock_p)lk;
3501  KMP_DEBUG_ASSERT(lck != NULL);
3502  if (__kmp_env_consistency_check) {
3503  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3504  }
3505  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3506  } else {
3507  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3508  lck = ilk->lock;
3509  KMP_DEBUG_ASSERT(lck != NULL);
3510  if (__kmp_env_consistency_check) {
3511  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3512  }
3513  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3514  }
3515 
3516 #else // KMP_USE_DYNAMIC_LOCK
3517 
3518  // We know that the fast reduction code is only emitted by Intel compilers
3519  // with 32 byte critical sections. If there isn't enough space, then we
3520  // have to use a pointer.
3521  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3522  lck = (kmp_user_lock_p)crit;
3523  } else {
3524  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3525  }
3526  KMP_DEBUG_ASSERT(lck != NULL);
3527 
3528  if (__kmp_env_consistency_check)
3529  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3530 
3531  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3532 
3533 #endif // KMP_USE_DYNAMIC_LOCK
3534 }
3535 
3536 // used in a critical section reduce block
3537 static __forceinline void
3538 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3539  kmp_critical_name *crit) {
3540 
3541  kmp_user_lock_p lck;
3542 
3543 #if KMP_USE_DYNAMIC_LOCK
3544 
3545  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3546  lck = (kmp_user_lock_p)crit;
3547  if (__kmp_env_consistency_check)
3548  __kmp_pop_sync(global_tid, ct_critical, loc);
3549  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3550  } else {
3551  kmp_indirect_lock_t *ilk =
3552  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3553  if (__kmp_env_consistency_check)
3554  __kmp_pop_sync(global_tid, ct_critical, loc);
3555  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3556  }
3557 
3558 #else // KMP_USE_DYNAMIC_LOCK
3559 
3560  // We know that the fast reduction code is only emitted by Intel compilers
3561  // with 32 byte critical sections. If there isn't enough space, then we have
3562  // to use a pointer.
3563  if (__kmp_base_user_lock_size > INTEL_CRITICAL_SIZE) {
3564  lck = *((kmp_user_lock_p *)crit);
3565  KMP_ASSERT(lck != NULL);
3566  } else {
3567  lck = (kmp_user_lock_p)crit;
3568  }
3569 
3570  if (__kmp_env_consistency_check)
3571  __kmp_pop_sync(global_tid, ct_critical, loc);
3572 
3573  __kmp_release_user_lock_with_checks(lck, global_tid);
3574 
3575 #endif // KMP_USE_DYNAMIC_LOCK
3576 } // __kmp_end_critical_section_reduce_block
3577 
3578 static __forceinline int
3579 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3580  int *task_state) {
3581  kmp_team_t *team;
3582 
3583  // Are we inside a teams construct?
3584  if (th->th.th_teams_microtask) {
3585  *team_p = team = th->th.th_team;
3586  if (team->t.t_level == th->th.th_teams_level) {
3587  // This is a reduction at the teams construct.
3588  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3589  // Let's swap teams temporarily for the reduction.
3590  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3591  th->th.th_team = team->t.t_parent;
3592  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3593  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3594  *task_state = th->th.th_task_state;
3595  th->th.th_task_state = 0;
3596 
3597  return 1;
3598  }
3599  }
3600  return 0;
3601 }
3602 
3603 static __forceinline void
3604 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3605  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3606  th->th.th_info.ds.ds_tid = 0;
3607  th->th.th_team = team;
3608  th->th.th_team_nproc = team->t.t_nproc;
3609  th->th.th_task_team = team->t.t_task_team[task_state];
3610  __kmp_type_convert(task_state, &(th->th.th_task_state));
3611 }
3612 
3613 /* 2.a.i. Reduce Block without a terminating barrier */
3629 kmp_int32
3630 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3631  size_t reduce_size, void *reduce_data,
3632  void (*reduce_func)(void *lhs_data, void *rhs_data),
3633  kmp_critical_name *lck) {
3634 
3635  KMP_COUNT_BLOCK(REDUCE_nowait);
3636  int retval = 0;
3637  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638  kmp_info_t *th;
3639  kmp_team_t *team;
3640  int teams_swapped = 0, task_state;
3641  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3642  __kmp_assert_valid_gtid(global_tid);
3643 
3644  // why do we need this initialization here at all?
3645  // A reduction clause cannot be used as a stand-alone directive.
3646 
3647  // do not call __kmp_serial_initialize(); it will be called by
3648  // __kmp_parallel_initialize() if needed
3649  // possible false-positive race detection by the thread checker ???
3650  if (!TCR_4(__kmp_init_parallel))
3651  __kmp_parallel_initialize();
3652 
3653  __kmp_resume_if_soft_paused();
3654 
3655 // check correctness of reduce block nesting
3656 #if KMP_USE_DYNAMIC_LOCK
3657  if (__kmp_env_consistency_check)
3658  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3659 #else
3660  if (__kmp_env_consistency_check)
3661  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3662 #endif
3663 
3664  th = __kmp_thread_from_gtid(global_tid);
3665  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3666 
3667  // The packed_reduction_method value will be reused by the __kmpc_end_reduce*
3668  // functions, so it has to be kept in a variable.
3669  // The variable should be a construct-specific or thread-specific property,
3670  // not a team-specific property
3671  // (a thread can reach the next reduce block on the next construct, and the
3672  // reduce method may differ on that construct).
3673  // An ident_t "loc" parameter could serve as a construct-specific property,
3674  // but what if loc == 0?
3675  // (If a construct-specific or team-specific variable were shared,
3676  // unnecessary extra synchronization would be needed.)
3677  // A thread-specific variable is better with respect to both issues above
3678  // (next construct and extra syncs).
3679  // A thread-specific "th_local.packed_reduction_method" variable is used;
3680  // each thread executes the 'determine' and 'set' lines (there is no need to
3681  // have a single thread do this, which would require extra syncs).
3682 
3683  packed_reduction_method = __kmp_determine_reduction_method(
3684  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3685  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3686 
3687  OMPT_REDUCTION_DECL(th, global_tid);
3688  if (packed_reduction_method == critical_reduce_block) {
3689 
3690  OMPT_REDUCTION_BEGIN;
3691 
3692  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3693  retval = 1;
3694 
3695  } else if (packed_reduction_method == empty_reduce_block) {
3696 
3697  OMPT_REDUCTION_BEGIN;
3698 
3699  // usage: if team size == 1, no synchronization is required ( Intel
3700  // platforms only )
3701  retval = 1;
3702 
3703  } else if (packed_reduction_method == atomic_reduce_block) {
3704 
3705  retval = 2;
3706 
3707  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3708  // won't be called by the code gen)
3709  // (this is not ideal: the checking block has already been closed by
3710  // this 'pop',
3711  // but the atomic operation has not been executed yet; it will execute
3712  // slightly later, literally on the next instruction)
3713  if (__kmp_env_consistency_check)
3714  __kmp_pop_sync(global_tid, ct_reduce, loc);
3715 
3716  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3717  tree_reduce_block)) {
3718 
3719 // AT: performance issue: a real barrier here
3720 // AT: (if the primary thread is slow, other threads are blocked here waiting
3721 // for the primary thread to come and release them)
3722 // AT: (this is not what a customer might expect when specifying the NOWAIT
3723 // clause; specifying NOWAIT won't improve performance, which will
3724 // be confusing to a customer)
3725 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3726 // might go faster and be more in line with the sense of NOWAIT
3727 // AT: TO DO: run the EPCC test and compare times
3728 
3729 // this barrier should be invisible to a customer and to the threading profile
3730 // tool (it's neither a terminating barrier nor customer's code, it's
3731 // used for an internal purpose)
3732 #if OMPT_SUPPORT
3733  // JP: can this barrier potentially lead to task scheduling?
3734  // JP: as long as there is a barrier in the implementation, OMPT should and
3735  // will provide the barrier events,
3736  // so we set up the necessary frame/return addresses.
3737  ompt_frame_t *ompt_frame;
3738  if (ompt_enabled.enabled) {
3739  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3740  if (ompt_frame->enter_frame.ptr == NULL)
3741  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3742  }
3743  OMPT_STORE_RETURN_ADDRESS(global_tid);
3744 #endif
3745 #if USE_ITT_NOTIFY
3746  __kmp_threads[global_tid]->th.th_ident = loc;
3747 #endif
3748  retval =
3749  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3750  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3751  retval = (retval != 0) ? (0) : (1);
3752 #if OMPT_SUPPORT && OMPT_OPTIONAL
3753  if (ompt_enabled.enabled) {
3754  ompt_frame->enter_frame = ompt_data_none;
3755  }
3756 #endif
3757 
3758  // All workers other than the primary thread should do this pop here
3759  // (none of the other workers will get to __kmpc_end_reduce_nowait()).
3760  if (__kmp_env_consistency_check) {
3761  if (retval == 0) {
3762  __kmp_pop_sync(global_tid, ct_reduce, loc);
3763  }
3764  }
3765 
3766  } else {
3767 
3768  // should never reach this block
3769  KMP_ASSERT(0); // "unexpected method"
3770  }
3771  if (teams_swapped) {
3772  __kmp_restore_swapped_teams(th, team, task_state);
3773  }
3774  KA_TRACE(
3775  10,
3776  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3777  global_tid, packed_reduction_method, retval));
3778 
3779  return retval;
3780 }
3781 
3790 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3791  kmp_critical_name *lck) {
3792 
3793  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3794 
3795  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3796  __kmp_assert_valid_gtid(global_tid);
3797 
3798  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3799 
3800  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3801 
3802  if (packed_reduction_method == critical_reduce_block) {
3803 
3804  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3805  OMPT_REDUCTION_END;
3806 
3807  } else if (packed_reduction_method == empty_reduce_block) {
3808 
3809  // usage: if team size == 1, no synchronization is required
3810  // (on Intel platforms only)
3811 
3812  OMPT_REDUCTION_END;
3813 
3814  } else if (packed_reduction_method == atomic_reduce_block) {
3815 
3816  // Neither the primary thread nor the other workers should get here
3817  // (code gen does not generate this call for case 2: atomic reduce block).
3818  // Actually, it would be better to remove this else-if entirely;
3819  // after removal, this value would be checked by the 'else' branch and assert.
3820 
3821  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3822  tree_reduce_block)) {
3823 
3824  // only primary thread gets here
3825  // OMPT: tree reduction is annotated in the barrier code
3826 
3827  } else {
3828 
3829  // should never reach this block
3830  KMP_ASSERT(0); // "unexpected method"
3831  }
3832 
3833  if (__kmp_env_consistency_check)
3834  __kmp_pop_sync(global_tid, ct_reduce, loc);
3835 
3836  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3837  global_tid, packed_reduction_method));
3838 
3839  return;
3840 }
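
// A hedged usage sketch (illustrative, not part of the runtime) of the code a
// compiler might emit for a worksharing reduction with a NOWAIT clause,
// showing how the 0/1/2 return values of __kmpc_reduce_nowait() are typically
// consumed; the names "priv_sum", "sum", "reduce_cb" and "crit" are
// illustrative only.
//
//   switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(priv_sum), &priv_sum,
//                                reduce_cb, &crit)) {
//   case 1: // critical/empty/tree path: this thread folds its private copy
//     sum += priv_sum;
//     __kmpc_end_reduce_nowait(&loc, gtid, &crit);
//     break;
//   case 2: // atomic path: combine with an atomic update of sum; the end
//     break; // call is not emitted for this case (see the pop comment above)
//   default: // 0: a tree reduction already consumed this thread's copy
//     break;
//   }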
3841 
3842 /* 2.a.ii. Reduce Block with a terminating barrier */
3843 
3859 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3860  size_t reduce_size, void *reduce_data,
3861  void (*reduce_func)(void *lhs_data, void *rhs_data),
3862  kmp_critical_name *lck) {
3863  KMP_COUNT_BLOCK(REDUCE_wait);
3864  int retval = 0;
3865  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3866  kmp_info_t *th;
3867  kmp_team_t *team;
3868  int teams_swapped = 0, task_state;
3869 
3870  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3871  __kmp_assert_valid_gtid(global_tid);
3872 
3873  // Why do we need this initialization here at all?
3874  // A reduction clause cannot be a stand-alone directive.
3875 
3876  // Do not call __kmp_serial_initialize(); it will be called by
3877  // __kmp_parallel_initialize() if needed.
3878  // (Possible detection of a false-positive race by the thread checker?)
3879  if (!TCR_4(__kmp_init_parallel))
3880  __kmp_parallel_initialize();
3881 
3882  __kmp_resume_if_soft_paused();
3883 
3884 // check correctness of reduce block nesting
3885 #if KMP_USE_DYNAMIC_LOCK
3886  if (__kmp_env_consistency_check)
3887  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3888 #else
3889  if (__kmp_env_consistency_check)
3890  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3891 #endif
3892 
3893  th = __kmp_thread_from_gtid(global_tid);
3894  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3895 
3896  packed_reduction_method = __kmp_determine_reduction_method(
3897  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3898  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3899 
3900  OMPT_REDUCTION_DECL(th, global_tid);
3901 
3902  if (packed_reduction_method == critical_reduce_block) {
3903 
3904  OMPT_REDUCTION_BEGIN;
3905  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3906  retval = 1;
3907 
3908  } else if (packed_reduction_method == empty_reduce_block) {
3909 
3910  OMPT_REDUCTION_BEGIN;
3911  // usage: if team size == 1, no synchronization is required
3912  // (Intel platforms only)
3913  retval = 1;
3914 
3915  } else if (packed_reduction_method == atomic_reduce_block) {
3916 
3917  retval = 2;
3918 
3919  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920  tree_reduce_block)) {
3921 
3922 // case tree_reduce_block:
3923 // this barrier should be visible to a customer and to the threading profile
3924 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3925 #if OMPT_SUPPORT
3926  ompt_frame_t *ompt_frame;
3927  if (ompt_enabled.enabled) {
3928  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3929  if (ompt_frame->enter_frame.ptr == NULL)
3930  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3931  }
3932  OMPT_STORE_RETURN_ADDRESS(global_tid);
3933 #endif
3934 #if USE_ITT_NOTIFY
3935  __kmp_threads[global_tid]->th.th_ident =
3936  loc; // needed for correct notification of frames
3937 #endif
3938  retval =
3939  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3940  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3941  retval = (retval != 0) ? (0) : (1);
3942 #if OMPT_SUPPORT && OMPT_OPTIONAL
3943  if (ompt_enabled.enabled) {
3944  ompt_frame->enter_frame = ompt_data_none;
3945  }
3946 #endif
3947 
3948  // All workers other than the primary thread should do this pop here
3949  // (none of the workers except the primary will enter __kmpc_end_reduce()).
3950  if (__kmp_env_consistency_check) {
3951  if (retval == 0) { // 0: all other workers; 1: primary thread
3952  __kmp_pop_sync(global_tid, ct_reduce, loc);
3953  }
3954  }
3955 
3956  } else {
3957 
3958  // should never reach this block
3959  KMP_ASSERT(0); // "unexpected method"
3960  }
3961  if (teams_swapped) {
3962  __kmp_restore_swapped_teams(th, team, task_state);
3963  }
3964 
3965  KA_TRACE(10,
3966  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3967  global_tid, packed_reduction_method, retval));
3968  return retval;
3969 }
3970 
3981 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3982  kmp_critical_name *lck) {
3983 
3984  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3985  kmp_info_t *th;
3986  kmp_team_t *team;
3987  int teams_swapped = 0, task_state;
3988 
3989  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3990  __kmp_assert_valid_gtid(global_tid);
3991 
3992  th = __kmp_thread_from_gtid(global_tid);
3993  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3994 
3995  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3996 
3997  // this barrier should be visible to a customer and to the threading profile
3998  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3999  OMPT_REDUCTION_DECL(th, global_tid);
4000 
4001  if (packed_reduction_method == critical_reduce_block) {
4002  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
4003 
4004  OMPT_REDUCTION_END;
4005 
4006 // TODO: implicit barrier: should be exposed
4007 #if OMPT_SUPPORT
4008  ompt_frame_t *ompt_frame;
4009  if (ompt_enabled.enabled) {
4010  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4011  if (ompt_frame->enter_frame.ptr == NULL)
4012  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4013  }
4014  OMPT_STORE_RETURN_ADDRESS(global_tid);
4015 #endif
4016 #if USE_ITT_NOTIFY
4017  __kmp_threads[global_tid]->th.th_ident = loc;
4018 #endif
4019  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4020 #if OMPT_SUPPORT && OMPT_OPTIONAL
4021  if (ompt_enabled.enabled) {
4022  ompt_frame->enter_frame = ompt_data_none;
4023  }
4024 #endif
4025 
4026  } else if (packed_reduction_method == empty_reduce_block) {
4027 
4028  OMPT_REDUCTION_END;
4029 
4030 // usage: if team size==1, no synchronization is required (Intel platforms only)
4031 
4032 // TODO: implicit barrier: should be exposed
4033 #if OMPT_SUPPORT
4034  ompt_frame_t *ompt_frame;
4035  if (ompt_enabled.enabled) {
4036  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4037  if (ompt_frame->enter_frame.ptr == NULL)
4038  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4039  }
4040  OMPT_STORE_RETURN_ADDRESS(global_tid);
4041 #endif
4042 #if USE_ITT_NOTIFY
4043  __kmp_threads[global_tid]->th.th_ident = loc;
4044 #endif
4045  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4046 #if OMPT_SUPPORT && OMPT_OPTIONAL
4047  if (ompt_enabled.enabled) {
4048  ompt_frame->enter_frame = ompt_data_none;
4049  }
4050 #endif
4051 
4052  } else if (packed_reduction_method == atomic_reduce_block) {
4053 
4054 #if OMPT_SUPPORT
4055  ompt_frame_t *ompt_frame;
4056  if (ompt_enabled.enabled) {
4057  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4058  if (ompt_frame->enter_frame.ptr == NULL)
4059  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4060  }
4061  OMPT_STORE_RETURN_ADDRESS(global_tid);
4062 #endif
4063 // TODO: implicit barrier: should be exposed
4064 #if USE_ITT_NOTIFY
4065  __kmp_threads[global_tid]->th.th_ident = loc;
4066 #endif
4067  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4068 #if OMPT_SUPPORT && OMPT_OPTIONAL
4069  if (ompt_enabled.enabled) {
4070  ompt_frame->enter_frame = ompt_data_none;
4071  }
4072 #endif
4073 
4074  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4075  tree_reduce_block)) {
4076 
4077  // only primary thread executes here (primary releases all other workers)
4078  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4079  global_tid);
4080 
4081  } else {
4082 
4083  // should never reach this block
4084  KMP_ASSERT(0); // "unexpected method"
4085  }
4086  if (teams_swapped) {
4087  __kmp_restore_swapped_teams(th, team, task_state);
4088  }
4089 
4090  if (__kmp_env_consistency_check)
4091  __kmp_pop_sync(global_tid, ct_reduce, loc);
4092 
4093  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4094  global_tid, packed_reduction_method));
4095 
4096  return;
4097 }
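
// A hedged sketch (illustrative, not from this file) of the blocking variant:
// unlike the NOWAIT case above, code gen typically calls __kmpc_end_reduce()
// after the atomic update as well, since __kmpc_end_reduce() supplies the
// terminating barrier of the construct; names are illustrative only.
//
//   int ret = __kmpc_reduce(&loc, gtid, 1, sizeof(priv_sum), &priv_sum,
//                           reduce_cb, &crit);
//   if (ret == 1) {
//     sum += priv_sum;                      // critical/empty/tree winner
//     __kmpc_end_reduce(&loc, gtid, &crit); // exits critical / joins barrier
//   } else if (ret == 2) {
//     // atomic update of sum with priv_sum
//     __kmpc_end_reduce(&loc, gtid, &crit); // still needed for the barrier
//   }
//   // ret == 0: tree-reduction workers are released by the primary thread
//   // inside __kmpc_end_reduce() via __kmp_end_split_barrier().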
4098 
4099 #undef __KMP_GET_REDUCTION_METHOD
4100 #undef __KMP_SET_REDUCTION_METHOD
4101 
4102 /* end of interface to fast scalable reduce routines */
4103 
4104 kmp_uint64 __kmpc_get_taskid() {
4105 
4106  kmp_int32 gtid;
4107  kmp_info_t *thread;
4108 
4109  gtid = __kmp_get_gtid();
4110  if (gtid < 0) {
4111  return 0;
4112  }
4113  thread = __kmp_thread_from_gtid(gtid);
4114  return thread->th.th_current_task->td_task_id;
4115 
4116 } // __kmpc_get_taskid
4117 
4118 kmp_uint64 __kmpc_get_parent_taskid() {
4119 
4120  kmp_int32 gtid;
4121  kmp_info_t *thread;
4122  kmp_taskdata_t *parent_task;
4123 
4124  gtid = __kmp_get_gtid();
4125  if (gtid < 0) {
4126  return 0;
4127  }
4128  thread = __kmp_thread_from_gtid(gtid);
4129  parent_task = thread->th.th_current_task->td_parent;
4130  return (parent_task == NULL ? 0 : parent_task->td_task_id);
4131 
4132 } // __kmpc_get_parent_taskid
4133 
4145 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4146  const struct kmp_dim *dims) {
4147  __kmp_assert_valid_gtid(gtid);
4148  int j, idx;
4149  kmp_int64 last, trace_count;
4150  kmp_info_t *th = __kmp_threads[gtid];
4151  kmp_team_t *team = th->th.th_team;
4152  kmp_uint32 *flags;
4153  kmp_disp_t *pr_buf = th->th.th_dispatch;
4154  dispatch_shared_info_t *sh_buf;
4155 
4156  KA_TRACE(
4157  20,
4158  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4159  gtid, num_dims, !team->t.t_serialized));
4160  KMP_DEBUG_ASSERT(dims != NULL);
4161  KMP_DEBUG_ASSERT(num_dims > 0);
4162 
4163  if (team->t.t_serialized) {
4164  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4165  return; // no dependencies if team is serialized
4166  }
4167  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4168  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4169  // the next loop
4170  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4171 
4172  // Save bounds info into allocated private buffer
4173  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4174  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4175  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4176  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4177  pr_buf->th_doacross_info[0] =
4178  (kmp_int64)num_dims; // first element is number of dimensions
4179  // Also save the address of num_done in order to access it later without
4180  // knowing the buffer index.
4181  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4182  pr_buf->th_doacross_info[2] = dims[0].lo;
4183  pr_buf->th_doacross_info[3] = dims[0].up;
4184  pr_buf->th_doacross_info[4] = dims[0].st;
4185  last = 5;
4186  for (j = 1; j < num_dims; ++j) {
4187  kmp_int64
4188  range_length; // To keep ranges of all dimensions but the first dims[0]
4189  if (dims[j].st == 1) { // most common case
4190  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4191  range_length = dims[j].up - dims[j].lo + 1;
4192  } else {
4193  if (dims[j].st > 0) {
4194  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4195  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4196  } else { // negative increment
4197  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4198  range_length =
4199  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4200  }
4201  }
4202  pr_buf->th_doacross_info[last++] = range_length;
4203  pr_buf->th_doacross_info[last++] = dims[j].lo;
4204  pr_buf->th_doacross_info[last++] = dims[j].up;
4205  pr_buf->th_doacross_info[last++] = dims[j].st;
4206  }
4207 
4208  // Compute total trip count.
4209  // Start with range of dims[0] which we don't need to keep in the buffer.
4210  if (dims[0].st == 1) { // most common case
4211  trace_count = dims[0].up - dims[0].lo + 1;
4212  } else if (dims[0].st > 0) {
4213  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4214  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4215  } else { // negative increment
4216  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4217  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4218  }
4219  for (j = 1; j < num_dims; ++j) {
4220  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4221  }
4222  KMP_DEBUG_ASSERT(trace_count > 0);
4223 
4224  // Check whether the shared buffer is still occupied by a previous loop
4225  // (the one that used index idx - __kmp_dispatch_num_buffers).
4226  if (idx != sh_buf->doacross_buf_idx) {
4227  // Shared buffer is occupied, wait for it to be free
4228  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4229  __kmp_eq_4, NULL);
4230  }
4231 #if KMP_32_BIT_ARCH
4232  // Check if we are the first thread. After the CAS the first thread gets 0,
4233  // others get 1 if initialization is in progress, allocated pointer otherwise.
4234  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4235  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4236  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4237 #else
4238  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4239  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4240 #endif
4241  if (flags == NULL) {
4242  // we are the first thread, allocate the array of flags
4243  size_t size =
4244  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4245  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4246  KMP_MB();
4247  sh_buf->doacross_flags = flags;
4248  } else if (flags == (kmp_uint32 *)1) {
4249 #if KMP_32_BIT_ARCH
4250  // initialization is still in progress, need to wait
4251  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4252 #else
4253  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4254 #endif
4255  KMP_YIELD(TRUE);
4256  KMP_MB();
4257  } else {
4258  KMP_MB();
4259  }
4260  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4261  pr_buf->th_doacross_flags =
4262  sh_buf->doacross_flags; // save private copy in order to not
4263  // touch shared buffer on each iteration
4264  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4265 }
4266 
4267 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4268  __kmp_assert_valid_gtid(gtid);
4269  kmp_int64 shft;
4270  size_t num_dims, i;
4271  kmp_uint32 flag;
4272  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4273  kmp_info_t *th = __kmp_threads[gtid];
4274  kmp_team_t *team = th->th.th_team;
4275  kmp_disp_t *pr_buf;
4276  kmp_int64 lo, up, st;
4277 
4278  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4279  if (team->t.t_serialized) {
4280  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4281  return; // no dependencies if team is serialized
4282  }
4283 
4284  // calculate sequential iteration number and check out-of-bounds condition
4285  pr_buf = th->th.th_dispatch;
4286  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4287  num_dims = (size_t)pr_buf->th_doacross_info[0];
4288  lo = pr_buf->th_doacross_info[2];
4289  up = pr_buf->th_doacross_info[3];
4290  st = pr_buf->th_doacross_info[4];
4291 #if OMPT_SUPPORT && OMPT_OPTIONAL
4292  SimpleVLA<ompt_dependence_t> deps(num_dims);
4293 #endif
4294  if (st == 1) { // most common case
4295  if (vec[0] < lo || vec[0] > up) {
4296  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4297  "bounds [%lld,%lld]\n",
4298  gtid, vec[0], lo, up));
4299  return;
4300  }
4301  iter_number = vec[0] - lo;
4302  } else if (st > 0) {
4303  if (vec[0] < lo || vec[0] > up) {
4304  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4305  "bounds [%lld,%lld]\n",
4306  gtid, vec[0], lo, up));
4307  return;
4308  }
4309  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4310  } else { // negative increment
4311  if (vec[0] > lo || vec[0] < up) {
4312  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4313  "bounds [%lld,%lld]\n",
4314  gtid, vec[0], lo, up));
4315  return;
4316  }
4317  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318  }
4319 #if OMPT_SUPPORT && OMPT_OPTIONAL
4320  deps[0].variable.value = iter_number;
4321  deps[0].dependence_type = ompt_dependence_type_sink;
4322 #endif
4323  for (i = 1; i < num_dims; ++i) {
4324  kmp_int64 iter, ln;
4325  size_t j = i * 4;
4326  ln = pr_buf->th_doacross_info[j + 1];
4327  lo = pr_buf->th_doacross_info[j + 2];
4328  up = pr_buf->th_doacross_info[j + 3];
4329  st = pr_buf->th_doacross_info[j + 4];
4330  if (st == 1) {
4331  if (vec[i] < lo || vec[i] > up) {
4332  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4333  "bounds [%lld,%lld]\n",
4334  gtid, vec[i], lo, up));
4335  return;
4336  }
4337  iter = vec[i] - lo;
4338  } else if (st > 0) {
4339  if (vec[i] < lo || vec[i] > up) {
4340  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4341  "bounds [%lld,%lld]\n",
4342  gtid, vec[i], lo, up));
4343  return;
4344  }
4345  iter = (kmp_uint64)(vec[i] - lo) / st;
4346  } else { // st < 0
4347  if (vec[i] > lo || vec[i] < up) {
4348  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4349  "bounds [%lld,%lld]\n",
4350  gtid, vec[i], lo, up));
4351  return;
4352  }
4353  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4354  }
4355  iter_number = iter + ln * iter_number;
4356 #if OMPT_SUPPORT && OMPT_OPTIONAL
4357  deps[i].variable.value = iter;
4358  deps[i].dependence_type = ompt_dependence_type_sink;
4359 #endif
4360  }
4361  shft = iter_number % 32; // use 32-bit granularity
4362  iter_number >>= 5; // divided by 32
4363  flag = 1 << shft;
4364  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4365  KMP_YIELD(TRUE);
4366  }
4367  KMP_MB();
4368 #if OMPT_SUPPORT && OMPT_OPTIONAL
4369  if (ompt_enabled.ompt_callback_dependences) {
4370  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4371  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4372  }
4373 #endif
4374  KA_TRACE(20,
4375  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4376  gtid, (iter_number << 5) + shft));
4377 }
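
// Worked example (illustrative) of the flag indexing used above and in
// __kmpc_doacross_post() below: for a linearized iteration number of 70,
// shft = 70 % 32 = 6 and iter_number >>= 5 gives word index 2, so the wait
// spins on bit (1 << 6) of th_doacross_flags[2], which the matching post
// sets with an atomic OR.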
4378 
4379 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4380  __kmp_assert_valid_gtid(gtid);
4381  kmp_int64 shft;
4382  size_t num_dims, i;
4383  kmp_uint32 flag;
4384  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4385  kmp_info_t *th = __kmp_threads[gtid];
4386  kmp_team_t *team = th->th.th_team;
4387  kmp_disp_t *pr_buf;
4388  kmp_int64 lo, st;
4389 
4390  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4391  if (team->t.t_serialized) {
4392  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4393  return; // no dependencies if team is serialized
4394  }
4395 
4396  // calculate sequential iteration number (same as in "wait" but no
4397  // out-of-bounds checks)
4398  pr_buf = th->th.th_dispatch;
4399  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4400  num_dims = (size_t)pr_buf->th_doacross_info[0];
4401  lo = pr_buf->th_doacross_info[2];
4402  st = pr_buf->th_doacross_info[4];
4403 #if OMPT_SUPPORT && OMPT_OPTIONAL
4404  SimpleVLA<ompt_dependence_t> deps(num_dims);
4405 #endif
4406  if (st == 1) { // most common case
4407  iter_number = vec[0] - lo;
4408  } else if (st > 0) {
4409  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4410  } else { // negative increment
4411  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4412  }
4413 #if OMPT_SUPPORT && OMPT_OPTIONAL
4414  deps[0].variable.value = iter_number;
4415  deps[0].dependence_type = ompt_dependence_type_source;
4416 #endif
4417  for (i = 1; i < num_dims; ++i) {
4418  kmp_int64 iter, ln;
4419  size_t j = i * 4;
4420  ln = pr_buf->th_doacross_info[j + 1];
4421  lo = pr_buf->th_doacross_info[j + 2];
4422  st = pr_buf->th_doacross_info[j + 4];
4423  if (st == 1) {
4424  iter = vec[i] - lo;
4425  } else if (st > 0) {
4426  iter = (kmp_uint64)(vec[i] - lo) / st;
4427  } else { // st < 0
4428  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4429  }
4430  iter_number = iter + ln * iter_number;
4431 #if OMPT_SUPPORT && OMPT_OPTIONAL
4432  deps[i].variable.value = iter;
4433  deps[i].dependence_type = ompt_dependence_type_source;
4434 #endif
4435  }
4436 #if OMPT_SUPPORT && OMPT_OPTIONAL
4437  if (ompt_enabled.ompt_callback_dependences) {
4438  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4439  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4440  }
4441 #endif
4442  shft = iter_number % 32; // use 32-bit granularity
4443  iter_number >>= 5; // divided by 32
4444  flag = 1 << shft;
4445  KMP_MB();
4446  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4447  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4448  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4449  (iter_number << 5) + shft));
4450 }
4451 
4452 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4453  __kmp_assert_valid_gtid(gtid);
4454  kmp_int32 num_done;
4455  kmp_info_t *th = __kmp_threads[gtid];
4456  kmp_team_t *team = th->th.th_team;
4457  kmp_disp_t *pr_buf = th->th.th_dispatch;
4458 
4459  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4460  if (team->t.t_serialized) {
4461  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4462  return; // nothing to do
4463  }
4464  num_done =
4465  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4466  if (num_done == th->th.th_team_nproc) {
4467  // we are the last thread, need to free shared resources
4468  int idx = pr_buf->th_doacross_buf_idx - 1;
4469  dispatch_shared_info_t *sh_buf =
4470  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4471  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4472  (kmp_int64)&sh_buf->doacross_num_done);
4473  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4474  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4475  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4476  sh_buf->doacross_flags = NULL;
4477  sh_buf->doacross_num_done = 0;
4478  sh_buf->doacross_buf_idx +=
4479  __kmp_dispatch_num_buffers; // free buffer for future re-use
4480  }
4481  // free private resources (need to keep buffer index forever)
4482  pr_buf->th_doacross_flags = NULL;
4483  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4484  pr_buf->th_doacross_info = NULL;
4485  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4486 }
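
// A hedged sketch (not part of the runtime) of the call sequence a compiler
// might emit for a doacross loop such as "#pragma omp for ordered(1)" with
// depend(sink: i-1) and depend(source) clauses; the variable names and bounds
// are illustrative, while the lo/up/st fields match struct kmp_dim as
// accessed above.
//
//   struct kmp_dim dims[1] = {{/*lo=*/0, /*up=*/N - 1, /*st=*/1}};
//   __kmpc_doacross_init(&loc, gtid, 1, dims);
//   for (kmp_int64 i = chunk_lb; i <= chunk_ub; ++i) { // this thread's chunk
//     kmp_int64 sink[1] = {i - 1};
//     __kmpc_doacross_wait(&loc, gtid, sink);  // ordered depend(sink: i-1)
//     /* ... ordered loop body ... */
//     kmp_int64 src[1] = {i};
//     __kmpc_doacross_post(&loc, gtid, src);   // ordered depend(source)
//   }
//   __kmpc_doacross_fini(&loc, gtid);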
4487 
4488 /* OpenMP 5.1 Memory Management routines */
4489 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4490  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4491 }
4492 
4493 void *omp_aligned_alloc(size_t align, size_t size,
4494  omp_allocator_handle_t allocator) {
4495  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4496 }
4497 
4498 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4499  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4500 }
4501 
4502 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4503  omp_allocator_handle_t allocator) {
4504  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4505 }
4506 
4507 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4508  omp_allocator_handle_t free_allocator) {
4509  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4510  free_allocator);
4511 }
4512 
4513 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4514  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4515 }
4516 /* end of OpenMP 5.1 Memory Management routines */
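
// A hedged, user-side usage sketch (not part of this file) of the memory
// management entry points above, using the predefined omp_default_mem_alloc
// allocator from omp.h (error handling elided):
//
//   double *buf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
//                                             omp_default_mem_alloc);
//   if (buf) {
//     buf = (double *)omp_realloc(buf, 2048 * sizeof(double),
//                                 omp_default_mem_alloc,
//                                 omp_default_mem_alloc);
//     omp_free(buf, omp_default_mem_alloc);
//   }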
4517 
4518 int __kmpc_get_target_offload(void) {
4519  if (!__kmp_init_serial) {
4520  __kmp_serial_initialize();
4521  }
4522  return __kmp_target_offload;
4523 }
4524 
4525 int __kmpc_pause_resource(kmp_pause_status_t level) {
4526  if (!__kmp_init_serial) {
4527  return 1; // Can't pause if runtime is not initialized
4528  }
4529  return __kmp_pause_resource(level);
4530 }
4531 
4532 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4533  if (!__kmp_init_serial)
4534  __kmp_serial_initialize();
4535 
4536  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4537 
4538 #if OMPT_SUPPORT
4539  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4540  ompt_callbacks.ompt_callback(ompt_callback_error)(
4541  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4542  OMPT_GET_RETURN_ADDRESS(0));
4543  }
4544 #endif // OMPT_SUPPORT
4545 
4546  char *src_loc;
4547  if (loc && loc->psource) {
4548  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4549  src_loc =
4550  __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4551  __kmp_str_loc_free(&str_loc);
4552  } else {
4553  src_loc = __kmp_str_format("unknown");
4554  }
4555 
4556  if (severity == severity_warning)
4557  KMP_WARNING(UserDirectedWarning, src_loc, message);
4558  else
4559  KMP_FATAL(UserDirectedError, src_loc, message);
4560 
4561  __kmp_str_free(&src_loc);
4562 }
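
// A hedged sketch of the call a compiler might emit for an executable
// "#pragma omp error" directive, e.g.
// "#pragma omp error at(execution) severity(warning) message("check input")";
// severity_warning and severity_fatal are the two values asserted above.
//
//   __kmpc_error(&loc, severity_warning, "check input");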
4563 
4564 // Mark begin of scope directive.
4565 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4566 // reserved is for future extension of the scope directive and is not used.
4567 #if OMPT_SUPPORT && OMPT_OPTIONAL
4568  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4569  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4570  int tid = __kmp_tid_from_gtid(gtid);
4571  ompt_callbacks.ompt_callback(ompt_callback_work)(
4572  ompt_work_scope, ompt_scope_begin,
4573  &(team->t.ompt_team_info.parallel_data),
4574  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4575  OMPT_GET_RETURN_ADDRESS(0));
4576  }
4577 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4578 }
4579 
4580 // Mark end of scope directive
4581 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4582 // reserved is for future extension of the scope directive and is not used.
4583 #if OMPT_SUPPORT && OMPT_OPTIONAL
4584  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4585  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4586  int tid = __kmp_tid_from_gtid(gtid);
4587  ompt_callbacks.ompt_callback(ompt_callback_work)(
4588  ompt_work_scope, ompt_scope_end,
4589  &(team->t.ompt_team_info.parallel_data),
4590  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4591  OMPT_GET_RETURN_ADDRESS(0));
4592  }
4593 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4594 }
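
// A hedged sketch of the bracketing calls a compiler might emit around a
// "#pragma omp scope" region; the reserved argument is passed as NULL since,
// as noted above, it is unused and kept for future extension.
//
//   __kmpc_scope(&loc, gtid, NULL);
//   /* ... structured block of the scope region ... */
//   __kmpc_end_scope(&loc, gtid, NULL);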
4595 
4596 #ifdef KMP_USE_VERSION_SYMBOLS
4597 // For GOMP compatibility there are two versions of each omp_* API.
4598 // One is the plain C symbol and one is the Fortran symbol with an appended
4599 // underscore. When we implement a specific ompc_* version of an omp_*
4600 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4601 // instead of the Fortran versions in kmp_ftn_entry.h
4602 extern "C" {
4603 // Have to undef these from omp.h so they aren't translated into
4604 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4605 #ifdef omp_set_affinity_format
4606 #undef omp_set_affinity_format
4607 #endif
4608 #ifdef omp_get_affinity_format
4609 #undef omp_get_affinity_format
4610 #endif
4611 #ifdef omp_display_affinity
4612 #undef omp_display_affinity
4613 #endif
4614 #ifdef omp_capture_affinity
4615 #undef omp_capture_affinity
4616 #endif
4617 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4618  "OMP_5.0");
4619 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4620  "OMP_5.0");
4621 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4622  "OMP_5.0");
4623 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4624  "OMP_5.0");
4625 } // extern "C"
4626 #endif