LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in the future, e.g. to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end()
63  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
64  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
65  // returns FALSE and __kmpc_end() will unregister this root (it can cause
66  // library shut down).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 #if KMP_OS_WINDOWS && OMPT_SUPPORT
74  // Normal exit process on Windows does not allow worker threads of the final
75  // parallel region to finish reporting their events, so shutting down the
76  // library here fixes the issue at least for the cases where __kmpc_end() is
77  // placed properly.
78  if (ompt_enabled.enabled)
79  __kmp_internal_end_library(__kmp_gtid_get_specific());
80 #endif
81 }
82 
101 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
102  kmp_int32 gtid = __kmp_entry_gtid();
103 
104  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
105 
106  return gtid;
107 }
108 
123 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
124  KC_TRACE(10,
125  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
126 
127  return TCR_4(__kmp_all_nth);
128 }
129 
136 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
137  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
138  return __kmp_tid_from_gtid(__kmp_entry_gtid());
139 }
140 
146 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
147  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
148 
149  return __kmp_entry_thread()->th.th_team->t.t_nproc;
150 }
151 
158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
159 #ifndef KMP_DEBUG
160 
161  return TRUE;
162 
163 #else
164 
165  const char *semi2;
166  const char *semi3;
167  int line_no;
168 
169  if (__kmp_par_range == 0) {
170  return TRUE;
171  }
172  semi2 = loc->psource;
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  semi2 = strchr(semi2, ';');
177  if (semi2 == NULL) {
178  return TRUE;
179  }
180  semi2 = strchr(semi2 + 1, ';');
181  if (semi2 == NULL) {
182  return TRUE;
183  }
184  if (__kmp_par_range_filename[0]) {
185  const char *name = semi2 - 1;
186  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
187  name--;
188  }
189  if ((*name == '/') || (*name == ';')) {
190  name++;
191  }
192  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193  return __kmp_par_range < 0;
194  }
195  }
196  semi3 = strchr(semi2 + 1, ';');
197  if (__kmp_par_range_routine[0]) {
198  if ((semi3 != NULL) && (semi3 > semi2) &&
199  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200  return __kmp_par_range < 0;
201  }
202  }
203  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
204  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205  return __kmp_par_range > 0;
206  }
207  return __kmp_par_range < 0;
208  }
209  return TRUE;
210 
211 #endif /* KMP_DEBUG */
212 }
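
// Illustration (sketch, not part of the original listing): the ident_t::psource
// string that the code above picks apart is a semicolon-separated record of the
// form ";file;routine;line;line;;".  The concrete names and numbers below are
// hypothetical stand-ins.
#if 0 /* illustrative only */
static ident_t __kmp_example_par_range_loc = {
    0, KMP_IDENT_KMPC, 0, 0,
    ";example.c;compute_kernel;42;42;;"}; // ;file;routine;line;line;;
// __kmpc_ok_to_fork(&__kmp_example_par_range_loc) would match "example.c"
// against __kmp_par_range_filename, "compute_kernel" against
// __kmp_par_range_routine, and line 42 against the
// [__kmp_par_range_lb, __kmp_par_range_ub] interval.
#endif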
213 
220 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
221  return __kmp_entry_thread()->th.th_root->r.r_active;
222 }
223 
233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
234  kmp_int32 num_threads) {
235  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
236  global_tid, num_threads));
237 
238  __kmp_push_num_threads(loc, global_tid, num_threads);
239 }
240 
241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
242  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
243 
244  /* the num_threads are automatically popped */
245 }
246 
247 #if OMP_40_ENABLED
248 
249 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
250  kmp_int32 proc_bind) {
251  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
252  proc_bind));
253 
254  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
255 }
256 
257 #endif /* OMP_40_ENABLED */
258 
269 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
270  int gtid = __kmp_entry_gtid();
271 
272 #if (KMP_STATS_ENABLED)
273  // If we were in a serial region, then stop the serial timer, record
274  // the event, and start parallel region timer
275  stats_state_e previous_state = KMP_GET_THREAD_STATE();
276  if (previous_state == stats_state_e::SERIAL_REGION) {
277  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
278  } else {
279  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
280  }
281  int inParallel = __kmpc_in_parallel(loc);
282  if (inParallel) {
283  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
284  } else {
285  KMP_COUNT_BLOCK(OMP_PARALLEL);
286  }
287 #endif
288 
289  // maybe saving thr_state is enough here
290  {
291  va_list ap;
292  va_start(ap, microtask);
293 
294 #if OMPT_SUPPORT
295  ompt_frame_t *ompt_frame;
296  if (ompt_enabled.enabled) {
297  kmp_info_t *master_th = __kmp_threads[gtid];
298  kmp_team_t *parent_team = master_th->th.th_team;
299  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
300  if (lwt)
301  ompt_frame = &(lwt->ompt_task_info.frame);
302  else {
303  int tid = __kmp_tid_from_gtid(gtid);
304  ompt_frame = &(
305  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
306  }
307  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
308  OMPT_STORE_RETURN_ADDRESS(gtid);
309  }
310 #endif
311 
312 #if INCLUDE_SSC_MARKS
313  SSC_MARK_FORKING();
314 #endif
315  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
316  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
317  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
318 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
319 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
320  &ap
321 #else
322  ap
323 #endif
324  );
325 #if INCLUDE_SSC_MARKS
326  SSC_MARK_JOINING();
327 #endif
328  __kmp_join_call(loc, gtid
329 #if OMPT_SUPPORT
330  ,
331  fork_context_intel
332 #endif
333  );
334 
335  va_end(ap);
336  }
337 
338 #if KMP_STATS_ENABLED
339  if (previous_state == stats_state_e::SERIAL_REGION) {
340  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
341  } else {
342  KMP_POP_PARTITIONED_TIMER();
343  }
344 #endif // KMP_STATS_ENABLED
345 }
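
// Illustration (sketch, not part of the original listing): roughly how a
// compiler lowers "#pragma omp parallel num_threads(4)" onto the entry points
// above.  The outlined routine, the ident_t initializer and the variable names
// are hypothetical stand-ins, not generated code.
#if 0 /* illustrative only */
static void example_outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                             int *shared_x) {
  // Parallel-region body; each thread of the team runs this once.
  (void)global_tid;
  (void)bound_tid;
  (void)shared_x;
}

void example_parallel_region(void) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;foo;10;10;;"};
  int x = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_push_num_threads(&loc, gtid, 4); // from the num_threads(4) clause
  // argc == 1 shared variable; its address is forwarded to every invocation
  // of the microtask.
  __kmpc_fork_call(&loc, 1, (kmpc_micro)example_outlined, &x);
}
#endif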
346 
347 #if OMP_40_ENABLED
348 
359 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
360  kmp_int32 num_teams, kmp_int32 num_threads) {
361  KA_TRACE(20,
362  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
363  global_tid, num_teams, num_threads));
364 
365  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
366 }
367 
378 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
379  ...) {
380  int gtid = __kmp_entry_gtid();
381  kmp_info_t *this_thr = __kmp_threads[gtid];
382  va_list ap;
383  va_start(ap, microtask);
384 
385  KMP_COUNT_BLOCK(OMP_TEAMS);
386 
387  // remember teams entry point and nesting level
388  this_thr->th.th_teams_microtask = microtask;
389  this_thr->th.th_teams_level =
390  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
391 
392 #if OMPT_SUPPORT
393  kmp_team_t *parent_team = this_thr->th.th_team;
394  int tid = __kmp_tid_from_gtid(gtid);
395  if (ompt_enabled.enabled) {
396  parent_team->t.t_implicit_task_taskdata[tid]
397  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
398  }
399  OMPT_STORE_RETURN_ADDRESS(gtid);
400 #endif
401 
402  // check if __kmpc_push_num_teams was called; set the default number of teams
403  // otherwise
404  if (this_thr->th.th_teams_size.nteams == 0) {
405  __kmp_push_num_teams(loc, gtid, 0, 0);
406  }
407  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
408  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
409  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
410 
411  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
412  VOLATILE_CAST(microtask_t)
413  __kmp_teams_master, // "wrapped" task
414  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
415 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
416  &ap
417 #else
418  ap
419 #endif
420  );
421  __kmp_join_call(loc, gtid
422 #if OMPT_SUPPORT
423  ,
424  fork_context_intel
425 #endif
426  );
427 
428  // Pop current CG root off list
429  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
430  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
431  this_thr->th.th_cg_roots = tmp->up;
432  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
433  " to node %p. cg_nthreads was %d\n",
434  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
435  __kmp_free(tmp);
436  // Restore current task's thread_limit from CG root
437  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
438  this_thr->th.th_current_task->td_icvs.thread_limit =
439  this_thr->th.th_cg_roots->cg_thread_limit;
440 
441  this_thr->th.th_teams_microtask = NULL;
442  this_thr->th.th_teams_level = 0;
443  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
444  va_end(ap);
445 }
446 #endif /* OMP_40_ENABLED */
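
// Illustration (sketch, not part of the original listing): typical lowering of
// "#pragma omp teams num_teams(2) thread_limit(4)" onto __kmpc_push_num_teams
// and __kmpc_fork_teams.  All identifiers and literals are hypothetical.
#if 0 /* illustrative only */
static void example_teams_outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                                   int *shared_x) {
  // Runs once per team (executed by each team's master thread).
  (void)global_tid;
  (void)bound_tid;
  (void)shared_x;
}

void example_teams_region(void) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;bar;20;20;;"};
  int x = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/2, /*num_threads=*/4);
  __kmpc_fork_teams(&loc, 1, (kmpc_micro)example_teams_outlined, &x);
}
#endif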
447 
448 // I don't think this function should ever have been exported.
449 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
450 // openmp code ever called it, but it's been exported from the RTL for so
451 // long that I'm afraid to remove the definition.
452 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
453 
466 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
467 // The implementation is now in kmp_runtime.cpp so that it can share static
468 // functions with kmp_fork_call since the tasks to be done are similar in
469 // each case.
470 #if OMPT_SUPPORT
471  OMPT_STORE_RETURN_ADDRESS(global_tid);
472 #endif
473  __kmp_serialized_parallel(loc, global_tid);
474 }
475 
483 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
484  kmp_internal_control_t *top;
485  kmp_info_t *this_thr;
486  kmp_team_t *serial_team;
487 
488  KC_TRACE(10,
489  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
490 
491  /* skip all this code for autopar serialized loops since it results in
492  unacceptable overhead */
493  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
494  return;
495 
496  // Not autopar code
497  if (!TCR_4(__kmp_init_parallel))
498  __kmp_parallel_initialize();
499 
500 #if OMP_50_ENABLED
501  __kmp_resume_if_soft_paused();
502 #endif
503 
504  this_thr = __kmp_threads[global_tid];
505  serial_team = this_thr->th.th_serial_team;
506 
507 #if OMP_45_ENABLED
508  kmp_task_team_t *task_team = this_thr->th.th_task_team;
509 
510  // we need to wait for the proxy tasks before finishing the thread
511  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
512  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
513 #endif
514 
515  KMP_MB();
516  KMP_DEBUG_ASSERT(serial_team);
517  KMP_ASSERT(serial_team->t.t_serialized);
518  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
519  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
520  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
521  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
522 
523 #if OMPT_SUPPORT
524  if (ompt_enabled.enabled &&
525  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
526  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
527  if (ompt_enabled.ompt_callback_implicit_task) {
528  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
529  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
530  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
531  }
532 
533  // reset/clear the task id only after unlinking the task
534  ompt_data_t *parent_task_data;
535  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
536 
537  if (ompt_enabled.ompt_callback_parallel_end) {
538  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
539  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
540  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
541  }
542  __ompt_lw_taskteam_unlink(this_thr);
543  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
544  }
545 #endif
546 
547  /* If necessary, pop the internal control stack values and replace the team
548  * values */
549  top = serial_team->t.t_control_stack_top;
550  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
551  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
552  serial_team->t.t_control_stack_top = top->next;
553  __kmp_free(top);
554  }
555 
556  // if( serial_team -> t.t_serialized > 1 )
557  serial_team->t.t_level--;
558 
559  /* pop dispatch buffers stack */
560  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
561  {
562  dispatch_private_info_t *disp_buffer =
563  serial_team->t.t_dispatch->th_disp_buffer;
564  serial_team->t.t_dispatch->th_disp_buffer =
565  serial_team->t.t_dispatch->th_disp_buffer->next;
566  __kmp_free(disp_buffer);
567  }
568 #if OMP_50_ENABLED
569  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
570 #endif
571 
572  --serial_team->t.t_serialized;
573  if (serial_team->t.t_serialized == 0) {
574 
575 /* return to the parallel section */
576 
577 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
578  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
579  __kmp_clear_x87_fpu_status_word();
580  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
581  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
582  }
583 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
584 
585  this_thr->th.th_team = serial_team->t.t_parent;
586  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
587 
588  /* restore values cached in the thread */
589  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
590  this_thr->th.th_team_master =
591  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
592  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
593 
594  /* TODO the below shouldn't need to be adjusted for serialized teams */
595  this_thr->th.th_dispatch =
596  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
597 
598  __kmp_pop_current_task_from_thread(this_thr);
599 
600  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
601  this_thr->th.th_current_task->td_flags.executing = 1;
602 
603  if (__kmp_tasking_mode != tskm_immediate_exec) {
604  // Copy the task team from the new child / old parent team to the thread.
605  this_thr->th.th_task_team =
606  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
607  KA_TRACE(20,
608  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
609  "team %p\n",
610  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
611  }
612  } else {
613  if (__kmp_tasking_mode != tskm_immediate_exec) {
614  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
615  "depth of serial team %p to %d\n",
616  global_tid, serial_team, serial_team->t.t_serialized));
617  }
618  }
619 
620  if (__kmp_env_consistency_check)
621  __kmp_pop_parallel(global_tid, NULL);
622 #if OMPT_SUPPORT
623  if (ompt_enabled.enabled)
624  this_thr->th.ompt_thread_info.state =
625  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
626  : ompt_state_work_parallel);
627 #endif
628 }
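
// Illustration (sketch, not part of the original listing): a parallel region
// whose if() clause is false is typically serialized by bracketing a direct
// call to the outlined routine with the two entry points above.  All
// identifiers are hypothetical.
#if 0 /* illustrative only */
static void example_if_body(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                            int *shared_x) {
  (void)global_tid;
  (void)bound_tid;
  (void)shared_x;
}

void example_parallel_if(int condition) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;baz;30;30;;"};
  int x = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  if (condition) {
    __kmpc_fork_call(&loc, 1, (kmpc_micro)example_if_body, &x);
  } else {
    __kmpc_serialized_parallel(&loc, gtid);
    kmp_int32 bound_tid = 0;
    example_if_body(&gtid, &bound_tid, &x); // run the body on this thread
    __kmpc_end_serialized_parallel(&loc, gtid);
  }
}
#endif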
629 
638 void __kmpc_flush(ident_t *loc) {
639  KC_TRACE(10, ("__kmpc_flush: called\n"));
640 
641  /* need an explicit __mf() here since the library uses volatile instead */
642  KMP_MB(); /* Flush all pending memory write invalidates. */
643 
644 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
645 #if KMP_MIC
646 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
647 // We shouldn't need it, though, since the ABI rules require that
648 // * If the compiler generates NGO stores it also generates the fence
649 // * If users hand-code NGO stores they should insert the fence
650 // therefore no incomplete unordered stores should be visible.
651 #else
652  // C74404
653  // This is to address non-temporal store instructions (sfence needed).
654  // The clflush instruction is also addressed (mfence needed).
655  // Probably the non-temporal load movntdqa instruction should also be
656  // addressed.
657  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
658  if (!__kmp_cpuinfo.initialized) {
659  __kmp_query_cpuid(&__kmp_cpuinfo);
660  }
661  if (!__kmp_cpuinfo.sse2) {
662  // CPU cannot execute SSE2 instructions.
663  } else {
664 #if KMP_COMPILER_ICC
665  _mm_mfence();
666 #elif KMP_COMPILER_MSVC
667  MemoryBarrier();
668 #else
669  __sync_synchronize();
670 #endif // KMP_COMPILER_ICC
671  }
672 #endif // KMP_MIC
673 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
674 // Nothing to see here; move along
675 #elif KMP_ARCH_PPC64
676 // Nothing needed here (we have a real MB above).
677 #if KMP_OS_CNK
678  // The flushing thread needs to yield here; this prevents a
679  // busy-waiting thread from saturating the pipeline. flush is
680  // often used in loops like this:
681  // while (!flag) {
682  // #pragma omp flush(flag)
683  // }
684  // and adding the yield here is good for at least a 10x speedup
685  // when running >2 threads per core (on the NAS LU benchmark).
686  __kmp_yield(TRUE);
687 #endif
688 #else
689 #error Unknown or unsupported architecture
690 #endif
691 
692 #if OMPT_SUPPORT && OMPT_OPTIONAL
693  if (ompt_enabled.ompt_callback_flush) {
694  ompt_callbacks.ompt_callback(ompt_callback_flush)(
695  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
696  }
697 #endif
698 }
699 
700 /* -------------------------------------------------------------------------- */
708 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
709  KMP_COUNT_BLOCK(OMP_BARRIER);
710  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
711 
712  if (!TCR_4(__kmp_init_parallel))
713  __kmp_parallel_initialize();
714 
715 #if OMP_50_ENABLED
716  __kmp_resume_if_soft_paused();
717 #endif
718 
719  if (__kmp_env_consistency_check) {
720  if (loc == 0) {
721  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
722  }
723 
724  __kmp_check_barrier(global_tid, ct_barrier, loc);
725  }
726 
727 #if OMPT_SUPPORT
728  ompt_frame_t *ompt_frame;
729  if (ompt_enabled.enabled) {
730  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
731  if (ompt_frame->enter_frame.ptr == NULL)
732  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
733  OMPT_STORE_RETURN_ADDRESS(global_tid);
734  }
735 #endif
736  __kmp_threads[global_tid]->th.th_ident = loc;
737  // TODO: explicit barrier_wait_id:
738  // this function is called when the 'barrier' directive is present or at the
739  // implicit barrier at the end of a worksharing construct.
740  // 1) better to add a per-thread barrier counter to a thread data structure
741  // 2) set to 0 when a new team is created
742  // 3) no sync is required
743 
744  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
745 #if OMPT_SUPPORT && OMPT_OPTIONAL
746  if (ompt_enabled.enabled) {
747  ompt_frame->enter_frame = ompt_data_none;
748  }
749 #endif
750 }
751 
752 /* The BARRIER for a MASTER section is always explicit */
759 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
760  int status = 0;
761 
762  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
763 
764  if (!TCR_4(__kmp_init_parallel))
765  __kmp_parallel_initialize();
766 
767 #if OMP_50_ENABLED
768  __kmp_resume_if_soft_paused();
769 #endif
770 
771  if (KMP_MASTER_GTID(global_tid)) {
772  KMP_COUNT_BLOCK(OMP_MASTER);
773  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
774  status = 1;
775  }
776 
777 #if OMPT_SUPPORT && OMPT_OPTIONAL
778  if (status) {
779  if (ompt_enabled.ompt_callback_master) {
780  kmp_info_t *this_thr = __kmp_threads[global_tid];
781  kmp_team_t *team = this_thr->th.th_team;
782 
783  int tid = __kmp_tid_from_gtid(global_tid);
784  ompt_callbacks.ompt_callback(ompt_callback_master)(
785  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
786  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
787  OMPT_GET_RETURN_ADDRESS(0));
788  }
789  }
790 #endif
791 
792  if (__kmp_env_consistency_check) {
793 #if KMP_USE_DYNAMIC_LOCK
794  if (status)
795  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
796  else
797  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
798 #else
799  if (status)
800  __kmp_push_sync(global_tid, ct_master, loc, NULL);
801  else
802  __kmp_check_sync(global_tid, ct_master, loc, NULL);
803 #endif
804  }
805 
806  return status;
807 }
808 
817 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
818  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
819 
820  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
821  KMP_POP_PARTITIONED_TIMER();
822 
823 #if OMPT_SUPPORT && OMPT_OPTIONAL
824  kmp_info_t *this_thr = __kmp_threads[global_tid];
825  kmp_team_t *team = this_thr->th.th_team;
826  if (ompt_enabled.ompt_callback_master) {
827  int tid = __kmp_tid_from_gtid(global_tid);
828  ompt_callbacks.ompt_callback(ompt_callback_master)(
829  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
830  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
831  OMPT_GET_RETURN_ADDRESS(0));
832  }
833 #endif
834 
835  if (__kmp_env_consistency_check) {
836  if (global_tid < 0)
837  KMP_WARNING(ThreadIdentInvalid);
838 
839  if (KMP_MASTER_GTID(global_tid))
840  __kmp_pop_sync(global_tid, ct_master, loc);
841  }
842 }
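
// Illustration (sketch, not part of the original listing): lowering of
// "#pragma omp master".  Only the thread for which __kmpc_master() returns 1
// executes the block and then calls __kmpc_end_master(); there is no implied
// barrier.  Identifiers are hypothetical.
#if 0 /* illustrative only */
void example_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    // master-only work goes here
    __kmpc_end_master(loc, gtid);
  }
}
#endif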
843 
851 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
852  int cid = 0;
853  kmp_info_t *th;
854  KMP_DEBUG_ASSERT(__kmp_init_serial);
855 
856  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
857 
858  if (!TCR_4(__kmp_init_parallel))
859  __kmp_parallel_initialize();
860 
861 #if OMP_50_ENABLED
862  __kmp_resume_if_soft_paused();
863 #endif
864 
865 #if USE_ITT_BUILD
866  __kmp_itt_ordered_prep(gtid);
867 // TODO: ordered_wait_id
868 #endif /* USE_ITT_BUILD */
869 
870  th = __kmp_threads[gtid];
871 
872 #if OMPT_SUPPORT && OMPT_OPTIONAL
873  kmp_team_t *team;
874  ompt_wait_id_t lck;
875  void *codeptr_ra;
876  if (ompt_enabled.enabled) {
877  OMPT_STORE_RETURN_ADDRESS(gtid);
878  team = __kmp_team_from_gtid(gtid);
879  lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
880  /* OMPT state update */
881  th->th.ompt_thread_info.wait_id = lck;
882  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
883 
884  /* OMPT event callback */
885  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
886  if (ompt_enabled.ompt_callback_mutex_acquire) {
887  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
888  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
889  (ompt_wait_id_t)lck, codeptr_ra);
890  }
891  }
892 #endif
893 
894  if (th->th.th_dispatch->th_deo_fcn != 0)
895  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
896  else
897  __kmp_parallel_deo(&gtid, &cid, loc);
898 
899 #if OMPT_SUPPORT && OMPT_OPTIONAL
900  if (ompt_enabled.enabled) {
901  /* OMPT state update */
902  th->th.ompt_thread_info.state = ompt_state_work_parallel;
903  th->th.ompt_thread_info.wait_id = 0;
904 
905  /* OMPT event callback */
906  if (ompt_enabled.ompt_callback_mutex_acquired) {
907  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
908  ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
909  }
910  }
911 #endif
912 
913 #if USE_ITT_BUILD
914  __kmp_itt_ordered_start(gtid);
915 #endif /* USE_ITT_BUILD */
916 }
917 
925 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
926  int cid = 0;
927  kmp_info_t *th;
928 
929  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
930 
931 #if USE_ITT_BUILD
932  __kmp_itt_ordered_end(gtid);
933 // TODO: ordered_wait_id
934 #endif /* USE_ITT_BUILD */
935 
936  th = __kmp_threads[gtid];
937 
938  if (th->th.th_dispatch->th_dxo_fcn != 0)
939  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
940  else
941  __kmp_parallel_dxo(&gtid, &cid, loc);
942 
943 #if OMPT_SUPPORT && OMPT_OPTIONAL
944  OMPT_STORE_RETURN_ADDRESS(gtid);
945  if (ompt_enabled.ompt_callback_mutex_released) {
946  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
947  ompt_mutex_ordered,
948  (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
949  OMPT_LOAD_RETURN_ADDRESS(gtid));
950  }
951 #endif
952 }
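
// Illustration (sketch, not part of the original listing): inside a loop with
// an ordered clause, each "#pragma omp ordered" block is bracketed by the two
// calls above so that chunks enter it in sequential iteration order.
#if 0 /* illustrative only */
void example_ordered_block(ident_t *loc, kmp_int32 gtid) {
  __kmpc_ordered(loc, gtid);
  // ordered body: executed in iteration order across the team
  __kmpc_end_ordered(loc, gtid);
}
#endif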
953 
954 #if KMP_USE_DYNAMIC_LOCK
955 
956 static __forceinline void
957 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
958  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
959  // Pointer to the allocated indirect lock is written to crit, while indexing
960  // is ignored.
961  void *idx;
962  kmp_indirect_lock_t **lck;
963  lck = (kmp_indirect_lock_t **)crit;
964  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
965  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
966  KMP_SET_I_LOCK_LOCATION(ilk, loc);
967  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
968  KA_TRACE(20,
969  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
970 #if USE_ITT_BUILD
971  __kmp_itt_critical_creating(ilk->lock, loc);
972 #endif
973  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
974  if (status == 0) {
975 #if USE_ITT_BUILD
976  __kmp_itt_critical_destroyed(ilk->lock);
977 #endif
978  // We don't really need to destroy the unclaimed lock here since it will be
979  // cleaned up at program exit.
980  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
981  }
982  KMP_DEBUG_ASSERT(*lck != NULL);
983 }
984 
985 // Fast-path acquire tas lock
986 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
987  { \
988  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
989  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
990  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
991  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
992  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
993  kmp_uint32 spins; \
994  KMP_FSYNC_PREPARE(l); \
995  KMP_INIT_YIELD(spins); \
996  if (TCR_4(__kmp_nth) > \
997  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
998  KMP_YIELD(TRUE); \
999  } else { \
1000  KMP_YIELD_SPIN(spins); \
1001  } \
1002  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1003  while ( \
1004  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1005  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1006  __kmp_spin_backoff(&backoff); \
1007  if (TCR_4(__kmp_nth) > \
1008  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1009  KMP_YIELD(TRUE); \
1010  } else { \
1011  KMP_YIELD_SPIN(spins); \
1012  } \
1013  } \
1014  } \
1015  KMP_FSYNC_ACQUIRED(l); \
1016  }
1017 
1018 // Fast-path test tas lock
1019 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1020  { \
1021  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1022  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1023  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1024  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1025  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1026  }
1027 
1028 // Fast-path release tas lock
1029 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1030  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1031 
1032 #if KMP_USE_FUTEX
1033 
1034 #include <sys/syscall.h>
1035 #include <unistd.h>
1036 #ifndef FUTEX_WAIT
1037 #define FUTEX_WAIT 0
1038 #endif
1039 #ifndef FUTEX_WAKE
1040 #define FUTEX_WAKE 1
1041 #endif
1042 
1043 // Fast-path acquire futex lock
1044 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1045  { \
1046  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1047  kmp_int32 gtid_code = (gtid + 1) << 1; \
1048  KMP_MB(); \
1049  KMP_FSYNC_PREPARE(ftx); \
1050  kmp_int32 poll_val; \
1051  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1052  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1053  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1054  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1055  if (!cond) { \
1056  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1057  poll_val | \
1058  KMP_LOCK_BUSY(1, futex))) { \
1059  continue; \
1060  } \
1061  poll_val |= KMP_LOCK_BUSY(1, futex); \
1062  } \
1063  kmp_int32 rc; \
1064  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1065  NULL, NULL, 0)) != 0) { \
1066  continue; \
1067  } \
1068  gtid_code |= 1; \
1069  } \
1070  KMP_FSYNC_ACQUIRED(ftx); \
1071  }
1072 
1073 // Fast-path test futex lock
1074 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1075  { \
1076  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1077  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1078  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1079  KMP_FSYNC_ACQUIRED(ftx); \
1080  rc = TRUE; \
1081  } else { \
1082  rc = FALSE; \
1083  } \
1084  }
1085 
1086 // Fast-path release futex lock
1087 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1088  { \
1089  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1090  KMP_MB(); \
1091  KMP_FSYNC_RELEASING(ftx); \
1092  kmp_int32 poll_val = \
1093  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1094  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1095  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1096  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1097  } \
1098  KMP_MB(); \
1099  KMP_YIELD(TCR_4(__kmp_nth) > \
1100  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1101  }
1102 
1103 #endif // KMP_USE_FUTEX
1104 
1105 #else // KMP_USE_DYNAMIC_LOCK
1106 
1107 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1108  ident_t const *loc,
1109  kmp_int32 gtid) {
1110  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1111 
1112  // Because of the double-check, the following load doesn't need to be volatile
1113  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1114 
1115  if (lck == NULL) {
1116  void *idx;
1117 
1118  // Allocate & initialize the lock.
1119  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1120  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1121  __kmp_init_user_lock_with_checks(lck);
1122  __kmp_set_user_lock_location(lck, loc);
1123 #if USE_ITT_BUILD
1124  __kmp_itt_critical_creating(lck);
1125 // __kmp_itt_critical_creating() should be called *before* the first usage
1126 // of the underlying lock. It is the only place where we can guarantee it. There
1127 // is a chance the lock will be destroyed with no usage, but that is not a
1128 // problem, because this is not a real event seen by the user but rather a way
1129 // of setting a name for the object (lock). See more details in kmp_itt.h.
1130 #endif /* USE_ITT_BUILD */
1131 
1132  // Use a cmpxchg instruction to slam the start of the critical section with
1133  // the lock pointer. If another thread beat us to it, deallocate the lock,
1134  // and use the lock that the other thread allocated.
1135  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1136 
1137  if (status == 0) {
1138 // Deallocate the lock and reload the value.
1139 #if USE_ITT_BUILD
1140  __kmp_itt_critical_destroyed(lck);
1141 // Let ITT know the lock is destroyed and the same memory location may be reused
1142 // for another purpose.
1143 #endif /* USE_ITT_BUILD */
1144  __kmp_destroy_user_lock_with_checks(lck);
1145  __kmp_user_lock_free(&idx, gtid, lck);
1146  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1147  KMP_DEBUG_ASSERT(lck != NULL);
1148  }
1149  }
1150  return lck;
1151 }
1152 
1153 #endif // KMP_USE_DYNAMIC_LOCK
1154 
1165 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1166  kmp_critical_name *crit) {
1167 #if KMP_USE_DYNAMIC_LOCK
1168 #if OMPT_SUPPORT && OMPT_OPTIONAL
1169  OMPT_STORE_RETURN_ADDRESS(global_tid);
1170 #endif // OMPT_SUPPORT
1171  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1172 #else
1173  KMP_COUNT_BLOCK(OMP_CRITICAL);
1174 #if OMPT_SUPPORT && OMPT_OPTIONAL
1175  ompt_state_t prev_state = ompt_state_undefined;
1176  ompt_thread_info_t ti;
1177 #endif
1178  kmp_user_lock_p lck;
1179 
1180  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1181 
1182  // TODO: add THR_OVHD_STATE
1183 
1184  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1185  KMP_CHECK_USER_LOCK_INIT();
1186 
1187  if ((__kmp_user_lock_kind == lk_tas) &&
1188  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1189  lck = (kmp_user_lock_p)crit;
1190  }
1191 #if KMP_USE_FUTEX
1192  else if ((__kmp_user_lock_kind == lk_futex) &&
1193  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1194  lck = (kmp_user_lock_p)crit;
1195  }
1196 #endif
1197  else { // ticket, queuing or drdpa
1198  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1199  }
1200 
1201  if (__kmp_env_consistency_check)
1202  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1203 
1204 // Since the critical directive binds to all threads, not just the current
1205 // team, we have to check this even if we are in a serialized team.
1206 // Also, even if we are the uber thread, we still have to acquire the lock,
1207 // as we have to contend with sibling threads.
1208 
1209 #if USE_ITT_BUILD
1210  __kmp_itt_critical_acquiring(lck);
1211 #endif /* USE_ITT_BUILD */
1212 #if OMPT_SUPPORT && OMPT_OPTIONAL
1213  OMPT_STORE_RETURN_ADDRESS(global_tid);
1214  void *codeptr_ra = NULL;
1215  if (ompt_enabled.enabled) {
1216  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1217  /* OMPT state update */
1218  prev_state = ti.state;
1219  ti.wait_id = (ompt_wait_id_t)lck;
1220  ti.state = ompt_state_wait_critical;
1221 
1222  /* OMPT event callback */
1223  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1224  if (ompt_enabled.ompt_callback_mutex_acquire) {
1225  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1226  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1227  (ompt_wait_id_t)crit, codeptr_ra);
1228  }
1229  }
1230 #endif
1231  // Value of 'crit' should be good for using as a critical_id of the critical
1232  // section directive.
1233  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1234 
1235 #if USE_ITT_BUILD
1236  __kmp_itt_critical_acquired(lck);
1237 #endif /* USE_ITT_BUILD */
1238 #if OMPT_SUPPORT && OMPT_OPTIONAL
1239  if (ompt_enabled.enabled) {
1240  /* OMPT state update */
1241  ti.state = prev_state;
1242  ti.wait_id = 0;
1243 
1244  /* OMPT event callback */
1245  if (ompt_enabled.ompt_callback_mutex_acquired) {
1246  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1247  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
1248  }
1249  }
1250 #endif
1251  KMP_POP_PARTITIONED_TIMER();
1252 
1253  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1254  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1255 #endif // KMP_USE_DYNAMIC_LOCK
1256 }
1257 
1258 #if KMP_USE_DYNAMIC_LOCK
1259 
1260 // Converts the given hint to an internal lock implementation
1261 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1262 #if KMP_USE_TSX
1263 #define KMP_TSX_LOCK(seq) lockseq_##seq
1264 #else
1265 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1266 #endif
1267 
1268 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1269 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1270 #else
1271 #define KMP_CPUINFO_RTM 0
1272 #endif
1273 
1274  // Hints that do not require further logic
1275  if (hint & kmp_lock_hint_hle)
1276  return KMP_TSX_LOCK(hle);
1277  if (hint & kmp_lock_hint_rtm)
1278  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1279  if (hint & kmp_lock_hint_adaptive)
1280  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1281 
1282  // Rule out conflicting hints first by returning the default lock
1283  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1284  return __kmp_user_lock_seq;
1285  if ((hint & omp_lock_hint_speculative) &&
1286  (hint & omp_lock_hint_nonspeculative))
1287  return __kmp_user_lock_seq;
1288 
1289  // Do not even consider speculation when it appears to be contended
1290  if (hint & omp_lock_hint_contended)
1291  return lockseq_queuing;
1292 
1293  // Uncontended lock without speculation
1294  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1295  return lockseq_tas;
1296 
1297  // HLE lock for speculation
1298  if (hint & omp_lock_hint_speculative)
1299  return KMP_TSX_LOCK(hle);
1300 
1301  return __kmp_user_lock_seq;
1302 }
1303 
1304 #if OMPT_SUPPORT && OMPT_OPTIONAL
1305 #if KMP_USE_DYNAMIC_LOCK
1306 static kmp_mutex_impl_t
1307 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1308  if (user_lock) {
1309  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1310  case 0:
1311  break;
1312 #if KMP_USE_FUTEX
1313  case locktag_futex:
1314  return kmp_mutex_impl_queuing;
1315 #endif
1316  case locktag_tas:
1317  return kmp_mutex_impl_spin;
1318 #if KMP_USE_TSX
1319  case locktag_hle:
1320  return kmp_mutex_impl_speculative;
1321 #endif
1322  default:
1323  return kmp_mutex_impl_none;
1324  }
1325  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1326  }
1327  KMP_ASSERT(ilock);
1328  switch (ilock->type) {
1329 #if KMP_USE_TSX
1330  case locktag_adaptive:
1331  case locktag_rtm:
1332  return kmp_mutex_impl_speculative;
1333 #endif
1334  case locktag_nested_tas:
1335  return kmp_mutex_impl_spin;
1336 #if KMP_USE_FUTEX
1337  case locktag_nested_futex:
1338 #endif
1339  case locktag_ticket:
1340  case locktag_queuing:
1341  case locktag_drdpa:
1342  case locktag_nested_ticket:
1343  case locktag_nested_queuing:
1344  case locktag_nested_drdpa:
1345  return kmp_mutex_impl_queuing;
1346  default:
1347  return kmp_mutex_impl_none;
1348  }
1349 }
1350 #else
1351 // For locks without dynamic binding
1352 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1353  switch (__kmp_user_lock_kind) {
1354  case lk_tas:
1355  return kmp_mutex_impl_spin;
1356 #if KMP_USE_FUTEX
1357  case lk_futex:
1358 #endif
1359  case lk_ticket:
1360  case lk_queuing:
1361  case lk_drdpa:
1362  return kmp_mutex_impl_queuing;
1363 #if KMP_USE_TSX
1364  case lk_hle:
1365  case lk_rtm:
1366  case lk_adaptive:
1367  return kmp_mutex_impl_speculative;
1368 #endif
1369  default:
1370  return kmp_mutex_impl_none;
1371  }
1372 }
1373 #endif // KMP_USE_DYNAMIC_LOCK
1374 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1375 
1389 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1390  kmp_critical_name *crit, uint32_t hint) {
1391  KMP_COUNT_BLOCK(OMP_CRITICAL);
1392  kmp_user_lock_p lck;
1393 #if OMPT_SUPPORT && OMPT_OPTIONAL
1394  ompt_state_t prev_state = ompt_state_undefined;
1395  ompt_thread_info_t ti;
1396  // This is the case, if called from __kmpc_critical:
1397  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1398  if (!codeptr)
1399  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1400 #endif
1401 
1402  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1403 
1404  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1405  // Check if it is initialized.
1406  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1407  if (*lk == 0) {
1408  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1409  if (KMP_IS_D_LOCK(lckseq)) {
1410  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1411  KMP_GET_D_TAG(lckseq));
1412  } else {
1413  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1414  }
1415  }
1416  // Branch for accessing the actual lock object and set operation. This
1417  // branching is inevitable since this lock initialization does not follow the
1418  // normal dispatch path (lock table is not used).
1419  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1420  lck = (kmp_user_lock_p)lk;
1421  if (__kmp_env_consistency_check) {
1422  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1423  __kmp_map_hint_to_lock(hint));
1424  }
1425 #if USE_ITT_BUILD
1426  __kmp_itt_critical_acquiring(lck);
1427 #endif
1428 #if OMPT_SUPPORT && OMPT_OPTIONAL
1429  if (ompt_enabled.enabled) {
1430  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1431  /* OMPT state update */
1432  prev_state = ti.state;
1433  ti.wait_id = (ompt_wait_id_t)lck;
1434  ti.state = ompt_state_wait_critical;
1435 
1436  /* OMPT event callback */
1437  if (ompt_enabled.ompt_callback_mutex_acquire) {
1438  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1439  ompt_mutex_critical, (unsigned int)hint,
1440  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
1441  }
1442  }
1443 #endif
1444 #if KMP_USE_INLINED_TAS
1445  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1446  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1447  } else
1448 #elif KMP_USE_INLINED_FUTEX
1449  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1450  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1451  } else
1452 #endif
1453  {
1454  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1455  }
1456  } else {
1457  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1458  lck = ilk->lock;
1459  if (__kmp_env_consistency_check) {
1460  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1461  __kmp_map_hint_to_lock(hint));
1462  }
1463 #if USE_ITT_BUILD
1464  __kmp_itt_critical_acquiring(lck);
1465 #endif
1466 #if OMPT_SUPPORT && OMPT_OPTIONAL
1467  if (ompt_enabled.enabled) {
1468  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1469  /* OMPT state update */
1470  prev_state = ti.state;
1471  ti.wait_id = (ompt_wait_id_t)lck;
1472  ti.state = ompt_state_wait_critical;
1473 
1474  /* OMPT event callback */
1475  if (ompt_enabled.ompt_callback_mutex_acquire) {
1476  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1477  ompt_mutex_critical, (unsigned int)hint,
1478  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
1479  }
1480  }
1481 #endif
1482  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1483  }
1484  KMP_POP_PARTITIONED_TIMER();
1485 
1486 #if USE_ITT_BUILD
1487  __kmp_itt_critical_acquired(lck);
1488 #endif /* USE_ITT_BUILD */
1489 #if OMPT_SUPPORT && OMPT_OPTIONAL
1490  if (ompt_enabled.enabled) {
1491  /* OMPT state update */
1492  ti.state = prev_state;
1493  ti.wait_id = 0;
1494 
1495  /* OMPT event callback */
1496  if (ompt_enabled.ompt_callback_mutex_acquired) {
1497  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1498  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
1499  }
1500  }
1501 #endif
1502 
1503  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1504  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1505 } // __kmpc_critical_with_hint
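
// Illustration (sketch, not part of the original listing): with OpenMP 4.5
// lock hints, "#pragma omp critical (name) hint(omp_lock_hint_speculative)" is
// typically lowered onto __kmpc_critical_with_hint rather than
// __kmpc_critical; the hint is then mapped to a lock kind by
// __kmp_map_hint_to_lock above.  Identifiers are hypothetical.
#if 0 /* illustrative only */
static kmp_critical_name example_crit_spec; // zero-initialized per-name lock

void example_critical_hinted(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical_with_hint(loc, gtid, &example_crit_spec,
                            omp_lock_hint_speculative);
  // guarded code
  __kmpc_end_critical(loc, gtid, &example_crit_spec);
}
#endif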
1506 
1507 #endif // KMP_USE_DYNAMIC_LOCK
1508 
1518 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1519  kmp_critical_name *crit) {
1520  kmp_user_lock_p lck;
1521 
1522  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1523 
1524 #if KMP_USE_DYNAMIC_LOCK
1525  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1526  lck = (kmp_user_lock_p)crit;
1527  KMP_ASSERT(lck != NULL);
1528  if (__kmp_env_consistency_check) {
1529  __kmp_pop_sync(global_tid, ct_critical, loc);
1530  }
1531 #if USE_ITT_BUILD
1532  __kmp_itt_critical_releasing(lck);
1533 #endif
1534 #if KMP_USE_INLINED_TAS
1535  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1536  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1537  } else
1538 #elif KMP_USE_INLINED_FUTEX
1539  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1540  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1541  } else
1542 #endif
1543  {
1544  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1545  }
1546  } else {
1547  kmp_indirect_lock_t *ilk =
1548  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1549  KMP_ASSERT(ilk != NULL);
1550  lck = ilk->lock;
1551  if (__kmp_env_consistency_check) {
1552  __kmp_pop_sync(global_tid, ct_critical, loc);
1553  }
1554 #if USE_ITT_BUILD
1555  __kmp_itt_critical_releasing(lck);
1556 #endif
1557  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1558  }
1559 
1560 #else // KMP_USE_DYNAMIC_LOCK
1561 
1562  if ((__kmp_user_lock_kind == lk_tas) &&
1563  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1564  lck = (kmp_user_lock_p)crit;
1565  }
1566 #if KMP_USE_FUTEX
1567  else if ((__kmp_user_lock_kind == lk_futex) &&
1568  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1569  lck = (kmp_user_lock_p)crit;
1570  }
1571 #endif
1572  else { // ticket, queuing or drdpa
1573  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1574  }
1575 
1576  KMP_ASSERT(lck != NULL);
1577 
1578  if (__kmp_env_consistency_check)
1579  __kmp_pop_sync(global_tid, ct_critical, loc);
1580 
1581 #if USE_ITT_BUILD
1582  __kmp_itt_critical_releasing(lck);
1583 #endif /* USE_ITT_BUILD */
1584  // Value of 'crit' should be good for using as a critical_id of the critical
1585  // section directive.
1586  __kmp_release_user_lock_with_checks(lck, global_tid);
1587 
1588 #endif // KMP_USE_DYNAMIC_LOCK
1589 
1590 #if OMPT_SUPPORT && OMPT_OPTIONAL
1591  /* OMPT release event triggers after lock is released; place here to trigger
1592  * for all #if branches */
1593  OMPT_STORE_RETURN_ADDRESS(global_tid);
1594  if (ompt_enabled.ompt_callback_mutex_released) {
1595  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1596  ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1597  }
1598 #endif
1599 
1600  KMP_POP_PARTITIONED_TIMER();
1601  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1602 }
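
// Illustration (sketch, not part of the original listing): lowering of
// "#pragma omp critical (io)".  The compiler emits one zero-initialized
// kmp_critical_name per critical name and passes its address to both entry
// points; identifiers here are hypothetical.
#if 0 /* illustrative only */
static kmp_critical_name example_crit_io; // lock storage for the name "io"

void example_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &example_crit_io);
  // guarded code: at most one thread at a time, across all teams
  __kmpc_end_critical(loc, gtid, &example_crit_io);
}
#endif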
1603 
1613 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1614  int status;
1615 
1616  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1617 
1618  if (!TCR_4(__kmp_init_parallel))
1619  __kmp_parallel_initialize();
1620 
1621 #if OMP_50_ENABLED
1622  __kmp_resume_if_soft_paused();
1623 #endif
1624 
1625  if (__kmp_env_consistency_check)
1626  __kmp_check_barrier(global_tid, ct_barrier, loc);
1627 
1628 #if OMPT_SUPPORT
1629  ompt_frame_t *ompt_frame;
1630  if (ompt_enabled.enabled) {
1631  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1632  if (ompt_frame->enter_frame.ptr == NULL)
1633  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1634  OMPT_STORE_RETURN_ADDRESS(global_tid);
1635  }
1636 #endif
1637 #if USE_ITT_NOTIFY
1638  __kmp_threads[global_tid]->th.th_ident = loc;
1639 #endif
1640  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1641 #if OMPT_SUPPORT && OMPT_OPTIONAL
1642  if (ompt_enabled.enabled) {
1643  ompt_frame->enter_frame = ompt_data_none;
1644  }
1645 #endif
1646 
1647  return (status != 0) ? 0 : 1;
1648 }
1649 
1659 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1660  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1661 
1662  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1663 }
1664 
1675 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1676  kmp_int32 ret;
1677 
1678  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1679 
1680  if (!TCR_4(__kmp_init_parallel))
1681  __kmp_parallel_initialize();
1682 
1683 #if OMP_50_ENABLED
1684  __kmp_resume_if_soft_paused();
1685 #endif
1686 
1687  if (__kmp_env_consistency_check) {
1688  if (loc == 0) {
1689  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1690  }
1691  __kmp_check_barrier(global_tid, ct_barrier, loc);
1692  }
1693 
1694 #if OMPT_SUPPORT
1695  ompt_frame_t *ompt_frame;
1696  if (ompt_enabled.enabled) {
1697  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1698  if (ompt_frame->enter_frame.ptr == NULL)
1699  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1700  OMPT_STORE_RETURN_ADDRESS(global_tid);
1701  }
1702 #endif
1703 #if USE_ITT_NOTIFY
1704  __kmp_threads[global_tid]->th.th_ident = loc;
1705 #endif
1706  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1707 #if OMPT_SUPPORT && OMPT_OPTIONAL
1708  if (ompt_enabled.enabled) {
1709  ompt_frame->enter_frame = ompt_data_none;
1710  }
1711 #endif
1712 
1713  ret = __kmpc_master(loc, global_tid);
1714 
1715  if (__kmp_env_consistency_check) {
1716  /* there's no __kmpc_end_master called; so the (stats) */
1717  /* actions of __kmpc_end_master are done here */
1718 
1719  if (global_tid < 0) {
1720  KMP_WARNING(ThreadIdentInvalid);
1721  }
1722  if (ret) {
1723  /* only one thread should do the pop since only */
1724  /* one did the push (see __kmpc_master()) */
1725 
1726  __kmp_pop_sync(global_tid, ct_master, loc);
1727  }
1728  }
1729 
1730  return (ret);
1731 }
1732 
1733 /* The BARRIER for a SINGLE process section is always explicit */
1745 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1746  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1747 
1748  if (rc) {
1749  // We are going to execute the single statement, so we should count it.
1750  KMP_COUNT_BLOCK(OMP_SINGLE);
1751  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1752  }
1753 
1754 #if OMPT_SUPPORT && OMPT_OPTIONAL
1755  kmp_info_t *this_thr = __kmp_threads[global_tid];
1756  kmp_team_t *team = this_thr->th.th_team;
1757  int tid = __kmp_tid_from_gtid(global_tid);
1758 
1759  if (ompt_enabled.enabled) {
1760  if (rc) {
1761  if (ompt_enabled.ompt_callback_work) {
1762  ompt_callbacks.ompt_callback(ompt_callback_work)(
1763  ompt_work_single_executor, ompt_scope_begin,
1764  &(team->t.ompt_team_info.parallel_data),
1765  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1766  1, OMPT_GET_RETURN_ADDRESS(0));
1767  }
1768  } else {
1769  if (ompt_enabled.ompt_callback_work) {
1770  ompt_callbacks.ompt_callback(ompt_callback_work)(
1771  ompt_work_single_other, ompt_scope_begin,
1772  &(team->t.ompt_team_info.parallel_data),
1773  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1774  1, OMPT_GET_RETURN_ADDRESS(0));
1775  ompt_callbacks.ompt_callback(ompt_callback_work)(
1776  ompt_work_single_other, ompt_scope_end,
1777  &(team->t.ompt_team_info.parallel_data),
1778  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1779  1, OMPT_GET_RETURN_ADDRESS(0));
1780  }
1781  }
1782  }
1783 #endif
1784 
1785  return rc;
1786 }
1787 
1797 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1798  __kmp_exit_single(global_tid);
1799  KMP_POP_PARTITIONED_TIMER();
1800 
1801 #if OMPT_SUPPORT && OMPT_OPTIONAL
1802  kmp_info_t *this_thr = __kmp_threads[global_tid];
1803  kmp_team_t *team = this_thr->th.th_team;
1804  int tid = __kmp_tid_from_gtid(global_tid);
1805 
1806  if (ompt_enabled.ompt_callback_work) {
1807  ompt_callbacks.ompt_callback(ompt_callback_work)(
1808  ompt_work_single_executor, ompt_scope_end,
1809  &(team->t.ompt_team_info.parallel_data),
1810  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1811  OMPT_GET_RETURN_ADDRESS(0));
1812  }
1813 #endif
1814 }
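
// Illustration (sketch, not part of the original listing): lowering of
// "#pragma omp single" without a nowait clause; the arbitrarily chosen
// executor runs the block, then all threads meet at the implicit barrier.
// Identifiers are hypothetical.
#if 0 /* illustrative only */
void example_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    // single block: executed by exactly one thread of the team
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // implicit barrier (omitted for nowait)
}
#endif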
1815 
1823 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1824  KMP_POP_PARTITIONED_TIMER();
1825  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1826 
1827 #if OMPT_SUPPORT && OMPT_OPTIONAL
1828  if (ompt_enabled.ompt_callback_work) {
1829  ompt_work_t ompt_work_type = ompt_work_loop;
1830  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1831  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1832  // Determine workshare type
1833  if (loc != NULL) {
1834  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1835  ompt_work_type = ompt_work_loop;
1836  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1837  ompt_work_type = ompt_work_sections;
1838  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1839  ompt_work_type = ompt_work_distribute;
1840  } else {
1841  // use default set above.
1842  // a warning about this case is provided in __kmpc_for_static_init
1843  }
1844  KMP_DEBUG_ASSERT(ompt_work_type);
1845  }
1846  ompt_callbacks.ompt_callback(ompt_callback_work)(
1847  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1848  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1849  }
1850 #endif
1851  if (__kmp_env_consistency_check)
1852  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1853 }
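
// Illustration (sketch, not part of the original listing):
// __kmpc_for_static_fini() closes the bracket opened by one of the
// __kmpc_for_static_init_* entry points (defined elsewhere in the runtime); a
// statically scheduled loop is lowered roughly as below.  Identifiers and
// clause values are hypothetical.
#if 0 /* illustrative only */
void example_static_for(ident_t *loc, kmp_int32 gtid, kmp_int32 n) {
  kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                           &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // this thread's portion of the iteration space
  }
  __kmpc_for_static_fini(loc, gtid);
}
#endif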
1854 
1855 // User routines which take C-style arguments (call by value)
1856 // different from the Fortran equivalent routines
1857 
1858 void ompc_set_num_threads(int arg) {
1859  // !!!!! TODO: check the per-task binding
1860  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1861 }
1862 
1863 void ompc_set_dynamic(int flag) {
1864  kmp_info_t *thread;
1865 
1866  /* For the thread-private implementation of the internal controls */
1867  thread = __kmp_entry_thread();
1868 
1869  __kmp_save_internal_controls(thread);
1870 
1871  set__dynamic(thread, flag ? TRUE : FALSE);
1872 }
1873 
1874 void ompc_set_nested(int flag) {
1875  kmp_info_t *thread;
1876 
1877  /* For the thread-private internal controls implementation */
1878  thread = __kmp_entry_thread();
1879 
1880  __kmp_save_internal_controls(thread);
1881 
1882  set__nested(thread, flag ? TRUE : FALSE);
1883 }
1884 
1885 void ompc_set_max_active_levels(int max_active_levels) {
1886  /* TO DO */
1887  /* we want per-task implementation of this internal control */
1888 
1889  /* For the per-thread internal controls implementation */
1890  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1891 }
1892 
1893 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1894  // !!!!! TODO: check the per-task binding
1895  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1896 }
1897 
1898 int ompc_get_ancestor_thread_num(int level) {
1899  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1900 }
1901 
1902 int ompc_get_team_size(int level) {
1903  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1904 }
1905 
1906 #if OMP_50_ENABLED
1907 /* OpenMP 5.0 Affinity Format API */
1908 
1909 void ompc_set_affinity_format(char const *format) {
1910  if (!__kmp_init_serial) {
1911  __kmp_serial_initialize();
1912  }
1913  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1914  format, KMP_STRLEN(format) + 1);
1915 }
1916 
1917 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1918  size_t format_size;
1919  if (!__kmp_init_serial) {
1920  __kmp_serial_initialize();
1921  }
1922  format_size = KMP_STRLEN(__kmp_affinity_format);
1923  if (buffer && size) {
1924  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1925  format_size + 1);
1926  }
1927  return format_size;
1928 }
1929 
1930 void ompc_display_affinity(char const *format) {
1931  int gtid;
1932  if (!TCR_4(__kmp_init_middle)) {
1933  __kmp_middle_initialize();
1934  }
1935  gtid = __kmp_get_gtid();
1936  __kmp_aux_display_affinity(gtid, format);
1937 }
1938 
1939 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1940  char const *format) {
1941  int gtid;
1942  size_t num_required;
1943  kmp_str_buf_t capture_buf;
1944  if (!TCR_4(__kmp_init_middle)) {
1945  __kmp_middle_initialize();
1946  }
1947  gtid = __kmp_get_gtid();
1948  __kmp_str_buf_init(&capture_buf);
1949  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1950  if (buffer && buf_size) {
1951  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1952  capture_buf.used + 1);
1953  }
1954  __kmp_str_buf_free(&capture_buf);
1955  return num_required;
1956 }
1957 #endif /* OMP_50_ENABLED */
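
// Illustration (sketch, not part of the original listing): the ompc_* routines
// above back the OpenMP 5.0 affinity-format API; a user program would normally
// reach them through the omp_* wrappers declared in omp.h.  The format string
// below is hypothetical.
#if 0 /* illustrative only */
#include <omp.h>
#include <stdio.h>

void example_affinity_format(void) {
  omp_set_affinity_format("host=%H tid=%n affinity=%A");
#pragma omp parallel
  {
    char buf[256];
    size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
    if (needed < sizeof(buf))
      printf("%s\n", buf);
  }
}
#endif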
1958 
1959 void kmpc_set_stacksize(int arg) {
1960  // __kmp_aux_set_stacksize initializes the library if needed
1961  __kmp_aux_set_stacksize(arg);
1962 }
1963 
1964 void kmpc_set_stacksize_s(size_t arg) {
1965  // __kmp_aux_set_stacksize initializes the library if needed
1966  __kmp_aux_set_stacksize(arg);
1967 }
1968 
1969 void kmpc_set_blocktime(int arg) {
1970  int gtid, tid;
1971  kmp_info_t *thread;
1972 
1973  gtid = __kmp_entry_gtid();
1974  tid = __kmp_tid_from_gtid(gtid);
1975  thread = __kmp_thread_from_gtid(gtid);
1976 
1977  __kmp_aux_set_blocktime(arg, thread, tid);
1978 }
1979 
1980 void kmpc_set_library(int arg) {
1981  // __kmp_user_set_library initializes the library if needed
1982  __kmp_user_set_library((enum library_type)arg);
1983 }
1984 
1985 void kmpc_set_defaults(char const *str) {
1986  // __kmp_aux_set_defaults initializes the library if needed
1987  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1988 }
1989 
1990 void kmpc_set_disp_num_buffers(int arg) {
1991  // ignore after initialization because some teams have already
1992  // allocated dispatch buffers
1993  if (__kmp_init_serial == 0 && arg > 0)
1994  __kmp_dispatch_num_buffers = arg;
1995 }
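// --- Editor's usage sketch (illustrative, not part of the runtime source) ---
// The kmpc_set_* routines above sit behind kmp_* extension entry points that
// this runtime exports from its omp.h (kmp_set_stacksize_s, kmp_set_blocktime,
// kmp_set_defaults, ...).  A hedged sketch of calling them before the first
// parallel region; the chosen values are arbitrary:
#include <omp.h>

void tune_runtime(void) {
  kmp_set_stacksize_s(4 * 1024 * 1024); // 4 MiB worker stacks
  kmp_set_blocktime(0);                 // idle workers sleep immediately
  kmp_set_defaults("KMP_SETTINGS=1");   // same syntax as the env variables
}
// --- end of editor's sketch ---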
1996 
1997 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1998 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1999  return -1;
2000 #else
2001  if (!TCR_4(__kmp_init_middle)) {
2002  __kmp_middle_initialize();
2003  }
2004  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2005 #endif
2006 }
2007 
2008 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2009 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2010  return -1;
2011 #else
2012  if (!TCR_4(__kmp_init_middle)) {
2013  __kmp_middle_initialize();
2014  }
2015  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2016 #endif
2017 }
2018 
2019 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2020 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2021  return -1;
2022 #else
2023  if (!TCR_4(__kmp_init_middle)) {
2024  __kmp_middle_initialize();
2025  }
2026  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2027 #endif
2028 }
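// --- Editor's usage sketch (illustrative, not part of the runtime source) ---
// The three kmpc_*_affinity_mask_proc entries above back the low-level kmp_*
// affinity-mask API.  A hedged sketch that pins the calling thread to logical
// processor 0; on stub builds or without affinity support these calls simply
// return -1 and nothing changes:
#include <omp.h>

int pin_to_proc0(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(0, &mask) != 0) {
    kmp_destroy_affinity_mask(&mask);
    return -1; // proc 0 not representable, or affinity not supported
  }
  int rc = kmp_set_affinity(&mask); // apply the single-processor mask
  kmp_destroy_affinity_mask(&mask);
  return rc;
}
// --- end of editor's sketch ---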
2029 
2030 /* -------------------------------------------------------------------------- */
2075 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2076  void *cpy_data, void (*cpy_func)(void *, void *),
2077  kmp_int32 didit) {
2078  void **data_ptr;
2079 
2080  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2081 
2082  KMP_MB();
2083 
2084  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2085 
2086  if (__kmp_env_consistency_check) {
2087  if (loc == 0) {
2088  KMP_WARNING(ConstructIdentInvalid);
2089  }
2090  }
2091 
2092  // ToDo: Optimize the following two barriers into some kind of split barrier
2093 
2094  if (didit)
2095  *data_ptr = cpy_data;
2096 
2097 #if OMPT_SUPPORT
2098  ompt_frame_t *ompt_frame;
2099  if (ompt_enabled.enabled) {
2100  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2101  if (ompt_frame->enter_frame.ptr == NULL)
2102  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2103  OMPT_STORE_RETURN_ADDRESS(gtid);
2104  }
2105 #endif
2106 /* This barrier is not a barrier region boundary */
2107 #if USE_ITT_NOTIFY
2108  __kmp_threads[gtid]->th.th_ident = loc;
2109 #endif
2110  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2111 
2112  if (!didit)
2113  (*cpy_func)(cpy_data, *data_ptr);
2114 
2115 // Consider next barrier a user-visible barrier for barrier region boundaries
2116 // Nesting checks are already handled by the single construct checks
2117 
2118 #if OMPT_SUPPORT
2119  if (ompt_enabled.enabled) {
2120  OMPT_STORE_RETURN_ADDRESS(gtid);
2121  }
2122 #endif
2123 #if USE_ITT_NOTIFY
2124  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2125 // tasks can overwrite the location)
2126 #endif
2127  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2128 #if OMPT_SUPPORT && OMPT_OPTIONAL
2129  if (ompt_enabled.enabled) {
2130  ompt_frame->enter_frame = ompt_data_none;
2131  }
2132 #endif
2133 }
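// --- Editor's lowering sketch (illustrative, not verbatim compiler output) ---
// __kmpc_copyprivate is how a "#pragma omp single copyprivate(x)" broadcasts
// the executing thread's value to the rest of the team: the thread with
// didit != 0 publishes its cpy_data pointer, the first barrier orders that
// publication, every other thread runs cpy_func against it, and the second
// barrier keeps the winner's data alive until all copies finish.  The names
// __copy_int and lowered_single_copyprivate are assumptions for illustration.
static void __copy_int(void *dst, void *src) {
  *(int *)dst = *(int *)src; // run in every non-executing thread
}

static void lowered_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = 0;
  if (__kmpc_single(loc, gtid)) { // one thread wins the single construct
    *x = 42;                      // body of the single region
    didit = 1;
    __kmpc_end_single(loc, gtid);
  }
  // Called by every thread in the team, winner and losers alike.
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, __copy_int, didit);
}
// --- end of editor's sketch ---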
2134 
2135 /* -------------------------------------------------------------------------- */
2136 
2137 #define INIT_LOCK __kmp_init_user_lock_with_checks
2138 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2139 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2140 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2141 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2142 #define ACQUIRE_NESTED_LOCK_TIMED \
2143  __kmp_acquire_nested_user_lock_with_checks_timed
2144 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2145 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2146 #define TEST_LOCK __kmp_test_user_lock_with_checks
2147 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2148 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2149 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2150 
2151 // TODO: Make check abort messages use location info & pass it into
2152 // with_checks routines
2153 
2154 #if KMP_USE_DYNAMIC_LOCK
2155 
2156 // internal lock initializer
2157 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2158  kmp_dyna_lockseq_t seq) {
2159  if (KMP_IS_D_LOCK(seq)) {
2160  KMP_INIT_D_LOCK(lock, seq);
2161 #if USE_ITT_BUILD
2162  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2163 #endif
2164  } else {
2165  KMP_INIT_I_LOCK(lock, seq);
2166 #if USE_ITT_BUILD
2167  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2168  __kmp_itt_lock_creating(ilk->lock, loc);
2169 #endif
2170  }
2171 }
2172 
2173 // internal nest lock initializer
2174 static __forceinline void
2175 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2176  kmp_dyna_lockseq_t seq) {
2177 #if KMP_USE_TSX
2178  // Don't have nested lock implementation for speculative locks
2179  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2180  seq = __kmp_user_lock_seq;
2181 #endif
2182  switch (seq) {
2183  case lockseq_tas:
2184  seq = lockseq_nested_tas;
2185  break;
2186 #if KMP_USE_FUTEX
2187  case lockseq_futex:
2188  seq = lockseq_nested_futex;
2189  break;
2190 #endif
2191  case lockseq_ticket:
2192  seq = lockseq_nested_ticket;
2193  break;
2194  case lockseq_queuing:
2195  seq = lockseq_nested_queuing;
2196  break;
2197  case lockseq_drdpa:
2198  seq = lockseq_nested_drdpa;
2199  break;
2200  default:
2201  seq = lockseq_nested_queuing;
2202  }
2203  KMP_INIT_I_LOCK(lock, seq);
2204 #if USE_ITT_BUILD
2205  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2206  __kmp_itt_lock_creating(ilk->lock, loc);
2207 #endif
2208 }
2209 
2210 /* initialize the lock with a hint */
2211 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2212  uintptr_t hint) {
2213  KMP_DEBUG_ASSERT(__kmp_init_serial);
2214  if (__kmp_env_consistency_check && user_lock == NULL) {
2215  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2216  }
2217 
2218  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2219 
2220 #if OMPT_SUPPORT && OMPT_OPTIONAL
2221  // This is the case, if called from omp_init_lock_with_hint:
2222  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2223  if (!codeptr)
2224  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2225  if (ompt_enabled.ompt_callback_lock_init) {
2226  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2227  ompt_mutex_lock, (omp_lock_hint_t)hint,
2228  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2229  codeptr);
2230  }
2231 #endif
2232 }
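// --- Editor's usage sketch (illustrative, not part of the runtime source) ---
// The user-level entry matching the routine above is OpenMP 4.5's
// omp_init_lock_with_hint(); the hint is only advisory, so the mapping done
// by __kmp_map_hint_to_lock() may legitimately fall back to a plain lock kind.
#include <omp.h>

omp_lock_t reduce_lock;

void init_reduce_lock(void) {
  // Ask for a speculative (e.g. TSX-based) implementation when available.
  omp_init_lock_with_hint(&reduce_lock, omp_lock_hint_speculative);
}
// --- end of editor's sketch ---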
2233 
2234 /* initialize the lock with a hint */
2235 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2236  void **user_lock, uintptr_t hint) {
2237  KMP_DEBUG_ASSERT(__kmp_init_serial);
2238  if (__kmp_env_consistency_check && user_lock == NULL) {
2239  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2240  }
2241 
2242  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2243 
2244 #if OMPT_SUPPORT && OMPT_OPTIONAL
2245  // This is the case, if called from omp_init_lock_with_hint:
2246  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2247  if (!codeptr)
2248  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2249  if (ompt_enabled.ompt_callback_lock_init) {
2250  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2251  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2252  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2253  codeptr);
2254  }
2255 #endif
2256 }
2257 
2258 #endif // KMP_USE_DYNAMIC_LOCK
2259 
2260 /* initialize the lock */
2261 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2262 #if KMP_USE_DYNAMIC_LOCK
2263 
2264  KMP_DEBUG_ASSERT(__kmp_init_serial);
2265  if (__kmp_env_consistency_check && user_lock == NULL) {
2266  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2267  }
2268  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2269 
2270 #if OMPT_SUPPORT && OMPT_OPTIONAL
2271  // This is the case, if called from omp_init_lock_with_hint:
2272  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2273  if (!codeptr)
2274  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2275  if (ompt_enabled.ompt_callback_lock_init) {
2276  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2277  ompt_mutex_lock, omp_lock_hint_none,
2278  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2279  codeptr);
2280  }
2281 #endif
2282 
2283 #else // KMP_USE_DYNAMIC_LOCK
2284 
2285  static char const *const func = "omp_init_lock";
2286  kmp_user_lock_p lck;
2287  KMP_DEBUG_ASSERT(__kmp_init_serial);
2288 
2289  if (__kmp_env_consistency_check) {
2290  if (user_lock == NULL) {
2291  KMP_FATAL(LockIsUninitialized, func);
2292  }
2293  }
2294 
2295  KMP_CHECK_USER_LOCK_INIT();
2296 
2297  if ((__kmp_user_lock_kind == lk_tas) &&
2298  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2299  lck = (kmp_user_lock_p)user_lock;
2300  }
2301 #if KMP_USE_FUTEX
2302  else if ((__kmp_user_lock_kind == lk_futex) &&
2303  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2304  lck = (kmp_user_lock_p)user_lock;
2305  }
2306 #endif
2307  else {
2308  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2309  }
2310  INIT_LOCK(lck);
2311  __kmp_set_user_lock_location(lck, loc);
2312 
2313 #if OMPT_SUPPORT && OMPT_OPTIONAL
2314  // This is the case, if called from omp_init_lock_with_hint:
2315  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2316  if (!codeptr)
2317  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2318  if (ompt_enabled.ompt_callback_lock_init) {
2319  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2320  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2321  (ompt_wait_id_t)user_lock, codeptr);
2322  }
2323 #endif
2324 
2325 #if USE_ITT_BUILD
2326  __kmp_itt_lock_creating(lck);
2327 #endif /* USE_ITT_BUILD */
2328 
2329 #endif // KMP_USE_DYNAMIC_LOCK
2330 } // __kmpc_init_lock
2331 
2332 /* initialize the lock */
2333 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2334 #if KMP_USE_DYNAMIC_LOCK
2335 
2336  KMP_DEBUG_ASSERT(__kmp_init_serial);
2337  if (__kmp_env_consistency_check && user_lock == NULL) {
2338  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2339  }
2340  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2341 
2342 #if OMPT_SUPPORT && OMPT_OPTIONAL
2343  // This is the case, if called from omp_init_lock_with_hint:
2344  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2345  if (!codeptr)
2346  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2347  if (ompt_enabled.ompt_callback_lock_init) {
2348  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2349  ompt_mutex_nest_lock, omp_lock_hint_none,
2350  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2351  codeptr);
2352  }
2353 #endif
2354 
2355 #else // KMP_USE_DYNAMIC_LOCK
2356 
2357  static char const *const func = "omp_init_nest_lock";
2358  kmp_user_lock_p lck;
2359  KMP_DEBUG_ASSERT(__kmp_init_serial);
2360 
2361  if (__kmp_env_consistency_check) {
2362  if (user_lock == NULL) {
2363  KMP_FATAL(LockIsUninitialized, func);
2364  }
2365  }
2366 
2367  KMP_CHECK_USER_LOCK_INIT();
2368 
2369  if ((__kmp_user_lock_kind == lk_tas) &&
2370  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2371  OMP_NEST_LOCK_T_SIZE)) {
2372  lck = (kmp_user_lock_p)user_lock;
2373  }
2374 #if KMP_USE_FUTEX
2375  else if ((__kmp_user_lock_kind == lk_futex) &&
2376  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2377  OMP_NEST_LOCK_T_SIZE)) {
2378  lck = (kmp_user_lock_p)user_lock;
2379  }
2380 #endif
2381  else {
2382  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2383  }
2384 
2385  INIT_NESTED_LOCK(lck);
2386  __kmp_set_user_lock_location(lck, loc);
2387 
2388 #if OMPT_SUPPORT && OMPT_OPTIONAL
2389  // This is the case, if called from omp_init_lock_with_hint:
2390  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2391  if (!codeptr)
2392  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2393  if (ompt_enabled.ompt_callback_lock_init) {
2394  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2395  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2396  (ompt_wait_id_t)user_lock, codeptr);
2397  }
2398 #endif
2399 
2400 #if USE_ITT_BUILD
2401  __kmp_itt_lock_creating(lck);
2402 #endif /* USE_ITT_BUILD */
2403 
2404 #endif // KMP_USE_DYNAMIC_LOCK
2405 } // __kmpc_init_nest_lock
2406 
2407 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2408 #if KMP_USE_DYNAMIC_LOCK
2409 
2410 #if USE_ITT_BUILD
2411  kmp_user_lock_p lck;
2412  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2413  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2414  } else {
2415  lck = (kmp_user_lock_p)user_lock;
2416  }
2417  __kmp_itt_lock_destroyed(lck);
2418 #endif
2419 #if OMPT_SUPPORT && OMPT_OPTIONAL
2420  // This is the case, if called from omp_init_lock_with_hint:
2421  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2422  if (!codeptr)
2423  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2424  if (ompt_enabled.ompt_callback_lock_destroy) {
2425  kmp_user_lock_p lck;
2426  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2427  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2428  } else {
2429  lck = (kmp_user_lock_p)user_lock;
2430  }
2431  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2432  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2433  }
2434 #endif
2435  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2436 #else
2437  kmp_user_lock_p lck;
2438 
2439  if ((__kmp_user_lock_kind == lk_tas) &&
2440  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2441  lck = (kmp_user_lock_p)user_lock;
2442  }
2443 #if KMP_USE_FUTEX
2444  else if ((__kmp_user_lock_kind == lk_futex) &&
2445  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2446  lck = (kmp_user_lock_p)user_lock;
2447  }
2448 #endif
2449  else {
2450  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2451  }
2452 
2453 #if OMPT_SUPPORT && OMPT_OPTIONAL
2454  // This is the case, if called from omp_init_lock_with_hint:
2455  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2456  if (!codeptr)
2457  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2458  if (ompt_enabled.ompt_callback_lock_destroy) {
2459  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2460  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2461  }
2462 #endif
2463 
2464 #if USE_ITT_BUILD
2465  __kmp_itt_lock_destroyed(lck);
2466 #endif /* USE_ITT_BUILD */
2467  DESTROY_LOCK(lck);
2468 
2469  if ((__kmp_user_lock_kind == lk_tas) &&
2470  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2471  ;
2472  }
2473 #if KMP_USE_FUTEX
2474  else if ((__kmp_user_lock_kind == lk_futex) &&
2475  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2476  ;
2477  }
2478 #endif
2479  else {
2480  __kmp_user_lock_free(user_lock, gtid, lck);
2481  }
2482 #endif // KMP_USE_DYNAMIC_LOCK
2483 } // __kmpc_destroy_lock
2484 
2485 /* destroy the lock */
2486 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2487 #if KMP_USE_DYNAMIC_LOCK
2488 
2489 #if USE_ITT_BUILD
2490  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2491  __kmp_itt_lock_destroyed(ilk->lock);
2492 #endif
2493 #if OMPT_SUPPORT && OMPT_OPTIONAL
2494  // This is the case, if called from omp_init_lock_with_hint:
2495  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2496  if (!codeptr)
2497  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2498  if (ompt_enabled.ompt_callback_lock_destroy) {
2499  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2500  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2501  }
2502 #endif
2503  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2504 
2505 #else // KMP_USE_DYNAMIC_LOCK
2506 
2507  kmp_user_lock_p lck;
2508 
2509  if ((__kmp_user_lock_kind == lk_tas) &&
2510  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2511  OMP_NEST_LOCK_T_SIZE)) {
2512  lck = (kmp_user_lock_p)user_lock;
2513  }
2514 #if KMP_USE_FUTEX
2515  else if ((__kmp_user_lock_kind == lk_futex) &&
2516  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2517  OMP_NEST_LOCK_T_SIZE)) {
2518  lck = (kmp_user_lock_p)user_lock;
2519  }
2520 #endif
2521  else {
2522  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2523  }
2524 
2525 #if OMPT_SUPPORT && OMPT_OPTIONAL
2526  // This is the case, if called from omp_init_lock_with_hint:
2527  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2528  if (!codeptr)
2529  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2530  if (ompt_enabled.ompt_callback_lock_destroy) {
2531  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2532  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2533  }
2534 #endif
2535 
2536 #if USE_ITT_BUILD
2537  __kmp_itt_lock_destroyed(lck);
2538 #endif /* USE_ITT_BUILD */
2539 
2540  DESTROY_NESTED_LOCK(lck);
2541 
2542  if ((__kmp_user_lock_kind == lk_tas) &&
2543  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2544  OMP_NEST_LOCK_T_SIZE)) {
2545  ;
2546  }
2547 #if KMP_USE_FUTEX
2548  else if ((__kmp_user_lock_kind == lk_futex) &&
2549  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2550  OMP_NEST_LOCK_T_SIZE)) {
2551  ;
2552  }
2553 #endif
2554  else {
2555  __kmp_user_lock_free(user_lock, gtid, lck);
2556  }
2557 #endif // KMP_USE_DYNAMIC_LOCK
2558 } // __kmpc_destroy_nest_lock
2559 
2560 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2561  KMP_COUNT_BLOCK(OMP_set_lock);
2562 #if KMP_USE_DYNAMIC_LOCK
2563  int tag = KMP_EXTRACT_D_TAG(user_lock);
2564 #if USE_ITT_BUILD
2565  __kmp_itt_lock_acquiring(
2566  (kmp_user_lock_p)
2567  user_lock); // itt function will get to the right lock object.
2568 #endif
2569 #if OMPT_SUPPORT && OMPT_OPTIONAL
2570  // This is the case, if called from omp_init_lock_with_hint:
2571  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2572  if (!codeptr)
2573  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2574  if (ompt_enabled.ompt_callback_mutex_acquire) {
2575  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2576  ompt_mutex_lock, omp_lock_hint_none,
2577  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2578  codeptr);
2579  }
2580 #endif
2581 #if KMP_USE_INLINED_TAS
2582  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2583  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2584  } else
2585 #elif KMP_USE_INLINED_FUTEX
2586  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2587  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2588  } else
2589 #endif
2590  {
2591  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2592  }
2593 #if USE_ITT_BUILD
2594  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2595 #endif
2596 #if OMPT_SUPPORT && OMPT_OPTIONAL
2597  if (ompt_enabled.ompt_callback_mutex_acquired) {
2598  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2599  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2600  }
2601 #endif
2602 
2603 #else // KMP_USE_DYNAMIC_LOCK
2604 
2605  kmp_user_lock_p lck;
2606 
2607  if ((__kmp_user_lock_kind == lk_tas) &&
2608  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2609  lck = (kmp_user_lock_p)user_lock;
2610  }
2611 #if KMP_USE_FUTEX
2612  else if ((__kmp_user_lock_kind == lk_futex) &&
2613  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2614  lck = (kmp_user_lock_p)user_lock;
2615  }
2616 #endif
2617  else {
2618  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2619  }
2620 
2621 #if USE_ITT_BUILD
2622  __kmp_itt_lock_acquiring(lck);
2623 #endif /* USE_ITT_BUILD */
2624 #if OMPT_SUPPORT && OMPT_OPTIONAL
2625  // This is the case, if called from omp_init_lock_with_hint:
2626  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2627  if (!codeptr)
2628  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2629  if (ompt_enabled.ompt_callback_mutex_acquire) {
2630  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2631  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2632  (ompt_wait_id_t)lck, codeptr);
2633  }
2634 #endif
2635 
2636  ACQUIRE_LOCK(lck, gtid);
2637 
2638 #if USE_ITT_BUILD
2639  __kmp_itt_lock_acquired(lck);
2640 #endif /* USE_ITT_BUILD */
2641 
2642 #if OMPT_SUPPORT && OMPT_OPTIONAL
2643  if (ompt_enabled.ompt_callback_mutex_acquired) {
2644  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2645  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2646  }
2647 #endif
2648 
2649 #endif // KMP_USE_DYNAMIC_LOCK
2650 }
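// --- Editor's usage sketch (illustrative, not part of the runtime source) ---
// __kmpc_set_lock/__kmpc_unset_lock are what the user-level
// omp_set_lock/omp_unset_lock pair ultimately reaches.  A minimal
// critical-section sketch guarding a shared counter; the names are
// illustrative only:
#include <omp.h>

omp_lock_t counter_lock; // assumed initialized once with omp_init_lock()
long shared_counter;

void bump_counter(void) {
  omp_set_lock(&counter_lock);   // blocks until the lock is acquired
  ++shared_counter;
  omp_unset_lock(&counter_lock); // release; one waiter may now proceed
}
// --- end of editor's sketch ---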
2651 
2652 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2653 #if KMP_USE_DYNAMIC_LOCK
2654 
2655 #if USE_ITT_BUILD
2656  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2657 #endif
2658 #if OMPT_SUPPORT && OMPT_OPTIONAL
2659  // This is the case, if called from omp_init_lock_with_hint:
2660  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2661  if (!codeptr)
2662  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2663  if (ompt_enabled.enabled) {
2664  if (ompt_enabled.ompt_callback_mutex_acquire) {
2665  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2666  ompt_mutex_nest_lock, omp_lock_hint_none,
2667  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2668  codeptr);
2669  }
2670  }
2671 #endif
2672  int acquire_status =
2673  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2674  (void) acquire_status;
2675 #if USE_ITT_BUILD
2676  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2677 #endif
2678 
2679 #if OMPT_SUPPORT && OMPT_OPTIONAL
2680  if (ompt_enabled.enabled) {
2681  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2682  if (ompt_enabled.ompt_callback_mutex_acquired) {
2683  // lock_first
2684  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2685  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2686  }
2687  } else {
2688  if (ompt_enabled.ompt_callback_nest_lock) {
2689  // lock_next
2690  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2691  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
2692  }
2693  }
2694  }
2695 #endif
2696 
2697 #else // KMP_USE_DYNAMIC_LOCK
2698  int acquire_status;
2699  kmp_user_lock_p lck;
2700 
2701  if ((__kmp_user_lock_kind == lk_tas) &&
2702  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2703  OMP_NEST_LOCK_T_SIZE)) {
2704  lck = (kmp_user_lock_p)user_lock;
2705  }
2706 #if KMP_USE_FUTEX
2707  else if ((__kmp_user_lock_kind == lk_futex) &&
2708  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2709  OMP_NEST_LOCK_T_SIZE)) {
2710  lck = (kmp_user_lock_p)user_lock;
2711  }
2712 #endif
2713  else {
2714  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2715  }
2716 
2717 #if USE_ITT_BUILD
2718  __kmp_itt_lock_acquiring(lck);
2719 #endif /* USE_ITT_BUILD */
2720 #if OMPT_SUPPORT && OMPT_OPTIONAL
2721  // This is the case, if called from omp_init_lock_with_hint:
2722  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2723  if (!codeptr)
2724  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2725  if (ompt_enabled.enabled) {
2726  if (ompt_enabled.ompt_callback_mutex_acquire) {
2727  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2728  ompt_mutex_nest_lock, omp_lock_hint_none,
2729  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
2730  }
2731  }
2732 #endif
2733 
2734  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2735 
2736 #if USE_ITT_BUILD
2737  __kmp_itt_lock_acquired(lck);
2738 #endif /* USE_ITT_BUILD */
2739 
2740 #if OMPT_SUPPORT && OMPT_OPTIONAL
2741  if (ompt_enabled.enabled) {
2742  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2743  if (ompt_enabled.ompt_callback_mutex_acquired) {
2744  // lock_first
2745  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2746  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2747  }
2748  } else {
2749  if (ompt_enabled.ompt_callback_nest_lock) {
2750  // lock_next
2751  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2752  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
2753  }
2754  }
2755  }
2756 #endif
2757 
2758 #endif // KMP_USE_DYNAMIC_LOCK
2759 }
2760 
2761 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2762 #if KMP_USE_DYNAMIC_LOCK
2763 
2764  int tag = KMP_EXTRACT_D_TAG(user_lock);
2765 #if USE_ITT_BUILD
2766  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2767 #endif
2768 #if KMP_USE_INLINED_TAS
2769  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2770  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2771  } else
2772 #elif KMP_USE_INLINED_FUTEX
2773  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2774  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2775  } else
2776 #endif
2777  {
2778  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2779  }
2780 
2781 #if OMPT_SUPPORT && OMPT_OPTIONAL
2782  // This is the case, if called from omp_init_lock_with_hint:
2783  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2784  if (!codeptr)
2785  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2786  if (ompt_enabled.ompt_callback_mutex_released) {
2787  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2788  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2789  }
2790 #endif
2791 
2792 #else // KMP_USE_DYNAMIC_LOCK
2793 
2794  kmp_user_lock_p lck;
2795 
2796  /* Can't use serial interval since not block structured */
2797  /* release the lock */
2798 
2799  if ((__kmp_user_lock_kind == lk_tas) &&
2800  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2801 #if KMP_OS_LINUX && \
2802  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2803 // "fast" path implemented to fix customer performance issue
2804 #if USE_ITT_BUILD
2805  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2806 #endif /* USE_ITT_BUILD */
2807  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2808  KMP_MB();
2809 
2810 #if OMPT_SUPPORT && OMPT_OPTIONAL
2811  // This is the case, if called from omp_init_lock_with_hint:
2812  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2813  if (!codeptr)
2814  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2815  if (ompt_enabled.ompt_callback_mutex_released) {
2816  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
 2817  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2818  }
2819 #endif
2820 
2821  return;
2822 #else
2823  lck = (kmp_user_lock_p)user_lock;
2824 #endif
2825  }
2826 #if KMP_USE_FUTEX
2827  else if ((__kmp_user_lock_kind == lk_futex) &&
2828  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2829  lck = (kmp_user_lock_p)user_lock;
2830  }
2831 #endif
2832  else {
2833  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2834  }
2835 
2836 #if USE_ITT_BUILD
2837  __kmp_itt_lock_releasing(lck);
2838 #endif /* USE_ITT_BUILD */
2839 
2840  RELEASE_LOCK(lck, gtid);
2841 
2842 #if OMPT_SUPPORT && OMPT_OPTIONAL
2843  // This is the case, if called from omp_init_lock_with_hint:
2844  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2845  if (!codeptr)
2846  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2847  if (ompt_enabled.ompt_callback_mutex_released) {
2848  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2849  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2850  }
2851 #endif
2852 
2853 #endif // KMP_USE_DYNAMIC_LOCK
2854 }
2855 
2856 /* release the lock */
2857 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2858 #if KMP_USE_DYNAMIC_LOCK
2859 
2860 #if USE_ITT_BUILD
2861  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2862 #endif
2863  int release_status =
2864  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2865  (void) release_status;
2866 
2867 #if OMPT_SUPPORT && OMPT_OPTIONAL
2868  // This is the case, if called from omp_init_lock_with_hint:
2869  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2870  if (!codeptr)
2871  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2872  if (ompt_enabled.enabled) {
2873  if (release_status == KMP_LOCK_RELEASED) {
2874  if (ompt_enabled.ompt_callback_mutex_released) {
2875  // release_lock_last
2876  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2877  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2878  }
2879  } else if (ompt_enabled.ompt_callback_nest_lock) {
2880  // release_lock_prev
2881  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2882  ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2883  }
2884  }
2885 #endif
2886 
2887 #else // KMP_USE_DYNAMIC_LOCK
2888 
2889  kmp_user_lock_p lck;
2890 
2891  /* Can't use serial interval since not block structured */
2892 
2893  if ((__kmp_user_lock_kind == lk_tas) &&
2894  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2895  OMP_NEST_LOCK_T_SIZE)) {
2896 #if KMP_OS_LINUX && \
2897  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2898  // "fast" path implemented to fix customer performance issue
2899  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2900 #if USE_ITT_BUILD
2901  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2902 #endif /* USE_ITT_BUILD */
2903 
2904 #if OMPT_SUPPORT && OMPT_OPTIONAL
2905  int release_status = KMP_LOCK_STILL_HELD;
2906 #endif
2907 
2908  if (--(tl->lk.depth_locked) == 0) {
2909  TCW_4(tl->lk.poll, 0);
2910 #if OMPT_SUPPORT && OMPT_OPTIONAL
2911  release_status = KMP_LOCK_RELEASED;
2912 #endif
2913  }
2914  KMP_MB();
2915 
2916 #if OMPT_SUPPORT && OMPT_OPTIONAL
2917  // This is the case, if called from omp_init_lock_with_hint:
2918  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2919  if (!codeptr)
2920  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2921  if (ompt_enabled.enabled) {
2922  if (release_status == KMP_LOCK_RELEASED) {
2923  if (ompt_enabled.ompt_callback_mutex_released) {
2924  // release_lock_last
2925  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
 2926  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
 2927  }
 2928  } else if (ompt_enabled.ompt_callback_nest_lock) {
 2929  // release_lock_previous
 2930  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 2931  ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2932  }
2933  }
2934 #endif
2935 
2936  return;
2937 #else
2938  lck = (kmp_user_lock_p)user_lock;
2939 #endif
2940  }
2941 #if KMP_USE_FUTEX
2942  else if ((__kmp_user_lock_kind == lk_futex) &&
2943  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2944  OMP_NEST_LOCK_T_SIZE)) {
2945  lck = (kmp_user_lock_p)user_lock;
2946  }
2947 #endif
2948  else {
2949  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2950  }
2951 
2952 #if USE_ITT_BUILD
2953  __kmp_itt_lock_releasing(lck);
2954 #endif /* USE_ITT_BUILD */
2955 
2956  int release_status;
2957  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2958 #if OMPT_SUPPORT && OMPT_OPTIONAL
2959  // This is the case, if called from omp_init_lock_with_hint:
2960  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2961  if (!codeptr)
2962  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2963  if (ompt_enabled.enabled) {
2964  if (release_status == KMP_LOCK_RELEASED) {
2965  if (ompt_enabled.ompt_callback_mutex_released) {
2966  // release_lock_last
2967  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2968  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2969  }
2970  } else if (ompt_enabled.ompt_callback_nest_lock) {
2971  // release_lock_previous
2972  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 2973  ompt_scope_end, (ompt_wait_id_t)lck, codeptr);
2974  }
2975  }
2976 #endif
2977 
2978 #endif // KMP_USE_DYNAMIC_LOCK
2979 }
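// --- Editor's usage sketch (illustrative, not part of the runtime source) ---
// Nestable locks may be re-acquired by the thread that already owns them; the
// set/unset routines above maintain the depth count and emit the matching
// OMPT nest-lock begin/end events.  A hedged re-entrancy sketch:
#include <omp.h>

omp_nest_lock_t state_lock; // assumed initialized with omp_init_nest_lock()
int state;

void set_state(int v) {
  omp_set_nest_lock(&state_lock); // depth >= 1 for the owning thread
  state = v;
  omp_unset_nest_lock(&state_lock);
}

void reset_state(void) {
  omp_set_nest_lock(&state_lock); // outer acquisition
  set_state(0);                   // inner acquisition: no self-deadlock
  omp_unset_nest_lock(&state_lock);
}
// --- end of editor's sketch ---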
2980 
2981 /* try to acquire the lock */
2982 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2983  KMP_COUNT_BLOCK(OMP_test_lock);
2984 
2985 #if KMP_USE_DYNAMIC_LOCK
2986  int rc;
2987  int tag = KMP_EXTRACT_D_TAG(user_lock);
2988 #if USE_ITT_BUILD
2989  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2990 #endif
2991 #if OMPT_SUPPORT && OMPT_OPTIONAL
2992  // This is the case, if called from omp_init_lock_with_hint:
2993  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2994  if (!codeptr)
2995  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2996  if (ompt_enabled.ompt_callback_mutex_acquire) {
2997  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2998  ompt_mutex_lock, omp_lock_hint_none,
2999  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3000  codeptr);
3001  }
3002 #endif
3003 #if KMP_USE_INLINED_TAS
3004  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3005  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3006  } else
3007 #elif KMP_USE_INLINED_FUTEX
3008  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3009  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3010  } else
3011 #endif
3012  {
3013  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3014  }
3015  if (rc) {
3016 #if USE_ITT_BUILD
3017  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3018 #endif
3019 #if OMPT_SUPPORT && OMPT_OPTIONAL
3020  if (ompt_enabled.ompt_callback_mutex_acquired) {
3021  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3022  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
3023  }
3024 #endif
3025  return FTN_TRUE;
3026  } else {
3027 #if USE_ITT_BUILD
3028  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3029 #endif
3030  return FTN_FALSE;
3031  }
3032 
3033 #else // KMP_USE_DYNAMIC_LOCK
3034 
3035  kmp_user_lock_p lck;
3036  int rc;
3037 
3038  if ((__kmp_user_lock_kind == lk_tas) &&
3039  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3040  lck = (kmp_user_lock_p)user_lock;
3041  }
3042 #if KMP_USE_FUTEX
3043  else if ((__kmp_user_lock_kind == lk_futex) &&
3044  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3045  lck = (kmp_user_lock_p)user_lock;
3046  }
3047 #endif
3048  else {
3049  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3050  }
3051 
3052 #if USE_ITT_BUILD
3053  __kmp_itt_lock_acquiring(lck);
3054 #endif /* USE_ITT_BUILD */
3055 #if OMPT_SUPPORT && OMPT_OPTIONAL
3056  // This is the case, if called from omp_init_lock_with_hint:
3057  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3058  if (!codeptr)
3059  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3060  if (ompt_enabled.ompt_callback_mutex_acquire) {
3061  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3062  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3063  (ompt_wait_id_t)lck, codeptr);
3064  }
3065 #endif
3066 
3067  rc = TEST_LOCK(lck, gtid);
3068 #if USE_ITT_BUILD
3069  if (rc) {
3070  __kmp_itt_lock_acquired(lck);
3071  } else {
3072  __kmp_itt_lock_cancelled(lck);
3073  }
3074 #endif /* USE_ITT_BUILD */
3075 #if OMPT_SUPPORT && OMPT_OPTIONAL
3076  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3077  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3078  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
3079  }
3080 #endif
3081 
3082  return (rc ? FTN_TRUE : FTN_FALSE);
3083 
3084 /* Can't use serial interval since not block structured */
3085 
3086 #endif // KMP_USE_DYNAMIC_LOCK
3087 }
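// --- Editor's usage sketch (illustrative, not part of the runtime source) ---
// __kmpc_test_lock returns FTN_TRUE only when the lock is acquired without
// blocking, mirroring the user-level omp_test_lock.  A hedged sketch of doing
// other work instead of spinning when the lock is busy; both callbacks are
// illustrative placeholders:
#include <omp.h>

void try_drain(omp_lock_t *queue_lock, void (*drain)(void),
               void (*other_work)(void)) {
  if (omp_test_lock(queue_lock)) { // nonzero: we now own the lock
    drain();
    omp_unset_lock(queue_lock);
  } else {
    other_work();                  // lock busy: do not block
  }
}
// --- end of editor's sketch ---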
3088 
3089 /* try to acquire the lock */
3090 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3091 #if KMP_USE_DYNAMIC_LOCK
3092  int rc;
3093 #if USE_ITT_BUILD
3094  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3095 #endif
3096 #if OMPT_SUPPORT && OMPT_OPTIONAL
3097  // This is the case, if called from omp_init_lock_with_hint:
3098  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3099  if (!codeptr)
3100  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3101  if (ompt_enabled.ompt_callback_mutex_acquire) {
3102  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3103  ompt_mutex_nest_lock, omp_lock_hint_none,
3104  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3105  codeptr);
3106  }
3107 #endif
3108  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3109 #if USE_ITT_BUILD
3110  if (rc) {
3111  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3112  } else {
3113  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3114  }
3115 #endif
3116 #if OMPT_SUPPORT && OMPT_OPTIONAL
3117  if (ompt_enabled.enabled && rc) {
3118  if (rc == 1) {
3119  if (ompt_enabled.ompt_callback_mutex_acquired) {
3120  // lock_first
3121  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3122  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
3123  }
3124  } else {
3125  if (ompt_enabled.ompt_callback_nest_lock) {
3126  // lock_next
3127  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3128  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
3129  }
3130  }
3131  }
3132 #endif
3133  return rc;
3134 
3135 #else // KMP_USE_DYNAMIC_LOCK
3136 
3137  kmp_user_lock_p lck;
3138  int rc;
3139 
3140  if ((__kmp_user_lock_kind == lk_tas) &&
3141  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3142  OMP_NEST_LOCK_T_SIZE)) {
3143  lck = (kmp_user_lock_p)user_lock;
3144  }
3145 #if KMP_USE_FUTEX
3146  else if ((__kmp_user_lock_kind == lk_futex) &&
3147  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3148  OMP_NEST_LOCK_T_SIZE)) {
3149  lck = (kmp_user_lock_p)user_lock;
3150  }
3151 #endif
3152  else {
3153  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3154  }
3155 
3156 #if USE_ITT_BUILD
3157  __kmp_itt_lock_acquiring(lck);
3158 #endif /* USE_ITT_BUILD */
3159 
3160 #if OMPT_SUPPORT && OMPT_OPTIONAL
3161  // This is the case, if called from omp_init_lock_with_hint:
3162  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3163  if (!codeptr)
3164  codeptr = OMPT_GET_RETURN_ADDRESS(0);
 3165  if (ompt_enabled.enabled &&
 3166      ompt_enabled.ompt_callback_mutex_acquire) {
3167  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3168  ompt_mutex_nest_lock, omp_lock_hint_none,
3169  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
3170  }
3171 #endif
3172 
3173  rc = TEST_NESTED_LOCK(lck, gtid);
3174 #if USE_ITT_BUILD
3175  if (rc) {
3176  __kmp_itt_lock_acquired(lck);
3177  } else {
3178  __kmp_itt_lock_cancelled(lck);
3179  }
3180 #endif /* USE_ITT_BUILD */
3181 #if OMPT_SUPPORT && OMPT_OPTIONAL
3182  if (ompt_enabled.enabled && rc) {
3183  if (rc == 1) {
3184  if (ompt_enabled.ompt_callback_mutex_acquired) {
3185  // lock_first
3186  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3187  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
3188  }
3189  } else {
3190  if (ompt_enabled.ompt_callback_nest_lock) {
3191  // lock_next
3192  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 3193  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
3194  }
3195  }
3196  }
3197 #endif
3198  return rc;
3199 
3200 /* Can't use serial interval since not block structured */
3201 
3202 #endif // KMP_USE_DYNAMIC_LOCK
3203 }
3204 
3205 // Interface to fast scalable reduce methods routines
3206 
3207 // keep the selected method in a thread local structure for cross-function
3208 // usage: will be used in __kmpc_end_reduce* functions;
 3209 // another solution: re-determine the method once more in the
 3210 // __kmpc_end_reduce* functions (a new prototype would be required then)
3211 // AT: which solution is better?
3212 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3213  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3214 
3215 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3216  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3217 
3218 // description of the packed_reduction_method variable: look at the macros in
3219 // kmp.h
3220 
3221 // used in a critical section reduce block
3222 static __forceinline void
3223 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3224  kmp_critical_name *crit) {
3225 
 3226  // this lock was visible to a customer and to the threading profile tool as a
 3227  // serial overhead span (although it is used for an internal purpose only)
 3228  // why was it visible in the previous implementation?
 3229  // should we keep it visible in the new reduce block?
3230  kmp_user_lock_p lck;
3231 
3232 #if KMP_USE_DYNAMIC_LOCK
3233 
3234  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3235  // Check if it is initialized.
3236  if (*lk == 0) {
3237  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3238  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3239  KMP_GET_D_TAG(__kmp_user_lock_seq));
3240  } else {
3241  __kmp_init_indirect_csptr(crit, loc, global_tid,
3242  KMP_GET_I_TAG(__kmp_user_lock_seq));
3243  }
3244  }
3245  // Branch for accessing the actual lock object and set operation. This
3246  // branching is inevitable since this lock initialization does not follow the
3247  // normal dispatch path (lock table is not used).
3248  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3249  lck = (kmp_user_lock_p)lk;
3250  KMP_DEBUG_ASSERT(lck != NULL);
3251  if (__kmp_env_consistency_check) {
3252  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3253  }
3254  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3255  } else {
3256  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3257  lck = ilk->lock;
3258  KMP_DEBUG_ASSERT(lck != NULL);
3259  if (__kmp_env_consistency_check) {
3260  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3261  }
3262  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3263  }
3264 
3265 #else // KMP_USE_DYNAMIC_LOCK
3266 
3267  // We know that the fast reduction code is only emitted by Intel compilers
3268  // with 32 byte critical sections. If there isn't enough space, then we
3269  // have to use a pointer.
3270  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3271  lck = (kmp_user_lock_p)crit;
3272  } else {
3273  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3274  }
3275  KMP_DEBUG_ASSERT(lck != NULL);
3276 
3277  if (__kmp_env_consistency_check)
3278  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3279 
3280  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3281 
3282 #endif // KMP_USE_DYNAMIC_LOCK
3283 }
3284 
3285 // used in a critical section reduce block
3286 static __forceinline void
3287 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3288  kmp_critical_name *crit) {
3289 
3290  kmp_user_lock_p lck;
3291 
3292 #if KMP_USE_DYNAMIC_LOCK
3293 
3294  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3295  lck = (kmp_user_lock_p)crit;
3296  if (__kmp_env_consistency_check)
3297  __kmp_pop_sync(global_tid, ct_critical, loc);
3298  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3299  } else {
3300  kmp_indirect_lock_t *ilk =
3301  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3302  if (__kmp_env_consistency_check)
3303  __kmp_pop_sync(global_tid, ct_critical, loc);
3304  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3305  }
3306 
3307 #else // KMP_USE_DYNAMIC_LOCK
3308 
3309  // We know that the fast reduction code is only emitted by Intel compilers
3310  // with 32 byte critical sections. If there isn't enough space, then we have
3311  // to use a pointer.
3312  if (__kmp_base_user_lock_size > 32) {
3313  lck = *((kmp_user_lock_p *)crit);
3314  KMP_ASSERT(lck != NULL);
3315  } else {
3316  lck = (kmp_user_lock_p)crit;
3317  }
3318 
3319  if (__kmp_env_consistency_check)
3320  __kmp_pop_sync(global_tid, ct_critical, loc);
3321 
3322  __kmp_release_user_lock_with_checks(lck, global_tid);
3323 
3324 #endif // KMP_USE_DYNAMIC_LOCK
3325 } // __kmp_end_critical_section_reduce_block
3326 
3327 #if OMP_40_ENABLED
3328 static __forceinline int
3329 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3330  int *task_state) {
3331  kmp_team_t *team;
3332 
 3333  // Check if we are inside the teams construct
3334  if (th->th.th_teams_microtask) {
3335  *team_p = team = th->th.th_team;
3336  if (team->t.t_level == th->th.th_teams_level) {
3337  // This is reduction at teams construct.
3338  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3339  // Let's swap teams temporarily for the reduction.
3340  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3341  th->th.th_team = team->t.t_parent;
3342  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3343  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3344  *task_state = th->th.th_task_state;
3345  th->th.th_task_state = 0;
3346 
3347  return 1;
3348  }
3349  }
3350  return 0;
3351 }
3352 
3353 static __forceinline void
3354 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3355  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3356  th->th.th_info.ds.ds_tid = 0;
3357  th->th.th_team = team;
3358  th->th.th_team_nproc = team->t.t_nproc;
3359  th->th.th_task_team = team->t.t_task_team[task_state];
3360  th->th.th_task_state = task_state;
3361 }
3362 #endif
3363 
3364 /* 2.a.i. Reduce Block without a terminating barrier */
3380 kmp_int32
3381 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3382  size_t reduce_size, void *reduce_data,
3383  void (*reduce_func)(void *lhs_data, void *rhs_data),
3384  kmp_critical_name *lck) {
3385 
3386  KMP_COUNT_BLOCK(REDUCE_nowait);
3387  int retval = 0;
3388  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3389 #if OMP_40_ENABLED
3390  kmp_info_t *th;
3391  kmp_team_t *team;
3392  int teams_swapped = 0, task_state;
3393 #endif
3394  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3395 
 3396  // why do we need this initialization here at all?
 3397  // The reduction clause cannot be used as a stand-alone directive.
 3398 
 3399  // Do not call __kmp_serial_initialize(); it will be called by
 3400  // __kmp_parallel_initialize() if needed
 3401  // (possible detection of a false-positive race by the thread checker ???)
3402  if (!TCR_4(__kmp_init_parallel))
3403  __kmp_parallel_initialize();
3404 
3405 #if OMP_50_ENABLED
3406  __kmp_resume_if_soft_paused();
3407 #endif
3408 
3409 // check correctness of reduce block nesting
3410 #if KMP_USE_DYNAMIC_LOCK
3411  if (__kmp_env_consistency_check)
3412  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3413 #else
3414  if (__kmp_env_consistency_check)
3415  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3416 #endif
3417 
3418 #if OMP_40_ENABLED
3419  th = __kmp_thread_from_gtid(global_tid);
3420  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3421 #endif // OMP_40_ENABLED
3422 
 3423  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
 3424  // functions, so it must be kept in a variable.
 3425  // The variable should be either a construct-specific or a thread-specific
 3426  // property, not a team-specific property
 3427  // (a thread can reach the next reduce block on the next construct, and the
 3428  // reduce method may differ on that construct).
 3429  // An ident_t "loc" parameter could be used as a construct-specific property
 3430  // (but what if loc == 0?)
 3431  // (and if either a construct-specific or a team-specific variable were
 3432  // shared, unnecessary extra synchronization would be needed).
 3433  // A thread-specific variable is better with regard to both issues above
 3434  // (next construct and extra syncs);
 3435  // the thread-specific "th_local.packed_reduction_method" variable is used.
 3436  // Each thread executes the 'determine' and 'set' lines (no need to have one
 3437  // thread do it, which would again require unnecessary extra syncs).
3438 
3439  packed_reduction_method = __kmp_determine_reduction_method(
3440  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3441  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3442 
3443  if (packed_reduction_method == critical_reduce_block) {
3444 
3445  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3446  retval = 1;
3447 
3448  } else if (packed_reduction_method == empty_reduce_block) {
3449 
3450  // usage: if team size == 1, no synchronization is required ( Intel
3451  // platforms only )
3452  retval = 1;
3453 
3454  } else if (packed_reduction_method == atomic_reduce_block) {
3455 
3456  retval = 2;
3457 
 3458  // All threads should do this pop here (because __kmpc_end_reduce_nowait()
 3459  // won't be called by the code gen).
 3460  // (This is not quite right, because the checking block has been closed by
 3461  // this 'pop'
 3462  // while the atomic operation has not been executed yet; it will be executed
 3463  // slightly later, literally on the next instruction.)
3464  if (__kmp_env_consistency_check)
3465  __kmp_pop_sync(global_tid, ct_reduce, loc);
3466 
3467  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3468  tree_reduce_block)) {
3469 
 3470 // AT: performance issue: a real barrier here
 3471 // AT: (if the master goes slow, other threads are blocked here waiting for
 3472 // the master to come and release them)
 3473 // AT: (this is not what a customer might expect when specifying the NOWAIT
 3474 // clause; specifying NOWAIT will not improve performance here, which may be
 3475 // confusing to a customer)
 3476 // AT: another implementation of *barrier_gather*nowait() (or some other
 3477 // design) might go faster and be more in line with the sense of NOWAIT
 3478 // AT: TO DO: run the EPCC benchmark and compare times
3479 
3480 // this barrier should be invisible to a customer and to the threading profile
3481 // tool (it's neither a terminating barrier nor customer's code, it's
3482 // used for an internal purpose)
3483 #if OMPT_SUPPORT
 3484  // JP: can this barrier potentially lead to task scheduling?
 3485  // JP: as long as there is a barrier in the implementation, OMPT should and
 3486  // will provide the barrier events,
 3487  // so we set up the necessary frame/return addresses.
3488  ompt_frame_t *ompt_frame;
3489  if (ompt_enabled.enabled) {
3490  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3491  if (ompt_frame->enter_frame.ptr == NULL)
3492  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3493  OMPT_STORE_RETURN_ADDRESS(global_tid);
3494  }
3495 #endif
3496 #if USE_ITT_NOTIFY
3497  __kmp_threads[global_tid]->th.th_ident = loc;
3498 #endif
3499  retval =
3500  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3501  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3502  retval = (retval != 0) ? (0) : (1);
3503 #if OMPT_SUPPORT && OMPT_OPTIONAL
3504  if (ompt_enabled.enabled) {
3505  ompt_frame->enter_frame = ompt_data_none;
3506  }
3507 #endif
3508 
 3509  // all other workers except the master should do this pop here
 3510  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3511  if (__kmp_env_consistency_check) {
3512  if (retval == 0) {
3513  __kmp_pop_sync(global_tid, ct_reduce, loc);
3514  }
3515  }
3516 
3517  } else {
3518 
3519  // should never reach this block
3520  KMP_ASSERT(0); // "unexpected method"
3521  }
3522 #if OMP_40_ENABLED
3523  if (teams_swapped) {
3524  __kmp_restore_swapped_teams(th, team, task_state);
3525  }
3526 #endif
3527  KA_TRACE(
3528  10,
3529  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3530  global_tid, packed_reduction_method, retval));
3531 
3532  return retval;
3533 }
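// --- Editor's lowering sketch (illustrative, not verbatim compiler output) ---
// A typical shape for "reduction(+:sum) nowait" built on the entry point
// above.  The return value selects the finalization path: 1 means combine
// reduce_data non-atomically and call __kmpc_end_reduce_nowait(), 2 means
// update the original variable atomically, 0 means this thread has nothing
// left to do.  The helper __sum_reduce and all variable names are assumptions
// for illustration only.
static void __sum_reduce(void *lhs, void *rhs) {
  *(double *)lhs += *(double *)rhs; // combine two threads' partial sums
}

static void finalize_sum(ident_t *loc, kmp_int32 gtid, double *shared_sum,
                         double local_sum, kmp_critical_name *crit) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(double),
                               &local_sum, __sum_reduce, crit)) {
  case 1: // critical/tree/empty path: fold the partial result in, then end
    *shared_sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  case 2: // atomic path: each thread updates the original atomically
#pragma omp atomic
    *shared_sum += local_sum;
    break;
  default: // 0: this thread's partial was already consumed by the barrier
    break;
  }
}
// --- end of editor's sketch ---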
3534 
3543 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3544  kmp_critical_name *lck) {
3545 
3546  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3547 
3548  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3549 
3550  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3551 
3552  if (packed_reduction_method == critical_reduce_block) {
3553 
3554  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3555 
3556  } else if (packed_reduction_method == empty_reduce_block) {
3557 
3558  // usage: if team size == 1, no synchronization is required ( on Intel
3559  // platforms only )
3560 
3561  } else if (packed_reduction_method == atomic_reduce_block) {
3562 
3563  // neither master nor other workers should get here
3564  // (code gen does not generate this call in case 2: atomic reduce block)
 3565  // actually it would be better to remove this else-if entirely;
 3566  // after removal this value would be caught by the 'else' and would assert
3567 
3568  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3569  tree_reduce_block)) {
3570 
3571  // only master gets here
3572 
3573  } else {
3574 
3575  // should never reach this block
3576  KMP_ASSERT(0); // "unexpected method"
3577  }
3578 
3579  if (__kmp_env_consistency_check)
3580  __kmp_pop_sync(global_tid, ct_reduce, loc);
3581 
3582  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3583  global_tid, packed_reduction_method));
3584 
3585  return;
3586 }
3587 
3588 /* 2.a.ii. Reduce Block with a terminating barrier */
3589 
3605 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3606  size_t reduce_size, void *reduce_data,
3607  void (*reduce_func)(void *lhs_data, void *rhs_data),
3608  kmp_critical_name *lck) {
3609  KMP_COUNT_BLOCK(REDUCE_wait);
3610  int retval = 0;
3611  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3612 #if OMP_40_ENABLED
3613  kmp_info_t *th;
3614  kmp_team_t *team;
3615  int teams_swapped = 0, task_state;
3616 #endif
3617 
3618  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3619 
 3620  // why do we need this initialization here at all?
 3621  // The reduction clause cannot be a stand-alone directive.
 3622 
 3623  // Do not call __kmp_serial_initialize(); it will be called by
 3624  // __kmp_parallel_initialize() if needed
 3625  // (possible detection of a false-positive race by the thread checker ???)
3626  if (!TCR_4(__kmp_init_parallel))
3627  __kmp_parallel_initialize();
3628 
3629 #if OMP_50_ENABLED
3630  __kmp_resume_if_soft_paused();
3631 #endif
3632 
3633 // check correctness of reduce block nesting
3634 #if KMP_USE_DYNAMIC_LOCK
3635  if (__kmp_env_consistency_check)
3636  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3637 #else
3638  if (__kmp_env_consistency_check)
3639  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3640 #endif
3641 
3642 #if OMP_40_ENABLED
3643  th = __kmp_thread_from_gtid(global_tid);
3644  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3645 #endif // OMP_40_ENABLED
3646 
3647  packed_reduction_method = __kmp_determine_reduction_method(
3648  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3649  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3650 
3651  if (packed_reduction_method == critical_reduce_block) {
3652 
3653  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3654  retval = 1;
3655 
3656  } else if (packed_reduction_method == empty_reduce_block) {
3657 
3658  // usage: if team size == 1, no synchronization is required ( Intel
3659  // platforms only )
3660  retval = 1;
3661 
3662  } else if (packed_reduction_method == atomic_reduce_block) {
3663 
3664  retval = 2;
3665 
3666  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3667  tree_reduce_block)) {
3668 
3669 // case tree_reduce_block:
3670 // this barrier should be visible to a customer and to the threading profile
3671 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3672 #if OMPT_SUPPORT
3673  ompt_frame_t *ompt_frame;
3674  if (ompt_enabled.enabled) {
3675  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3676  if (ompt_frame->enter_frame.ptr == NULL)
3677  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3678  OMPT_STORE_RETURN_ADDRESS(global_tid);
3679  }
3680 #endif
3681 #if USE_ITT_NOTIFY
3682  __kmp_threads[global_tid]->th.th_ident =
3683  loc; // needed for correct notification of frames
3684 #endif
3685  retval =
3686  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3687  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3688  retval = (retval != 0) ? (0) : (1);
3689 #if OMPT_SUPPORT && OMPT_OPTIONAL
3690  if (ompt_enabled.enabled) {
3691  ompt_frame->enter_frame = ompt_data_none;
3692  }
3693 #endif
3694 
3695  // all workers except the master should do this pop here
3696  // (no worker other than the master will enter __kmpc_end_reduce())
3697  if (__kmp_env_consistency_check) {
3698  if (retval == 0) { // 0: all other workers; 1: master
3699  __kmp_pop_sync(global_tid, ct_reduce, loc);
3700  }
3701  }
3702 
3703  } else {
3704 
3705  // should never reach this block
3706  KMP_ASSERT(0); // "unexpected method"
3707  }
3708 #if OMP_40_ENABLED
3709  if (teams_swapped) {
3710  __kmp_restore_swapped_teams(th, team, task_state);
3711  }
3712 #endif
3713 
3714  KA_TRACE(10,
3715  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3716  global_tid, packed_reduction_method, retval));
3717 
3718  return retval;
3719 }
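/* Editorial sketch (not part of the runtime source): one plausible shape of
 * the compiler-generated code that drives the blocking reduce interface
 * above, e.g. for "reduction(+ : sum)" with the per-thread partial value
 * stored behind `data`. The names loc, gtid, data, lck and sum are
 * hypothetical; the exact contract is defined by the compiler codegen, not
 * by this sketch.
 *
 *   kmp_int32 ret = __kmpc_reduce(loc, gtid, 1, sizeof(double), data,
 *                                 reduce_func, &lck);
 *   switch (ret) {
 *   case 1:                    // this thread combines the partials itself
 *     sum += *(double *)data;  // (critical / empty / tree-master path)
 *     __kmpc_end_reduce(loc, gtid, &lck);
 *     break;
 *   case 2:                    // combine via an atomic update of sum instead
 *                              // of reduce_func; presumably still closed with
 *                              // __kmpc_end_reduce so the terminating barrier
 *                              // in its atomic branch is reached
 *     __kmpc_end_reduce(loc, gtid, &lck);
 *     break;
 *   default:                   // 0: tree-reduction worker; its value was
 *     break;                   // already consumed inside the barrier
 *   }
 */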
3720 
3731 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3732  kmp_critical_name *lck) {
3733 
3734  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3735 #if OMP_40_ENABLED
3736  kmp_info_t *th;
3737  kmp_team_t *team;
3738  int teams_swapped = 0, task_state;
3739 #endif
3740 
3741  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3742 
3743 #if OMP_40_ENABLED
3744  th = __kmp_thread_from_gtid(global_tid);
3745  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3746 #endif // OMP_40_ENABLED
3747 
3748  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3749 
3750  // this barrier should be visible to the user and to the threading profile
3751  // tool (it is a terminating barrier on constructs when NOWAIT is not specified)
3752 
3753  if (packed_reduction_method == critical_reduce_block) {
3754 
3755  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3756 
3757 // TODO: implicit barrier: should be exposed
3758 #if OMPT_SUPPORT
3759  ompt_frame_t *ompt_frame;
3760  if (ompt_enabled.enabled) {
3761  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3762  if (ompt_frame->enter_frame.ptr == NULL)
3763  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3764  OMPT_STORE_RETURN_ADDRESS(global_tid);
3765  }
3766 #endif
3767 #if USE_ITT_NOTIFY
3768  __kmp_threads[global_tid]->th.th_ident = loc;
3769 #endif
3770  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3771 #if OMPT_SUPPORT && OMPT_OPTIONAL
3772  if (ompt_enabled.enabled) {
3773  ompt_frame->enter_frame = ompt_data_none;
3774  }
3775 #endif
3776 
3777  } else if (packed_reduction_method == empty_reduce_block) {
3778 
3779 // usage: if the team size is 1, no synchronization is required (Intel platforms only)
3780 
3781 // TODO: implicit barrier: should be exposed
3782 #if OMPT_SUPPORT
3783  ompt_frame_t *ompt_frame;
3784  if (ompt_enabled.enabled) {
3785  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3786  if (ompt_frame->enter_frame.ptr == NULL)
3787  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3788  OMPT_STORE_RETURN_ADDRESS(global_tid);
3789  }
3790 #endif
3791 #if USE_ITT_NOTIFY
3792  __kmp_threads[global_tid]->th.th_ident = loc;
3793 #endif
3794  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3795 #if OMPT_SUPPORT && OMPT_OPTIONAL
3796  if (ompt_enabled.enabled) {
3797  ompt_frame->enter_frame = ompt_data_none;
3798  }
3799 #endif
3800 
3801  } else if (packed_reduction_method == atomic_reduce_block) {
3802 
3803 #if OMPT_SUPPORT
3804  ompt_frame_t *ompt_frame;
3805  if (ompt_enabled.enabled) {
3806  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3807  if (ompt_frame->enter_frame.ptr == NULL)
3808  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3809  OMPT_STORE_RETURN_ADDRESS(global_tid);
3810  }
3811 #endif
3812 // TODO: implicit barrier: should be exposed
3813 #if USE_ITT_NOTIFY
3814  __kmp_threads[global_tid]->th.th_ident = loc;
3815 #endif
3816  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3817 #if OMPT_SUPPORT && OMPT_OPTIONAL
3818  if (ompt_enabled.enabled) {
3819  ompt_frame->enter_frame = ompt_data_none;
3820  }
3821 #endif
3822 
3823  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3824  tree_reduce_block)) {
3825 
3826  // only master executes here (master releases all other workers)
3827  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3828  global_tid);
3829 
3830  } else {
3831 
3832  // should never reach this block
3833  KMP_ASSERT(0); // "unexpected method"
3834  }
3835 #if OMP_40_ENABLED
3836  if (teams_swapped) {
3837  __kmp_restore_swapped_teams(th, team, task_state);
3838  }
3839 #endif
3840 
3841  if (__kmp_env_consistency_check)
3842  __kmp_pop_sync(global_tid, ct_reduce, loc);
3843 
3844  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3845  global_tid, packed_reduction_method));
3846 
3847  return;
3848 }
3849 
3850 #undef __KMP_GET_REDUCTION_METHOD
3851 #undef __KMP_SET_REDUCTION_METHOD
3852 
3853 /* end of interface to fast scalable reduce routines */
3854 
3855 kmp_uint64 __kmpc_get_taskid() {
3856 
3857  kmp_int32 gtid;
3858  kmp_info_t *thread;
3859 
3860  gtid = __kmp_get_gtid();
3861  if (gtid < 0) {
3862  return 0;
3863  }
3864  thread = __kmp_thread_from_gtid(gtid);
3865  return thread->th.th_current_task->td_task_id;
3866 
3867 } // __kmpc_get_taskid
3868 
3869 kmp_uint64 __kmpc_get_parent_taskid() {
3870 
3871  kmp_int32 gtid;
3872  kmp_info_t *thread;
3873  kmp_taskdata_t *parent_task;
3874 
3875  gtid = __kmp_get_gtid();
3876  if (gtid < 0) {
3877  return 0;
3878  }
3879  thread = __kmp_thread_from_gtid(gtid);
3880  parent_task = thread->th.th_current_task->td_parent;
3881  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3882 
3883 } // __kmpc_get_parent_taskid
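/* Editorial note: these two entry points are simple queries that return 0
 * when called from a thread not managed by the OpenMP runtime. An
 * illustrative (hypothetical) use is tagging debug output with the current
 * task ids:
 *
 *   printf("task %llu (parent %llu)\n",
 *          (unsigned long long)__kmpc_get_taskid(),
 *          (unsigned long long)__kmpc_get_parent_taskid());
 */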
3884 
3885 #if OMP_45_ENABLED
3886 
3897 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3898  const struct kmp_dim *dims) {
3899  int j, idx;
3900  kmp_int64 last, trace_count;
3901  kmp_info_t *th = __kmp_threads[gtid];
3902  kmp_team_t *team = th->th.th_team;
3903  kmp_uint32 *flags;
3904  kmp_disp_t *pr_buf = th->th.th_dispatch;
3905  dispatch_shared_info_t *sh_buf;
3906 
3907  KA_TRACE(
3908  20,
3909  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3910  gtid, num_dims, !team->t.t_serialized));
3911  KMP_DEBUG_ASSERT(dims != NULL);
3912  KMP_DEBUG_ASSERT(num_dims > 0);
3913 
3914  if (team->t.t_serialized) {
3915  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3916  return; // no dependencies if team is serialized
3917  }
3918  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3919  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3920  // the next loop
3921  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3922 
3923  // Save bounds info into allocated private buffer
3924  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3925  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3926  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3927  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3928  pr_buf->th_doacross_info[0] =
3929  (kmp_int64)num_dims; // first element is number of dimensions
3930  // Save also address of num_done in order to access it later without knowing
3931  // the buffer index
3932  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3933  pr_buf->th_doacross_info[2] = dims[0].lo;
3934  pr_buf->th_doacross_info[3] = dims[0].up;
3935  pr_buf->th_doacross_info[4] = dims[0].st;
3936  last = 5;
3937  for (j = 1; j < num_dims; ++j) {
3938  kmp_int64
3939  range_length; // holds the range of each dimension except the first (dims[0])
3940  if (dims[j].st == 1) { // most common case
3941  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3942  range_length = dims[j].up - dims[j].lo + 1;
3943  } else {
3944  if (dims[j].st > 0) {
3945  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3946  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3947  } else { // negative increment
3948  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3949  range_length =
3950  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3951  }
3952  }
3953  pr_buf->th_doacross_info[last++] = range_length;
3954  pr_buf->th_doacross_info[last++] = dims[j].lo;
3955  pr_buf->th_doacross_info[last++] = dims[j].up;
3956  pr_buf->th_doacross_info[last++] = dims[j].st;
3957  }
3958 
3959  // Compute total trip count.
3960  // Start with range of dims[0] which we don't need to keep in the buffer.
3961  if (dims[0].st == 1) { // most common case
3962  trace_count = dims[0].up - dims[0].lo + 1;
3963  } else if (dims[0].st > 0) {
3964  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3965  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3966  } else { // negative increment
3967  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3968  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3969  }
3970  for (j = 1; j < num_dims; ++j) {
3971  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3972  }
3973  KMP_DEBUG_ASSERT(trace_count > 0);
3974 
3975  // Check if the shared buffer is still occupied by a previous loop (the one
3976  // that used buffer index idx - __kmp_dispatch_num_buffers)
3977  if (idx != sh_buf->doacross_buf_idx) {
3978  // Shared buffer is occupied, wait for it to be free
3979  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3980  __kmp_eq_4, NULL);
3981  }
3982 #if KMP_32_BIT_ARCH
3983  // Check if we are the first thread. After the CAS the first thread gets 0,
3984  // others get 1 if initialization is in progress, allocated pointer otherwise.
3985  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3986  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3987  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3988 #else
3989  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3990  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3991 #endif
3992  if (flags == NULL) {
3993  // we are the first thread, allocate the array of flags
3994  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3995  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3996  KMP_MB();
3997  sh_buf->doacross_flags = flags;
3998  } else if (flags == (kmp_uint32 *)1) {
3999 #if KMP_32_BIT_ARCH
4000  // initialization is still in progress, need to wait
4001  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4002 #else
4003  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4004 #endif
4005  KMP_YIELD(TRUE);
4006  KMP_MB();
4007  } else {
4008  KMP_MB();
4009  }
4010  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4011  pr_buf->th_doacross_flags =
4012  sh_buf->doacross_flags; // save a private copy so that each iteration
4013  // does not touch the shared buffer
4014  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4015 }
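/* Editorial sketch (not part of the runtime source): roughly what a compiler
 * might pass to __kmpc_doacross_init() when lowering
 *   #pragma omp for ordered(2)
 *   for (i = 0; i < N; ++i)
 *     for (j = 0; j < M; ++j) { ... }
 * The lo/up/st field names come from the accesses above, and the inclusive
 * upper bound follows from the range computation (up - lo + 1); the loop
 * variables and bounds are hypothetical.
 *
 *   struct kmp_dim dims[2];
 *   dims[0].lo = 0;  dims[0].up = N - 1;  dims[0].st = 1;
 *   dims[1].lo = 0;  dims[1].up = M - 1;  dims[1].st = 1;
 *   __kmpc_doacross_init(loc, gtid, 2, dims);
 *   // ... per-iteration __kmpc_doacross_wait()/__kmpc_doacross_post() calls,
 *   // then __kmpc_doacross_fini(loc, gtid) after the loop (see below)
 */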
4016 
4017 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4018  kmp_int32 shft, num_dims, i;
4019  kmp_uint32 flag;
4020  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4021  kmp_info_t *th = __kmp_threads[gtid];
4022  kmp_team_t *team = th->th.th_team;
4023  kmp_disp_t *pr_buf;
4024  kmp_int64 lo, up, st;
4025 
4026  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4027  if (team->t.t_serialized) {
4028  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4029  return; // no dependencies if team is serialized
4030  }
4031 
4032  // calculate sequential iteration number and check out-of-bounds condition
4033  pr_buf = th->th.th_dispatch;
4034  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4035  num_dims = pr_buf->th_doacross_info[0];
4036  lo = pr_buf->th_doacross_info[2];
4037  up = pr_buf->th_doacross_info[3];
4038  st = pr_buf->th_doacross_info[4];
4039  if (st == 1) { // most common case
4040  if (vec[0] < lo || vec[0] > up) {
4041  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4042  "bounds [%lld,%lld]\n",
4043  gtid, vec[0], lo, up));
4044  return;
4045  }
4046  iter_number = vec[0] - lo;
4047  } else if (st > 0) {
4048  if (vec[0] < lo || vec[0] > up) {
4049  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4050  "bounds [%lld,%lld]\n",
4051  gtid, vec[0], lo, up));
4052  return;
4053  }
4054  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4055  } else { // negative increment
4056  if (vec[0] > lo || vec[0] < up) {
4057  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4058  "bounds [%lld,%lld]\n",
4059  gtid, vec[0], lo, up));
4060  return;
4061  }
4062  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4063  }
4064  for (i = 1; i < num_dims; ++i) {
4065  kmp_int64 iter, ln;
4066  kmp_int32 j = i * 4;
4067  ln = pr_buf->th_doacross_info[j + 1];
4068  lo = pr_buf->th_doacross_info[j + 2];
4069  up = pr_buf->th_doacross_info[j + 3];
4070  st = pr_buf->th_doacross_info[j + 4];
4071  if (st == 1) {
4072  if (vec[i] < lo || vec[i] > up) {
4073  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4074  "bounds [%lld,%lld]\n",
4075  gtid, vec[i], lo, up));
4076  return;
4077  }
4078  iter = vec[i] - lo;
4079  } else if (st > 0) {
4080  if (vec[i] < lo || vec[i] > up) {
4081  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4082  "bounds [%lld,%lld]\n",
4083  gtid, vec[i], lo, up));
4084  return;
4085  }
4086  iter = (kmp_uint64)(vec[i] - lo) / st;
4087  } else { // st < 0
4088  if (vec[i] > lo || vec[i] < up) {
4089  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4090  "bounds [%lld,%lld]\n",
4091  gtid, vec[i], lo, up));
4092  return;
4093  }
4094  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4095  }
4096  iter_number = iter + ln * iter_number;
4097  }
4098  shft = iter_number % 32; // use 32-bit granularity
4099  iter_number >>= 5; // divided by 32
4100  flag = 1 << shft;
4101  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4102  KMP_YIELD(TRUE);
4103  }
4104  KMP_MB();
4105  KA_TRACE(20,
4106  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4107  gtid, (iter_number << 5) + shft));
4108 }
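/* Editorial note: a worked example of the indexing above. For the 2-D loop
 * sketched after __kmpc_doacross_init(), an iteration vector (i, j) is
 * linearized as iter_number = j + M * i (inner range ln = M). With, say,
 * iter_number = 70:
 *   shft        = 70 % 32 = 6       -> bit position within a flag word
 *   iter_number = 70 >> 5 = 2       -> index of the 32-bit flag word
 *   flag        = 1 << 6  = 0x40
 * so the wait spins until bit 6 of th_doacross_flags[2] has been set by the
 * matching __kmpc_doacross_post() call.
 */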
4109 
4110 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4111  kmp_int32 shft, num_dims, i;
4112  kmp_uint32 flag;
4113  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4114  kmp_info_t *th = __kmp_threads[gtid];
4115  kmp_team_t *team = th->th.th_team;
4116  kmp_disp_t *pr_buf;
4117  kmp_int64 lo, st;
4118 
4119  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4120  if (team->t.t_serialized) {
4121  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4122  return; // no dependencies if team is serialized
4123  }
4124 
4125  // calculate sequential iteration number (same as in "wait" but no
4126  // out-of-bounds checks)
4127  pr_buf = th->th.th_dispatch;
4128  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4129  num_dims = pr_buf->th_doacross_info[0];
4130  lo = pr_buf->th_doacross_info[2];
4131  st = pr_buf->th_doacross_info[4];
4132  if (st == 1) { // most common case
4133  iter_number = vec[0] - lo;
4134  } else if (st > 0) {
4135  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4136  } else { // negative increment
4137  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4138  }
4139  for (i = 1; i < num_dims; ++i) {
4140  kmp_int64 iter, ln;
4141  kmp_int32 j = i * 4;
4142  ln = pr_buf->th_doacross_info[j + 1];
4143  lo = pr_buf->th_doacross_info[j + 2];
4144  st = pr_buf->th_doacross_info[j + 4];
4145  if (st == 1) {
4146  iter = vec[i] - lo;
4147  } else if (st > 0) {
4148  iter = (kmp_uint64)(vec[i] - lo) / st;
4149  } else { // st < 0
4150  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4151  }
4152  iter_number = iter + ln * iter_number;
4153  }
4154  shft = iter_number % 32; // use 32-bit granularity
4155  iter_number >>= 5; // divided by 32
4156  flag = 1 << shft;
4157  KMP_MB();
4158  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4159  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4160  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4161  (iter_number << 5) + shft));
4162 }
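/* Editorial sketch (not part of the runtime source): how cross-iteration
 * dependences inside the ordered(2) loop sketched earlier would typically be
 * expressed in terms of these entry points. The mapping shown (sink -> wait,
 * source -> post) reflects common compiler lowering; the vector contents are
 * hypothetical.
 *
 *   // #pragma omp ordered depend(sink: i-1, j)
 *   kmp_int64 sink_vec[2] = { i - 1, j };
 *   __kmpc_doacross_wait(loc, gtid, sink_vec);
 *
 *   // ... loop body ...
 *
 *   // #pragma omp ordered depend(source)
 *   kmp_int64 src_vec[2] = { i, j };
 *   __kmpc_doacross_post(loc, gtid, src_vec);
 */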
4163 
4164 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4165  kmp_int32 num_done;
4166  kmp_info_t *th = __kmp_threads[gtid];
4167  kmp_team_t *team = th->th.th_team;
4168  kmp_disp_t *pr_buf = th->th.th_dispatch;
4169 
4170  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4171  if (team->t.t_serialized) {
4172  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4173  return; // nothing to do
4174  }
4175  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4176  if (num_done == th->th.th_team_nproc) {
4177  // we are the last thread, need to free shared resources
4178  int idx = pr_buf->th_doacross_buf_idx - 1;
4179  dispatch_shared_info_t *sh_buf =
4180  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4181  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4182  (kmp_int64)&sh_buf->doacross_num_done);
4183  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4184  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4185  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4186  sh_buf->doacross_flags = NULL;
4187  sh_buf->doacross_num_done = 0;
4188  sh_buf->doacross_buf_idx +=
4189  __kmp_dispatch_num_buffers; // free buffer for future re-use
4190  }
4191  // free private resources (need to keep buffer index forever)
4192  pr_buf->th_doacross_flags = NULL;
4193  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4194  pr_buf->th_doacross_info = NULL;
4195  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4196 }
4197 #endif
4198 
4199 #if OMP_50_ENABLED
4200 int __kmpc_get_target_offload(void) {
4201  if (!__kmp_init_serial) {
4202  __kmp_serial_initialize();
4203  }
4204  return __kmp_target_offload;
4205 }
4206 
4207 int __kmpc_pause_resource(kmp_pause_status_t level) {
4208  if (!__kmp_init_serial) {
4209  return 1; // Can't pause if runtime is not initialized
4210  }
4211  return __kmp_pause_resource(level);
4212 }
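/* Editorial note: the user-visible OpenMP 5.0 API that ultimately exercises
 * this path is omp_pause_resource()/omp_pause_resource_all(); the exact
 * internal routing is not shown in this file. A minimal (hypothetical) use:
 *
 *   #include <omp.h>
 *   ...
 *   if (omp_pause_resource_all(omp_pause_soft) != 0)
 *     fprintf(stderr, "pause not supported or runtime not initialized\n");
 *   // subsequent OpenMP constructs re-create whatever resources they need
 */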
4213 #endif // OMP_50_ENABLED
4214 
4215 // end of file //