LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
39 enum flag_type {
40  flag32, // 32-bit flags
41  flag64, // 64-bit flags
42  flag_oncore // special 64-bit flag for on-core barrier (hierarchical)
43 };
44 
48 template <typename P> class kmp_flag_native {
49  volatile P *loc; // Pointer to the flag storage that is modified by another thread
50  flag_type t; // "Type" of the flag held in loc
51 
52 public:
53  typedef P flag_t;
54  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
55  volatile P *get() { return loc; }
56  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
57  void set(volatile P *new_loc) { loc = new_loc; }
58  flag_type get_type() { return t; }
59  P load() { return *loc; }
60  void store(P val) { *loc = val; }
61 };
62 
66 template <typename P> class kmp_flag {
67  std::atomic<P>
68  *loc; // Pointer to the flag storage that is modified by another thread
70  flag_type t; // "Type" of the flag held in loc
71 public:
72  typedef P flag_t;
73  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
77  std::atomic<P> *get() { return loc; }
81  void *get_void_p() { return RCAST(void *, loc); }
85  void set(std::atomic<P> *new_loc) { loc = new_loc; }
89  flag_type get_type() { return t; }
93  P load() { return loc->load(std::memory_order_acquire); }
97  void store(P val) { loc->store(val, std::memory_order_release); }
98  // Derived classes must provide the following:
99  /*
100  kmp_info_t * get_waiter(kmp_uint32 i);
101  kmp_uint32 get_num_waiters();
102  bool done_check();
103  bool done_check_val(P old_loc);
104  bool notdone_check();
105  P internal_release();
106  void suspend(int th_gtid);
107  void resume(int th_gtid);
108  P set_sleeping();
109  P unset_sleeping();
110  bool is_sleeping();
111  bool is_any_sleeping();
112  bool is_sleeping_val(P old_loc);
113  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
114  int *thread_finished
115  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
116  is_constrained);
117  */
118 };
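
The interface listed in the comment above is consumed duck-typed: __kmp_wait_template and __kmp_release_template below are templates over the flag class and simply call done_check(), notdone_check(), internal_release(), and so on, with no virtual dispatch. A minimal standalone sketch of that pattern, using only the C++ standard library (toy_flag and toy_wait are illustrative names, not part of libomp):

#include <atomic>

// Hypothetical flag type providing the same duck-typed interface.
struct toy_flag {
  std::atomic<unsigned> *loc;
  unsigned checker;
  bool done_check() { return loc->load(std::memory_order_acquire) == checker; }
  bool notdone_check() { return !done_check(); }
  void internal_release() { loc->fetch_add(4, std::memory_order_release); }
};

// Analogue of __kmp_wait_template: accepts any class exposing the interface.
template <class C> void toy_wait(C *flag) {
  while (flag->notdone_check()) {
    // spin; the real loop also yields, runs tasks, and may sleep
  }
}
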
119 
120 #if OMPT_SUPPORT
121 OMPT_NOINLINE
122 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
123  ompt_state_t ompt_state,
124  ompt_data_t *tId) {
125  int ds_tid = this_thr->th.th_info.ds.ds_tid;
126  if (ompt_state == ompt_state_wait_barrier_implicit) {
127  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
128 #if OMPT_OPTIONAL
129  void *codeptr = NULL;
130  if (ompt_enabled.ompt_callback_sync_region_wait) {
131  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
132  ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
133  }
134  if (ompt_enabled.ompt_callback_sync_region) {
135  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
136  ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
137  }
138 #endif
139  if (!KMP_MASTER_TID(ds_tid)) {
140  if (ompt_enabled.ompt_callback_implicit_task) {
141  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
142  ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
143  }
144  // return to idle state
145  this_thr->th.ompt_thread_info.state = ompt_state_idle;
146  } else {
147  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
148  }
149  }
150 }
151 #endif
152 
153 /* Spin wait loop that first does pause, then yield, then sleep. A thread that
154  calls __kmp_wait_* must make certain that another thread calls __kmp_release
155  to wake it back up to prevent deadlocks!
156 
157  NOTE: We may not belong to a team at this point. */
158 template <class C, int final_spin, bool cancellable = false,
159  bool sleepable = true>
160 static inline bool
161 __kmp_wait_template(kmp_info_t *this_thr,
162  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
163 #if USE_ITT_BUILD && USE_ITT_NOTIFY
164  volatile void *spin = flag->get();
165 #endif
166  kmp_uint32 spins;
167  int th_gtid;
168  int tasks_completed = FALSE;
169  int oversubscribed;
170 #if !KMP_USE_MONITOR
171  kmp_uint64 poll_count;
172  kmp_uint64 hibernate_goal;
173 #else
174  kmp_uint32 hibernate;
175 #endif
176 
177  KMP_FSYNC_SPIN_INIT(spin, NULL);
178  if (flag->done_check()) {
179  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
180  return false;
181  }
182  th_gtid = this_thr->th.th_info.ds.ds_gtid;
183  if (cancellable) {
184  kmp_team_t *team = this_thr->th.th_team;
185  if (team && team->t.t_cancel_request == cancel_parallel)
186  return true;
187  }
188 #if KMP_OS_UNIX
189  if (final_spin)
190  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
191 #endif
192  KA_TRACE(20,
193  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
194 #if KMP_STATS_ENABLED
195  stats_state_e thread_state = KMP_GET_THREAD_STATE();
196 #endif
197 
198 /* OMPT Behavior:
199 THIS function is called from
200  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
201  these have join / fork behavior
202 
203  In these cases, we don't change the state or trigger events in THIS
204 function.
205  Events are triggered in the calling code (__kmp_barrier):
206 
207  state := ompt_state_overhead
208  barrier-begin
209  barrier-wait-begin
210  state := ompt_state_wait_barrier
211  call join-barrier-implementation (finally arrive here)
212  {}
213  call fork-barrier-implementation (finally arrive here)
214  {}
215  state := ompt_state_overhead
216  barrier-wait-end
217  barrier-end
218  state := ompt_state_work_parallel
219 
220 
221  __kmp_fork_barrier (after thread creation, before executing implicit task)
222  call fork-barrier-implementation (finally arrive here)
223  {} // worker arrive here with state = ompt_state_idle
224 
225 
226  __kmp_join_barrier (implicit barrier at end of parallel region)
227  state := ompt_state_barrier_implicit
228  barrier-begin
229  barrier-wait-begin
230  call join-barrier-implementation (finally arrive here
231 final_spin=FALSE)
232  {
233  }
234  __kmp_fork_barrier (implicit barrier at end of parallel region)
235  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
236 
237  Worker after task-team is finished:
238  barrier-wait-end
239  barrier-end
240  implicit-task-end
241  idle-begin
242  state := ompt_state_idle
243 
244  Before leaving, if state = ompt_state_idle
245  idle-end
246  state := ompt_state_overhead
247 */
248 #if OMPT_SUPPORT
249  ompt_state_t ompt_entry_state;
250  ompt_data_t *tId;
251  if (ompt_enabled.enabled) {
252  ompt_entry_state = this_thr->th.ompt_thread_info.state;
253  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
254  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
255  ompt_lw_taskteam_t *team =
256  this_thr->th.th_team->t.ompt_serialized_team_info;
257  if (team) {
258  tId = &(team->ompt_task_info.task_data);
259  } else {
260  tId = OMPT_CUR_TASK_DATA(this_thr);
261  }
262  } else {
263  tId = &(this_thr->th.ompt_thread_info.task_data);
264  }
265  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
266  this_thr->th.th_task_team == NULL)) {
267  // implicit task is done. Either no taskqueue, or task-team finished
268  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
269  }
270  }
271 #endif
272 
273  // Setup for waiting
274  KMP_INIT_YIELD(spins);
275 
276  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
277 #if OMP_50_ENABLED
278  || __kmp_pause_status == kmp_soft_paused
279 #endif
280  ) {
281 #if KMP_USE_MONITOR
282 // The worker threads cannot rely on the team struct existing at this point.
283 // Use the bt values cached in the thread struct instead.
284 #ifdef KMP_ADJUST_BLOCKTIME
285  if (
286 #if OMP_50_ENABLED
287  __kmp_pause_status == kmp_soft_paused ||
288 #endif
289  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
290  // Force immediate suspend if not set by user and more threads than
291  // available procs
292  hibernate = 0;
293  else
294  hibernate = this_thr->th.th_team_bt_intervals;
295 #else
296  hibernate = this_thr->th.th_team_bt_intervals;
297 #endif /* KMP_ADJUST_BLOCKTIME */
298 
299  /* If the blocktime is nonzero, we want to make sure that we spin wait for
300  the entirety of the specified #intervals, plus up to one interval more.
301  This increment makes certain that this thread doesn't go to sleep too
302  soon. */
303  if (hibernate != 0)
304  hibernate++;
305 
306  // Add in the current time value.
307  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
308  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
309  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
310  hibernate - __kmp_global.g.g_time.dt.t_value));
311 #else
312 #if OMP_50_ENABLED
313  if (__kmp_pause_status == kmp_soft_paused) {
314  // Force immediate suspend
315  hibernate_goal = KMP_NOW();
316  } else
317 #endif
318  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
319  poll_count = 0;
320 #endif // KMP_USE_MONITOR
321  }
322 
323  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
324  KMP_MB();
325 
326  // Main wait spin loop
327  while (flag->notdone_check()) {
328  int in_pool;
329  kmp_task_team_t *task_team = NULL;
330  if (__kmp_tasking_mode != tskm_immediate_exec) {
331  task_team = this_thr->th.th_task_team;
332  /* If the thread's task team pointer is NULL, it means one of 3 things:
333  1) A newly-created thread is first being released by
334  __kmp_fork_barrier(), and its task team has not been set up yet.
335  2) All tasks have been executed to completion.
336  3) Tasking is off for this region. This could be because we are in a
337  serialized region (perhaps the outer one), or else tasking was manually
338  disabled (KMP_TASKING=0). */
339  if (task_team != NULL) {
340  if (TCR_SYNC_4(task_team->tt.tt_active)) {
341  if (KMP_TASKING_ENABLED(task_team))
342  flag->execute_tasks(
343  this_thr, th_gtid, final_spin,
344  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
345  else
346  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
347  } else {
348  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
349 #if OMPT_SUPPORT
350  // task-team is done now; other cases should be caught above
351  if (final_spin && ompt_enabled.enabled)
352  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
353 #endif
354  this_thr->th.th_task_team = NULL;
355  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
356  }
357  } else {
358  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
359  } // if
360  } // if
361 
362  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
363  if (TCR_4(__kmp_global.g.g_done)) {
364  if (__kmp_global.g.g_abort)
365  __kmp_abort_thread();
366  break;
367  }
368 
369  // If we are oversubscribed, or have waited a bit (and
370  // KMP_LIBRARY=throughput), then yield
371  // TODO: Should it be number of cores instead of thread contexts? Like:
372  // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
373  // Need performance improvement data to make the change...
374  if (oversubscribed) {
375  KMP_YIELD(1);
376  } else {
377  KMP_YIELD_SPIN(spins);
378  }
379  // Check if this thread was transferred from a team
380  // to the thread pool (or vice-versa) while spinning.
381  in_pool = !!TCR_4(this_thr->th.th_in_pool);
382  if (in_pool != !!this_thr->th.th_active_in_pool) {
383  if (in_pool) { // Recently transferred from team to pool
384  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
385  this_thr->th.th_active_in_pool = TRUE;
386  /* Here, we cannot assert that:
387  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
388  __kmp_thread_pool_nth);
389  __kmp_thread_pool_nth is inc/dec'd by the master thread while the
390  fork/join lock is held, whereas __kmp_thread_pool_active_nth is
391  inc/dec'd asynchronously by the workers. The two can get out of sync
392  for brief periods of time. */
393  } else { // Recently transferred from pool to team
394  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
395  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
396  this_thr->th.th_active_in_pool = FALSE;
397  }
398  }
399 
400 #if KMP_STATS_ENABLED
401  // Check if thread has been signalled to idle state
402  // This indicates that the logical "join-barrier" has finished
403  if (this_thr->th.th_stats->isIdle() &&
404  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
405  KMP_SET_THREAD_STATE(IDLE);
406  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
407  }
408 #endif
409  // Check if the barrier surrounding this wait loop has been cancelled
410  if (cancellable) {
411  kmp_team_t *team = this_thr->th.th_team;
412  if (team && team->t.t_cancel_request == cancel_parallel)
413  break;
414  }
415 
416  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
417  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME
418 #if OMP_50_ENABLED
419  && __kmp_pause_status != kmp_soft_paused
420 #endif
421  )
422  continue;
423 
424  // Don't suspend if there is a likelihood of new tasks being spawned.
425  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
426  continue;
427 
428 #if KMP_USE_MONITOR
429  // If we have waited a bit more, fall asleep
430  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
431  continue;
432 #else
433  if (KMP_BLOCKING(hibernate_goal, poll_count++))
434  continue;
435 #endif
436  // Don't suspend if wait loop designated non-sleepable
437  // in template parameters
438  if (!sleepable)
439  continue;
440 
441 #if OMP_50_ENABLED
442  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
443  __kmp_pause_status != kmp_soft_paused)
444  continue;
445 #endif
446 
447  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
448 
449 #if KMP_OS_UNIX
450  if (final_spin)
451  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
452 #endif
453  flag->suspend(th_gtid);
454 #if KMP_OS_UNIX
455  if (final_spin)
456  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
457 #endif
458 
459  if (TCR_4(__kmp_global.g.g_done)) {
460  if (__kmp_global.g.g_abort)
461  __kmp_abort_thread();
462  break;
463  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
464  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
465  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
466  }
467  // TODO: If thread is done with work and times out, disband/free
468  }
469 
470 #if OMPT_SUPPORT
471  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
472  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
473 #if OMPT_OPTIONAL
474  if (final_spin) {
475  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
476  ompt_exit_state = this_thr->th.ompt_thread_info.state;
477  }
478 #endif
479  if (ompt_exit_state == ompt_state_idle) {
480  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
481  }
482  }
483 #endif
484 #if KMP_STATS_ENABLED
485  // If we were put into idle state, pop that off the state stack
486  if (KMP_GET_THREAD_STATE() == IDLE) {
487  KMP_POP_PARTITIONED_TIMER();
488  KMP_SET_THREAD_STATE(thread_state);
489  this_thr->th.th_stats->resetIdleFlag();
490  }
491 #endif
492 
493 #if KMP_OS_UNIX
494  if (final_spin)
495  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
496 #endif
497  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
498  if (cancellable) {
499  kmp_team_t *team = this_thr->th.th_team;
500  if (team && team->t.t_cancel_request == cancel_parallel) {
501  if (tasks_completed) {
502  // undo the previous decrement of unfinished_threads so that the
503  // thread can decrement at the join barrier with no problem
504  kmp_task_team_t *task_team = this_thr->th.th_task_team;
505  std::atomic<kmp_int32> *unfinished_threads =
506  &(task_team->tt.tt_unfinished_threads);
507  KMP_ATOMIC_INC(unfinished_threads);
508  }
509  return true;
510  }
511  }
512  return false;
513 }
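
Stripped of the tasking, OMPT, ITT, and stats bookkeeping, the control flow above reduces to a spin/yield phase bounded by the blocktime followed by a blocking sleep. A standalone sketch of that backoff shape, assuming only the C++ standard library (toy_waiter is an illustrative name, and a condition variable stands in for __kmp_suspend):

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

struct toy_waiter {
  std::atomic<bool> released{false};
  std::mutex mtx;
  std::condition_variable cv;

  void wait(std::chrono::microseconds spin_budget) {
    auto deadline = std::chrono::steady_clock::now() + spin_budget;
    while (!released.load(std::memory_order_acquire)) {
      if (std::chrono::steady_clock::now() < deadline) {
        std::this_thread::yield(); // spin/yield phase, bounded by the "blocktime"
        continue;
      }
      // Sleep phase: analogue of flag->suspend(th_gtid).
      std::unique_lock<std::mutex> lk(mtx);
      cv.wait(lk, [this] { return released.load(std::memory_order_acquire); });
    }
  }
};
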
514 
515 /* Release the flag being waited on, and resume any waiting thread if
516  indicated by the sleep bit(s). Every call to __kmp_wait_template must be
517  paired with a call to this function from some thread to wake up the
518  potentially sleeping waiter and prevent deadlocks! */
519 template <class C> static inline void __kmp_release_template(C *flag) {
520 #ifdef KMP_DEBUG
521  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
522 #endif
523  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
524  KMP_DEBUG_ASSERT(flag->get());
525  KMP_FSYNC_RELEASING(flag->get_void_p());
526 
527  flag->internal_release();
528 
529  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
530  flag->get(), flag->load()));
531 
532  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
533  // Only need to check sleep stuff if infinite block time not set.
534  // Are *any* threads waiting on flag sleeping?
535  if (flag->is_any_sleeping()) {
536  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
537  // if sleeping waiter exists at i, sets current_waiter to i inside flag
538  kmp_info_t *waiter = flag->get_waiter(i);
539  if (waiter) {
540  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
541  // Wake up thread if needed
542  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
543  "flag(%p) set\n",
544  gtid, wait_gtid, flag->get()));
545  flag->resume(wait_gtid); // unsets flag's current_waiter when done
546  }
547  }
548  }
549  }
550 }
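
The matching release side of the toy_waiter sketch above keeps the same ordering as __kmp_release_template: publish the new flag value first, then wake any sleeper. The mutex exists only to close the lost-wakeup race inherent to the condition-variable analogue; the runtime itself relies on the sleep bit plus __kmp_suspend/__kmp_resume instead:

// Release side for the toy_waiter sketch above (illustrative only).
void toy_release(toy_waiter &w) {
  {
    std::lock_guard<std::mutex> lk(w.mtx);
    w.released.store(true, std::memory_order_release); // analogue of internal_release()
  }
  w.cv.notify_all(); // analogue of flag->resume(): wake any sleeping waiter
}
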
551 
552 template <typename FlagType> struct flag_traits {};
553 
554 template <> struct flag_traits<kmp_uint32> {
555  typedef kmp_uint32 flag_t;
556  static const flag_type t = flag32;
557  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
558  static inline flag_t test_then_add4(volatile flag_t *f) {
559  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
560  }
561  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
562  return KMP_TEST_THEN_OR32(f, v);
563  }
564  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
565  return KMP_TEST_THEN_AND32(f, v);
566  }
567 };
568 
569 template <> struct flag_traits<kmp_uint64> {
570  typedef kmp_uint64 flag_t;
571  static const flag_type t = flag64;
572  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
573  static inline flag_t test_then_add4(volatile flag_t *f) {
574  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
575  }
576  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
577  return KMP_TEST_THEN_OR64(f, v);
578  }
579  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
580  return KMP_TEST_THEN_AND64(f, v);
581  }
582 };
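
flag_traits lets the flag classes below manipulate 32-bit and 64-bit flag words through one set of names (tcr, test_then_add4, test_then_or, test_then_and) selected at compile time. A standalone sketch of the same specialization pattern, with std::atomic::fetch_or standing in for the KMP_TEST_THEN_OR* macros (the toy_* names are illustrative):

#include <atomic>
#include <cstdint>

template <typename T> struct toy_traits {};

template <> struct toy_traits<std::uint32_t> {
  static std::uint32_t test_then_or(std::atomic<std::uint32_t> *f, std::uint32_t v) {
    return f->fetch_or(v); // returns the old value, like KMP_TEST_THEN_OR32
  }
};

template <> struct toy_traits<std::uint64_t> {
  static std::uint64_t test_then_or(std::atomic<std::uint64_t> *f, std::uint64_t v) {
    return f->fetch_or(v); // returns the old value, like KMP_TEST_THEN_OR64
  }
};

// Generic code picks the right specialization from the flag's value type.
template <typename T> T toy_set_bit(std::atomic<T> *flag, T bit) {
  return toy_traits<T>::test_then_or(flag, bit);
}
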
583 
584 // Basic flag that does not use C++11 std::atomic
585 template <typename FlagType>
586 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
587  typedef flag_traits<FlagType> traits_type;
588  FlagType checker; // Value the flag is compared against to check for release
590  kmp_info_t
591  *waiting_threads[1]; // Array of threads sleeping on this flag
592  kmp_uint32
593  num_waiting_threads; // Number of threads sleeping on this flag
594 public:
595  kmp_basic_flag_native(volatile FlagType *p)
596  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
597  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
598  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
599  waiting_threads[0] = thr;
600  }
601  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
602  : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
603  num_waiting_threads(0) {}
608  kmp_info_t *get_waiter(kmp_uint32 i) {
609  KMP_DEBUG_ASSERT(i < num_waiting_threads);
610  return waiting_threads[i];
611  }
615  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
621  void set_waiter(kmp_info_t *thr) {
622  waiting_threads[0] = thr;
623  num_waiting_threads = 1;
624  }
628  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
633  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
641  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
646  void internal_release() {
647  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
648  }
654  FlagType set_sleeping() {
655  return traits_type::test_then_or((volatile FlagType *)this->get(),
656  KMP_BARRIER_SLEEP_STATE);
657  }
663  FlagType unset_sleeping() {
664  return traits_type::test_then_and((volatile FlagType *)this->get(),
665  ~KMP_BARRIER_SLEEP_STATE);
666  }
671  bool is_sleeping_val(FlagType old_loc) {
672  return old_loc & KMP_BARRIER_SLEEP_STATE;
673  }
677  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
678  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
679  kmp_uint8 *get_stolen() { return NULL; }
680  enum barrier_type get_bt() { return bs_last_barrier; }
681 };
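
internal_release() bumps the flag by 4 rather than 1 because the low bits of a barrier flag word are reserved for status bits: assuming the kmp.h layout in which KMP_BARRIER_SLEEP_STATE occupies a low-order bit and the barrier "go" state advances in steps of KMP_BARRIER_STATE_BUMP (taken to be 4 here), a release can never disturb a waiter's sleep bit. A worked check of that arithmetic, under those stated assumptions:

#include <cassert>
#include <cstdint>

int main() {
  // Assumed bit layout: sleep flag in a low bit, "go" state in bits 2 and up.
  const std::uint64_t SLEEP_STATE = 1; // stand-in for KMP_BARRIER_SLEEP_STATE
  const std::uint64_t STATE_BUMP = 4;  // stand-in for KMP_BARRIER_STATE_BUMP

  std::uint64_t go = 8;                  // current "go" value a waiter spins on
  std::uint64_t flag = go | SLEEP_STATE; // waiter has marked itself sleeping

  flag += STATE_BUMP;                    // internal_release(): test_then_add4
  assert((flag & SLEEP_STATE) != 0);     // the sleep bit survives the bump...
  assert((flag & ~SLEEP_STATE) == go + STATE_BUMP); // ...and the go state advanced
  return 0;
}
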
682 
683 template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
684  typedef flag_traits<FlagType> traits_type;
685  FlagType checker; // Value the flag is compared against to check for release
687  kmp_info_t
688  *waiting_threads[1]; // Array of threads sleeping on this flag
689  kmp_uint32
690  num_waiting_threads; // Number of threads sleeping on this flag
691 public:
692  kmp_basic_flag(std::atomic<FlagType> *p)
693  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
694  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
695  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
696  waiting_threads[0] = thr;
697  }
698  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
699  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
700  num_waiting_threads(0) {}
705  kmp_info_t *get_waiter(kmp_uint32 i) {
706  KMP_DEBUG_ASSERT(i < num_waiting_threads);
707  return waiting_threads[i];
708  }
712  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
718  void set_waiter(kmp_info_t *thr) {
719  waiting_threads[0] = thr;
720  num_waiting_threads = 1;
721  }
725  bool done_check() { return this->load() == checker; }
730  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
738  bool notdone_check() { return this->load() != checker; }
743  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
749  FlagType set_sleeping() {
750  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
751  }
757  FlagType unset_sleeping() {
758  return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
759  }
764  bool is_sleeping_val(FlagType old_loc) {
765  return old_loc & KMP_BARRIER_SLEEP_STATE;
766  }
770  bool is_sleeping() { return is_sleeping_val(this->load()); }
771  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
772  kmp_uint8 *get_stolen() { return NULL; }
773  enum barrier_type get_bt() { return bs_last_barrier; }
774 };
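
set_sleeping() and unset_sleeping() return the old flag value precisely so the suspend path can detect a release that raced with going to sleep: the waiter sets the sleep bit, tests the previous value with done_check_val(), and backs out of sleeping if the flag was already released. A standalone sketch of that handshake, with SLEEP_BIT as an assumed stand-in for KMP_BARRIER_SLEEP_STATE:

#include <atomic>
#include <cstdint>

constexpr std::uint64_t SLEEP_BIT = 1; // assumed layout; see KMP_BARRIER_SLEEP_STATE

// Returns true if it is safe to block; false if the flag was already released.
bool try_mark_sleeping(std::atomic<std::uint64_t> &flag, std::uint64_t done_value) {
  std::uint64_t old_val = flag.fetch_or(SLEEP_BIT); // analogue of set_sleeping()
  if ((old_val & ~SLEEP_BIT) == done_value) {
    flag.fetch_and(~SLEEP_BIT);                     // analogue of unset_sleeping()
    return false;                                   // already released: do not sleep
  }
  return true; // releaser will observe the sleep bit and resume this thread
}
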
775 
776 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
777 public:
778  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
779  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
780  : kmp_basic_flag<kmp_uint32>(p, thr) {}
781  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
782  : kmp_basic_flag<kmp_uint32>(p, c) {}
783  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
784  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
785  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
786  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
787  kmp_int32 is_constrained) {
788  return __kmp_execute_tasks_32(
789  this_thr, gtid, this, final_spin,
790  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
791  }
792  void wait(kmp_info_t *this_thr,
793  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
794  if (final_spin)
795  __kmp_wait_template<kmp_flag_32, TRUE>(
796  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
797  else
798  __kmp_wait_template<kmp_flag_32, FALSE>(
799  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
800  }
801  void release() { __kmp_release_template(this); }
802  flag_type get_ptr_type() { return flag32; }
803 };
804 
805 class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
806 public:
807  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
808  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
809  : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
810  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
811  : kmp_basic_flag_native<kmp_uint64>(p, c) {}
812  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
813  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
814  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
815  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
816  kmp_int32 is_constrained) {
817  return __kmp_execute_tasks_64(
818  this_thr, gtid, this, final_spin,
819  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
820  }
821  void wait(kmp_info_t *this_thr,
822  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
823  if (final_spin)
824  __kmp_wait_template<kmp_flag_64, TRUE>(
825  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
826  else
827  __kmp_wait_template<kmp_flag_64, FALSE>(
828  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
829  }
830  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
831  int final_spin
832  USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
833  bool retval = false;
834  if (final_spin)
835  retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
836  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
837  else
838  retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
839  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
840  return retval;
841  }
842  void release() { __kmp_release_template(this); }
843  flag_type get_ptr_type() { return flag64; }
844 };
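
Within the runtime, the usual pairing is one thread blocking in wait() on a barrier flag word, for example a thread's b_go field exactly as kmp_flag_oncore::notdone_check() below constructs it, while another thread advances the same word with release(). A schematic of that pairing; the thread pointers and barrier type are placeholders for state owned by the barrier code:

// Waiter side (schematic):
//   kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
//                    (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
//
// Releaser side (schematic):
//   kmp_flag_64 go_flag(&other_thr->th.th_bar[bt].bb.b_go, other_thr);
//   go_flag.release(); // test_then_add4 on b_go, then resume other_thr if its sleep bit is set
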
845 
846 // Hierarchical 64-bit on-core barrier instantiation
847 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
848  kmp_uint64 checker;
849  kmp_info_t *waiting_threads[1];
850  kmp_uint32 num_waiting_threads;
851  kmp_uint32
852  offset; // Portion (byte) of the flag of interest for an operation
853  bool flag_switch; // Indicates a switch in flag location
854  enum barrier_type bt; // Barrier type
855  kmp_info_t *this_thr; // Thread to redirect to a different flag location
857 #if USE_ITT_BUILD
858  void *
859  itt_sync_obj; // ITT object to pass to a new flag location
860 #endif
861  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
862  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
863  }
864 
865 public:
866  kmp_flag_oncore(volatile kmp_uint64 *p)
867  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
868  flag_switch(false) {}
869  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
870  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
871  offset(idx), flag_switch(false) {}
872  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
873  enum barrier_type bar_t,
874  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
875  : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
876  num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
877  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
878  kmp_info_t *get_waiter(kmp_uint32 i) {
879  KMP_DEBUG_ASSERT(i < num_waiting_threads);
880  return waiting_threads[i];
881  }
882  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
883  void set_waiter(kmp_info_t *thr) {
884  waiting_threads[0] = thr;
885  num_waiting_threads = 1;
886  }
887  bool done_check_val(kmp_uint64 old_loc) {
888  return byteref(&old_loc, offset) == checker;
889  }
890  bool done_check() { return done_check_val(*get()); }
891  bool notdone_check() {
892  // Calculate flag_switch
893  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
894  flag_switch = true;
895  if (byteref(get(), offset) != 1 && !flag_switch)
896  return true;
897  else if (flag_switch) {
898  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
899  kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
900  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
901  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
902  }
903  return false;
904  }
905  void internal_release() {
906  // Other threads can write their own bytes simultaneously.
907  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
908  byteref(get(), offset) = 1;
909  } else {
910  kmp_uint64 mask = 0;
911  byteref(&mask, offset) = 1;
912  KMP_TEST_THEN_OR64(get(), mask);
913  }
914  }
915  kmp_uint64 set_sleeping() {
916  return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
917  }
918  kmp_uint64 unset_sleeping() {
919  return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
920  }
921  bool is_sleeping_val(kmp_uint64 old_loc) {
922  return old_loc & KMP_BARRIER_SLEEP_STATE;
923  }
924  bool is_sleeping() { return is_sleeping_val(*get()); }
925  bool is_any_sleeping() { return is_sleeping_val(*get()); }
926  void wait(kmp_info_t *this_thr, int final_spin) {
927  if (final_spin)
928  __kmp_wait_template<kmp_flag_oncore, TRUE>(
929  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
930  else
931  __kmp_wait_template<kmp_flag_oncore, FALSE>(
932  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
933  }
934  void release() { __kmp_release_template(this); }
935  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
936  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
937  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
938  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
939  kmp_int32 is_constrained) {
940  return __kmp_execute_tasks_oncore(
941  this_thr, gtid, this, final_spin,
942  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
943  }
944  kmp_uint8 *get_stolen() { return NULL; }
945  enum barrier_type get_bt() { return bt; }
946  flag_type get_ptr_type() { return flag_oncore; }
947 };
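
The on-core flag packs one byte per thread into a single 64-bit word, so a group of threads sharing a core can be released with a single 64-bit store or OR. A standalone sketch of the byteref() idea (byte_at is an illustrative name; reader and writer only need to agree on the byte offset):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Read/write the byte at 'offset' inside a 64-bit flag word, like byteref().
static unsigned char &byte_at(std::uint64_t *word, std::size_t offset) {
  return reinterpret_cast<unsigned char *>(word)[offset];
}

int main() {
  std::uint64_t flags = 0;         // one byte per waiting thread, as in kmp_flag_oncore
  byte_at(&flags, 3) = 1;          // "release" the thread watching byte 3
  assert(byte_at(&flags, 3) == 1);
  assert(byte_at(&flags, 0) == 0); // other threads' bytes are untouched
  return 0;
}
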
948 
949 // Used to wake up threads; the volatile void *flag is usually the th_sleep_loc
950 // field associated with the thread identified by gtid.
951 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
952  if (!flag)
953  return;
954 
955  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
956  case flag32:
957  __kmp_resume_32(gtid, NULL);
958  break;
959  case flag64:
960  __kmp_resume_64(gtid, NULL);
961  break;
962  case flag_oncore:
963  __kmp_resume_oncore(gtid, NULL);
964  break;
965  }
966 }
967 
972 #endif // KMP_WAIT_RELEASE_H