LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
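The flag_traits specializations above exist so that a single templated flag class can pick the right storage width and atomic primitives (TCR_4/TCR_8, 32- vs 64-bit test-then-or/and) from the flag_type template argument alone. Below is a standalone sketch of the same traits-dispatch pattern using standard C++ atomics in place of the KMP_TEST_THEN_* macros; all demo_* names are illustrative and not part of the runtime.

#include <atomic>
#include <cstdint>

// Illustrative stand-ins for the runtime's flag_type values.
enum demo_flag_type { demo_flag32, demo_flag64 };

template <demo_flag_type T> struct demo_traits {};

template <> struct demo_traits<demo_flag32> {
  typedef uint32_t flag_t; // 32-bit storage, like flag_traits<flag32>
};
template <> struct demo_traits<demo_flag64> {
  typedef uint64_t flag_t; // 64-bit storage, like flag_traits<flag64>
};

// One templated flag class; the traits choose the storage width, and
// std::atomic<flag_t> supplies the matching atomic read-modify-write ops.
template <demo_flag_type T> class demo_flag {
  typedef typename demo_traits<T>::flag_t flag_t;
  std::atomic<flag_t> loc{0};

public:
  flag_t test_then_or(flag_t v) { return loc.fetch_or(v); }   // cf. KMP_TEST_THEN_OR*
  flag_t test_then_and(flag_t v) { return loc.fetch_and(v); } // cf. KMP_TEST_THEN_AND*
  flag_t load() const { return loc.load(); }
};

int main() {
  demo_flag<demo_flag64> f;
  f.test_then_or(1u);
  return f.load() == 1 ? 0 : 1;
}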
102 
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
108  kmp_info_t *waiting_threads[1] = {nullptr};
109  kmp_uint32 num_waiting_threads;
110  std::atomic<bool> *sleepLoc;
111 
112 public:
113  typedef flag_traits<FlagType> traits_type;
114  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
115  kmp_flag(int nwaiters)
116  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
117  kmp_flag(std::atomic<bool> *sloc)
118  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
120  flag_type get_type() { return (flag_type)(t.type); }
121 
124  kmp_info_t *get_waiter(kmp_uint32 i) {
125  KMP_DEBUG_ASSERT(i < num_waiting_threads);
126  return waiting_threads[i];
127  }
129  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
132  void set_waiter(kmp_info_t *thr) {
133  waiting_threads[0] = thr;
134  num_waiting_threads = 1;
135  }
136  enum barrier_type get_bt() { return bs_last_barrier; }
137 };
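kmp_flag stores a 16-bit type tag in flag_properties so that code holding only an untyped base pointer (see __kmp_null_resume_wrapper at the end of this file) can recover the concrete flag class and dispatch accordingly. A minimal sketch of that tag-and-dispatch idea in standard C++; the demo_* names are illustrative assumptions, not the runtime's types.

#include <cstdio>

// Illustrative stand-ins for flag_type and flag_properties.
enum demo_type : unsigned { demo_t32 = 0, demo_t64 = 1 };

struct demo_properties {
  unsigned int type : 16;     // cf. flag_properties::type
  unsigned int reserved : 16; // cf. flag_properties::reserved
};

struct demo_base {
  demo_properties t;
  explicit demo_base(demo_type ty) {
    t.type = ty; // store the 16-bit type tag, cf. kmp_flag's constructors
    t.reserved = 0;
  }
  demo_type get_type() const { return static_cast<demo_type>(t.type); }
};

struct demo_flag32 : demo_base { demo_flag32() : demo_base(demo_t32) {} };
struct demo_flag64 : demo_base { demo_flag64() : demo_base(demo_t64) {} };

// cf. __kmp_null_resume_wrapper(): given only a base pointer, recover the
// concrete flag type from the embedded tag and dispatch on it.
void resume_any(demo_base *f) {
  switch (f->get_type()) {
  case demo_t32:
    std::printf("would resume a 32-bit flag\n");
    break;
  case demo_t64:
    std::printf("would resume a 64-bit flag\n");
    break;
  }
}

int main() {
  demo_flag64 f;
  resume_any(&f);
  return 0;
}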
138 
140 template <typename PtrType, flag_type FlagType, bool Sleepable>
141 class kmp_flag_native : public kmp_flag<FlagType> {
142 protected:
143  volatile PtrType *loc;
144  PtrType checker = (PtrType)0;
145  typedef flag_traits<FlagType> traits_type;
146 
147 public:
148  typedef PtrType flag_t;
149  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
150  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
151  : kmp_flag<FlagType>(1), loc(p) {
152  this->waiting_threads[0] = thr;
153  }
154  kmp_flag_native(volatile PtrType *p, PtrType c)
155  : kmp_flag<FlagType>(), loc(p), checker(c) {}
156  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
157  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
158  virtual ~kmp_flag_native() {}
159  void *operator new(size_t size) { return __kmp_allocate(size); }
160  void operator delete(void *p) { __kmp_free(p); }
161  volatile PtrType *get() { return loc; }
162  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
163  void set(volatile PtrType *new_loc) { loc = new_loc; }
164  PtrType load() { return *loc; }
165  void store(PtrType val) { *loc = val; }
167  virtual bool done_check() {
168  if (Sleepable && !(this->sleepLoc))
169  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
170  checker;
171  else
172  return traits_type::tcr(*(this->get())) == checker;
173  }
176  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
182  virtual bool notdone_check() {
183  return traits_type::tcr(*(this->get())) != checker;
184  }
187  void internal_release() {
188  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
189  }
193  PtrType set_sleeping() {
194  if (this->sleepLoc) {
195  this->sleepLoc->store(true);
196  return *(this->get());
197  }
198  return traits_type::test_then_or((volatile PtrType *)this->get(),
199  KMP_BARRIER_SLEEP_STATE);
200  }
204  void unset_sleeping() {
205  if (this->sleepLoc) {
206  this->sleepLoc->store(false);
207  return;
208  }
209  traits_type::test_then_and((volatile PtrType *)this->get(),
210  ~KMP_BARRIER_SLEEP_STATE);
211  }
214  bool is_sleeping_val(PtrType old_loc) {
215  if (this->sleepLoc)
216  return this->sleepLoc->load();
217  return old_loc & KMP_BARRIER_SLEEP_STATE;
218  }
220  bool is_sleeping() {
221  if (this->sleepLoc)
222  return this->sleepLoc->load();
223  return is_sleeping_val(*(this->get()));
224  }
225  bool is_any_sleeping() {
226  if (this->sleepLoc)
227  return this->sleepLoc->load();
228  return is_sleeping_val(*(this->get()));
229  }
230  kmp_uint8 *get_stolen() { return NULL; }
231 };
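When no separate sleepLoc is in use, the sleep state lives in the flag word itself: set_sleeping() ORs KMP_BARRIER_SLEEP_STATE in, is_sleeping_val() tests it in a sampled value, and a Sleepable done_check() masks it off before comparing against checker, while internal_release() bumps the value by 4 so the low status bits are left alone. A standalone sketch of that encoding with standard atomics; the bit position and all names here are illustrative assumptions, not the runtime's constants.

#include <atomic>
#include <cassert>
#include <cstdint>

// Illustrative stand-in for KMP_BARRIER_SLEEP_STATE: a low-order status bit
// that the +4 release "bump" deliberately steps over.
static const uint64_t SLEEP_BIT = 1;

static std::atomic<uint64_t> flag_word{0};

// Waiter side: advertise the intent to sleep (cf. set_sleeping()), returning
// the previous value so the caller can test it with is_sleeping_val().
uint64_t set_sleeping() { return flag_word.fetch_or(SLEEP_BIT); }

// Releaser side: did the sampled value carry the sleep bit? (cf. is_sleeping_val())
bool is_sleeping_val(uint64_t old_val) { return (old_val & SLEEP_BIT) != 0; }

// Sleep-aware completion test (cf. done_check() with Sleepable == true):
// mask the sleep bit off before comparing against the expected value.
bool done_check(uint64_t checker) {
  return (flag_word.load() & ~SLEEP_BIT) == checker;
}

int main() {
  uint64_t before = set_sleeping();
  assert(!is_sleeping_val(before)); // nobody had set the sleep bit yet
  flag_word.fetch_add(4);           // cf. test_then_add4(): release bump
  assert(done_check(4));            // the sleep bit does not hide completion
  return 0;
}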
232 
234 template <typename PtrType, flag_type FlagType, bool Sleepable>
235 class kmp_flag_atomic : public kmp_flag<FlagType> {
236 protected:
237  std::atomic<PtrType> *loc;
238  PtrType checker = (PtrType)0;
239 public:
240  typedef flag_traits<FlagType> traits_type;
241  typedef PtrType flag_t;
242  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
243  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
244  : kmp_flag<FlagType>(1), loc(p) {
245  this->waiting_threads[0] = thr;
246  }
247  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
248  : kmp_flag<FlagType>(), loc(p), checker(c) {}
249  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
250  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
252  std::atomic<PtrType> *get() { return loc; }
254  void *get_void_p() { return RCAST(void *, loc); }
256  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
258  PtrType load() { return loc->load(std::memory_order_acquire); }
260  void store(PtrType val) { loc->store(val, std::memory_order_release); }
262  bool done_check() {
263  if (Sleepable && !(this->sleepLoc))
264  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
265  else
266  return this->load() == checker;
267  }
270  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
276  bool notdone_check() { return this->load() != checker; }
279  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
283  PtrType set_sleeping() {
284  if (this->sleepLoc) {
285  this->sleepLoc->store(true);
286  return *(this->get());
287  }
288  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
289  }
293  void unset_sleeping() {
294  if (this->sleepLoc) {
295  this->sleepLoc->store(false);
296  return;
297  }
298  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
299  }
302  bool is_sleeping_val(PtrType old_loc) {
303  if (this->sleepLoc)
304  return this->sleepLoc->load();
305  return old_loc & KMP_BARRIER_SLEEP_STATE;
306  }
308  bool is_sleeping() {
309  if (this->sleepLoc)
310  return this->sleepLoc->load();
311  return is_sleeping_val(this->load());
312  }
313  bool is_any_sleeping() {
314  if (this->sleepLoc)
315  return this->sleepLoc->load();
316  return is_sleeping_val(this->load());
317  }
318  kmp_uint8 *get_stolen() { return NULL; }
319 };
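kmp_flag_atomic publishes with store(..., memory_order_release) and observes with load(memory_order_acquire), so a waiter that sees the released flag value also sees every write the releasing thread made beforehand. A minimal release/acquire pairing in standard C++; the names are illustrative.

#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>

static std::atomic<uint64_t> go{0}; // plays the role of the flag location
static int payload = 0;             // ordinary data published by the flag

int main() {
  std::thread waiter([] {
    // cf. done_check(): spin until the flag reaches the expected value,
    // loading with acquire semantics like kmp_flag_atomic::load().
    while (go.load(std::memory_order_acquire) != 1) {
    }
    assert(payload == 42); // visible thanks to the release/acquire pair
  });

  payload = 42;                           // written before the release
  go.store(1, std::memory_order_release); // cf. kmp_flag_atomic::store()
  waiter.join();
  return 0;
}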
320 
321 #if OMPT_SUPPORT
322 OMPT_NOINLINE
323 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
324  ompt_state_t ompt_state,
325  ompt_data_t *tId) {
326  int ds_tid = this_thr->th.th_info.ds.ds_tid;
327  if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
328  ompt_state == ompt_state_wait_barrier_teams) {
329  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
330 #if OMPT_OPTIONAL
331  void *codeptr = NULL;
332  ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
333  if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
334  sync_kind = ompt_sync_region_barrier_teams;
335  if (ompt_enabled.ompt_callback_sync_region_wait) {
336  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
337  sync_kind, ompt_scope_end, NULL, tId, codeptr);
338  }
339  if (ompt_enabled.ompt_callback_sync_region) {
340  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
341  sync_kind, ompt_scope_end, NULL, tId, codeptr);
342  }
343 #endif
344  if (!KMP_MASTER_TID(ds_tid)) {
345  if (ompt_enabled.ompt_callback_implicit_task) {
346  int flags = this_thr->th.ompt_thread_info.parallel_flags;
347  flags = (flags & ompt_parallel_league) ? ompt_task_initial
348  : ompt_task_implicit;
349  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
350  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
351  }
352  // return to idle state
353  this_thr->th.ompt_thread_info.state = ompt_state_idle;
354  } else {
355  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
356  }
357  }
358 }
359 #endif
360 
361 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
362  __kmp_wait_* must make certain that another thread calls __kmp_release
363  to wake it back up to prevent deadlocks!
364 
365  NOTE: We may not belong to a team at this point. */
366 template <class C, bool final_spin, bool Cancellable = false,
367  bool Sleepable = true>
368 static inline bool
369 __kmp_wait_template(kmp_info_t *this_thr,
370  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
371 #if USE_ITT_BUILD && USE_ITT_NOTIFY
372  volatile void *spin = flag->get();
373 #endif
374  kmp_uint32 spins;
375  int th_gtid;
376  int tasks_completed = FALSE;
377 #if !KMP_USE_MONITOR
378  kmp_uint64 poll_count;
379  kmp_uint64 hibernate_goal;
380 #else
381  kmp_uint32 hibernate;
382 #endif
383  kmp_uint64 time;
384 
385  KMP_FSYNC_SPIN_INIT(spin, NULL);
386  if (flag->done_check()) {
387  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
388  return false;
389  }
390  th_gtid = this_thr->th.th_info.ds.ds_gtid;
391  if (Cancellable) {
392  kmp_team_t *team = this_thr->th.th_team;
393  if (team && team->t.t_cancel_request == cancel_parallel)
394  return true;
395  }
396 #if KMP_OS_UNIX
397  if (final_spin)
398  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
399 #endif
400  KA_TRACE(20,
401  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
402 #if KMP_STATS_ENABLED
403  stats_state_e thread_state = KMP_GET_THREAD_STATE();
404 #endif
405 
406 /* OMPT Behavior:
407 THIS function is called from
408  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
409  these have join / fork behavior
410 
411  In these cases, we don't change the state or trigger events in THIS
412 function.
413  Events are triggered in the calling code (__kmp_barrier):
414 
415  state := ompt_state_overhead
416  barrier-begin
417  barrier-wait-begin
418  state := ompt_state_wait_barrier
419  call join-barrier-implementation (finally arrive here)
420  {}
421  call fork-barrier-implementation (finally arrive here)
422  {}
423  state := ompt_state_overhead
424  barrier-wait-end
425  barrier-end
426  state := ompt_state_work_parallel
427 
428 
429  __kmp_fork_barrier (after thread creation, before executing implicit task)
430  call fork-barrier-implementation (finally arrive here)
431  {} // worker arrive here with state = ompt_state_idle
432 
433 
434  __kmp_join_barrier (implicit barrier at end of parallel region)
435  state := ompt_state_barrier_implicit
436  barrier-begin
437  barrier-wait-begin
438  call join-barrier-implementation (finally arrive here
439 final_spin=FALSE)
440  {
441  }
442  __kmp_fork_barrier (implicit barrier at end of parallel region)
443  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
444 
445  Worker after task-team is finished:
446  barrier-wait-end
447  barrier-end
448  implicit-task-end
449  idle-begin
450  state := ompt_state_idle
451 
452  Before leaving, if state = ompt_state_idle
453  idle-end
454  state := ompt_state_overhead
455 */
456 #if OMPT_SUPPORT
457  ompt_state_t ompt_entry_state;
458  ompt_data_t *tId;
459  if (ompt_enabled.enabled) {
460  ompt_entry_state = this_thr->th.ompt_thread_info.state;
461  if (!final_spin ||
462  (ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
463  ompt_entry_state != ompt_state_wait_barrier_teams) ||
464  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
465  ompt_lw_taskteam_t *team = NULL;
466  if (this_thr->th.th_team)
467  team = this_thr->th.th_team->t.ompt_serialized_team_info;
468  if (team) {
469  tId = &(team->ompt_task_info.task_data);
470  } else {
471  tId = OMPT_CUR_TASK_DATA(this_thr);
472  }
473  } else {
474  tId = &(this_thr->th.ompt_thread_info.task_data);
475  }
476  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
477  this_thr->th.th_task_team == NULL)) {
478  // implicit task is done. Either no taskqueue, or task-team finished
479  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
480  }
481  }
482 #endif
483 
484  KMP_INIT_YIELD(spins); // Setup for waiting
485  KMP_INIT_BACKOFF(time);
486 
487  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
488  __kmp_pause_status == kmp_soft_paused) {
489 #if KMP_USE_MONITOR
490 // The worker threads cannot rely on the team struct existing at this point.
491 // Use the bt values cached in the thread struct instead.
492 #ifdef KMP_ADJUST_BLOCKTIME
493  if (__kmp_pause_status == kmp_soft_paused ||
494  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
495  // Force immediate suspend if not set by user and more threads than
496  // available procs
497  hibernate = 0;
498  else
499  hibernate = this_thr->th.th_team_bt_intervals;
500 #else
501  hibernate = this_thr->th.th_team_bt_intervals;
502 #endif /* KMP_ADJUST_BLOCKTIME */
503 
504  /* If the blocktime is nonzero, we want to make sure that we spin wait for
505  the entirety of the specified #intervals, plus up to one interval more.
506  This increment makes certain that this thread doesn't go to sleep too
507  soon. */
508  if (hibernate != 0)
509  hibernate++;
510 
511  // Add in the current time value.
512  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
513  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
514  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
515  hibernate - __kmp_global.g.g_time.dt.t_value));
516 #else
517  if (__kmp_pause_status == kmp_soft_paused) {
518  // Force immediate suspend
519  hibernate_goal = KMP_NOW();
520  } else
521  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
522  poll_count = 0;
523  (void)poll_count;
524 #endif // KMP_USE_MONITOR
525  }
526 
527  KMP_MB();
528 
529  // Main wait spin loop
530  while (flag->notdone_check()) {
531  kmp_task_team_t *task_team = NULL;
532  if (__kmp_tasking_mode != tskm_immediate_exec) {
533  task_team = this_thr->th.th_task_team;
534  /* If the thread's task team pointer is NULL, it means one of 3 things:
535  1) A newly-created thread is first being released by
536  __kmp_fork_barrier(), and its task team has not been set up yet.
537  2) All tasks have been executed to completion.
538  3) Tasking is off for this region. This could be because we are in a
539  serialized region (perhaps the outer one), or else tasking was manually
540  disabled (KMP_TASKING=0). */
541  if (task_team != NULL) {
542  if (TCR_SYNC_4(task_team->tt.tt_active)) {
543  if (KMP_TASKING_ENABLED(task_team)) {
544  flag->execute_tasks(
545  this_thr, th_gtid, final_spin,
546  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
547  } else
548  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
549  } else {
550  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
551 #if OMPT_SUPPORT
552  // task-team is done now; other cases should have been caught above
553  if (final_spin && ompt_enabled.enabled)
554  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
555 #endif
556  this_thr->th.th_task_team = NULL;
557  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
558  }
559  } else {
560  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
561  } // if
562  } // if
563 
564  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
565  if (TCR_4(__kmp_global.g.g_done)) {
566  if (__kmp_global.g.g_abort)
567  __kmp_abort_thread();
568  break;
569  }
570 
571  // If we are oversubscribed, or have waited a bit (and
572  // KMP_LIBRARY=throughput), then yield
573  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
574 
575 #if KMP_STATS_ENABLED
576  // Check if thread has been signalled to idle state
577  // This indicates that the logical "join-barrier" has finished
578  if (this_thr->th.th_stats->isIdle() &&
579  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
580  KMP_SET_THREAD_STATE(IDLE);
581  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
582  }
583 #endif
584  // Check if the barrier surrounding this wait loop has been cancelled
585  if (Cancellable) {
586  kmp_team_t *team = this_thr->th.th_team;
587  if (team && team->t.t_cancel_request == cancel_parallel)
588  break;
589  }
590 
591  // For a hidden helper thread, task_team == nullptr means the main thread
592  // has not yet released the barrier. We cannot simply sleep here: if the
593  // main thread releases all child barriers while the hidden helper threads
594  // are still asleep, the follow-up setup, such as task-team
595  // synchronization, is skipped and this thread is left without a task
596  // team. Usually that is harmless. However, in one corner case (the first
597  // task encountered is an untied task), the check in __kmp_task_alloc
598  // crashes because it dereferences the task team pointer without checking
599  // it for nullptr, presumably assuming the task team has already been set
600  // up.
601  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
602  !TCR_4(__kmp_hidden_helper_team_done)) {
603  // If there are still hidden helper tasks to be executed, the hidden helper
604  // thread will not enter a waiting state.
605  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
606  __kmp_hidden_helper_worker_thread_wait();
607  }
608  continue;
609  }
610 
611  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
612  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
613  __kmp_pause_status != kmp_soft_paused)
614  continue;
615 
616  // Don't suspend if there is a likelihood of new tasks being spawned.
617  if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) &&
618  !__kmp_wpolicy_passive)
619  continue;
620 
621 #if KMP_USE_MONITOR
622  // If we have waited a bit more, fall asleep
623  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
624  continue;
625 #else
626  if (KMP_BLOCKING(hibernate_goal, poll_count++))
627  continue;
628 #endif
629  // Don't suspend if wait loop designated non-sleepable
630  // in template parameters
631  if (!Sleepable)
632  continue;
633 
634 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
635  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
636  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
637  flag->mwait(th_gtid);
638  } else {
639 #endif
640  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
641 #if KMP_OS_UNIX
642  if (final_spin)
643  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
644 #endif
645  flag->suspend(th_gtid);
646 #if KMP_OS_UNIX
647  if (final_spin)
648  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
649 #endif
650 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
651  }
652 #endif
653 
654  if (TCR_4(__kmp_global.g.g_done)) {
655  if (__kmp_global.g.g_abort)
656  __kmp_abort_thread();
657  break;
658  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
659  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
660  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
661  }
662  // TODO: If thread is done with work and times out, disband/free
663  }
664 
665 #if OMPT_SUPPORT
666  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
667  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
668 #if OMPT_OPTIONAL
669  if (final_spin) {
670  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
671  ompt_exit_state = this_thr->th.ompt_thread_info.state;
672  }
673 #endif
674  if (ompt_exit_state == ompt_state_idle) {
675  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
676  }
677  }
678 #endif
679 #if KMP_STATS_ENABLED
680  // If we were put into idle state, pop that off the state stack
681  if (KMP_GET_THREAD_STATE() == IDLE) {
682  KMP_POP_PARTITIONED_TIMER();
683  KMP_SET_THREAD_STATE(thread_state);
684  this_thr->th.th_stats->resetIdleFlag();
685  }
686 #endif
687 
688 #if KMP_OS_UNIX
689  if (final_spin)
690  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
691 #endif
692  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
693  if (Cancellable) {
694  kmp_team_t *team = this_thr->th.th_team;
695  if (team && team->t.t_cancel_request == cancel_parallel) {
696  if (tasks_completed) {
697  // undo the previous decrement of unfinished_threads so that the
698  // thread can decrement at the join barrier with no problem
699  kmp_task_team_t *task_team = this_thr->th.th_task_team;
700  std::atomic<kmp_int32> *unfinished_threads =
701  &(task_team->tt.tt_unfinished_threads);
702  KMP_ATOMIC_INC(unfinished_threads);
703  }
704  return true;
705  }
706  }
707  return false;
708 }
709 
710 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
711 // Set up a monitor on the flag variable causing the calling thread to wait in
712 // a less active state until the flag variable is modified.
713 template <class C>
714 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
715  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
716  kmp_info_t *th = __kmp_threads[th_gtid];
717 
718  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
719  flag->get()));
720 
721  // User-level mwait is available
722  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
723 
724  __kmp_suspend_initialize_thread(th);
725  __kmp_lock_suspend_mx(th);
726 
727  volatile void *spin = flag->get();
728  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
729 
730  if (!flag->done_check()) {
731  // Mark thread as no longer active
732  th->th.th_active = FALSE;
733  if (th->th.th_active_in_pool) {
734  th->th.th_active_in_pool = FALSE;
735  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
736  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
737  }
738  flag->set_sleeping();
739  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
740 #if KMP_HAVE_UMWAIT
741  if (__kmp_umwait_enabled) {
742  __kmp_umonitor(cacheline);
743  }
744 #elif KMP_HAVE_MWAIT
745  if (__kmp_mwait_enabled) {
746  __kmp_mm_monitor(cacheline, 0, 0);
747  }
748 #endif
749  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
750  // the address could happen after the last time we checked and before
751  // monitoring started, in which case monitor can't detect the change.
752  if (flag->done_check())
753  flag->unset_sleeping();
754  else {
755  // if flag changes here, wake-up happens immediately
756  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
757  th->th.th_sleep_loc_type = flag->get_type();
758  __kmp_unlock_suspend_mx(th);
759  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
760 #if KMP_HAVE_UMWAIT
761  if (__kmp_umwait_enabled) {
762  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
763  }
764 #elif KMP_HAVE_MWAIT
765  if (__kmp_mwait_enabled) {
766  __kmp_mm_mwait(0, __kmp_mwait_hints);
767  }
768 #endif
769  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
770  __kmp_lock_suspend_mx(th);
771  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
772  if (flag->is_sleeping())
773  flag->unset_sleeping();
774  TCW_PTR(th->th.th_sleep_loc, NULL);
775  th->th.th_sleep_loc_type = flag_unset;
776  }
777  // Mark thread as active again
778  th->th.th_active = TRUE;
779  if (TCR_4(th->th.th_in_pool)) {
780  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
781  th->th.th_active_in_pool = TRUE;
782  }
783  } // Drop out to main wait loop to check flag, handle tasks, etc.
784  __kmp_unlock_suspend_mx(th);
785  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
786 }
787 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
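The key ordering in __kmp_mwait_template is: mark the flag as sleeping, arm the monitor on the flag's cache line, then re-check done_check() before actually blocking, so a release that slips in between the last check and the wait cannot be lost. The same register-then-recheck discipline is sketched below with a plain mutex/condition_variable analogy in standard C++; this is not the runtime's monitor/mwait path, and all names are illustrative.

#include <condition_variable>
#include <mutex>
#include <thread>

static std::mutex mx;
static std::condition_variable cv;
static bool done = false;     // plays the role of the flag reaching "checker"
static bool sleeping = false; // plays the role of the sleep bit / th_sleep_loc

void waiter() {
  std::unique_lock<std::mutex> lk(mx); // cf. __kmp_lock_suspend_mx()
  sleeping = true;                     // cf. flag->set_sleeping()
  // Re-check AFTER registering as a sleeper; a release that happened just
  // before this point is still observed and the wait is skipped entirely.
  while (!done)
    cv.wait(lk);     // cf. mwait / suspend
  sleeping = false;  // cf. flag->unset_sleeping()
}

void releaser() {
  std::lock_guard<std::mutex> lk(mx);
  done = true;       // cf. flag->internal_release()
  if (sleeping)      // cf. flag->is_any_sleeping()
    cv.notify_one(); // cf. flag->resume(wait_gtid)
}

int main() {
  std::thread t(waiter);
  releaser();
  t.join();
  return 0;
}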
788 
789 /* Release any threads specified as waiting on the flag by releasing the flag
790  and resuming the waiting thread(s) if indicated by the sleep bit(s). A thread
791  that calls __kmp_wait_template must call this function to wake up the
792  potentially sleeping thread and prevent deadlocks! */
793 template <class C> static inline void __kmp_release_template(C *flag) {
794 #ifdef KMP_DEBUG
795  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
796 #endif
797  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
798  KMP_DEBUG_ASSERT(flag->get());
799  KMP_FSYNC_RELEASING(flag->get_void_p());
800 
801  flag->internal_release();
802 
803  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
804  flag->get(), flag->load()));
805 
806  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
807  // Only need to check sleep stuff if infinite block time not set.
808  // Are *any* threads waiting on flag sleeping?
809  if (flag->is_any_sleeping()) {
810  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
811  // if sleeping waiter exists at i, sets current_waiter to i inside flag
812  kmp_info_t *waiter = flag->get_waiter(i);
813  if (waiter) {
814  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
815  // Wake up thread if needed
816  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
817  "flag(%p) set\n",
818  gtid, wait_gtid, flag->get()));
819  flag->resume(wait_gtid); // unsets flag's current_waiter when done
820  }
821  }
822  }
823  }
824 }
825 
826 template <bool Cancellable, bool Sleepable>
827 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
828 public:
829  kmp_flag_32(std::atomic<kmp_uint32> *p)
830  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
831  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
832  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
833  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
834  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
835  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
836 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
837  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
838 #endif
839  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
840  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
841  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
842  kmp_int32 is_constrained) {
843  return __kmp_execute_tasks_32(
844  this_thr, gtid, this, final_spin,
845  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
846  }
847  bool wait(kmp_info_t *this_thr,
848  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
849  if (final_spin)
850  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
851  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
852  else
853  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
854  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
855  }
856  void release() { __kmp_release_template(this); }
857  flag_type get_ptr_type() { return flag32; }
858 };
859 
860 template <bool Cancellable, bool Sleepable>
861 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
862 public:
863  kmp_flag_64(volatile kmp_uint64 *p)
864  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
865  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
866  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
867  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
868  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
869  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
870  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
871  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
872 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
873  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
874 #endif
875  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
876  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
877  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
878  kmp_int32 is_constrained) {
879  return __kmp_execute_tasks_64(
880  this_thr, gtid, this, final_spin,
881  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
882  }
883  bool wait(kmp_info_t *this_thr,
884  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
885  if (final_spin)
886  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
887  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
888  else
889  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
890  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
891  }
892  void release() { __kmp_release_template(this); }
893  flag_type get_ptr_type() { return flag64; }
894 };
895 
896 template <bool Cancellable, bool Sleepable>
897 class kmp_atomic_flag_64
898  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
899 public:
900  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
901  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
902  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
903  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
904  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
905  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
906  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
907  std::atomic<bool> *loc)
908  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
909  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
910  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
911  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
912  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
913  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
914  kmp_int32 is_constrained) {
915  return __kmp_atomic_execute_tasks_64(
916  this_thr, gtid, this, final_spin,
917  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
918  }
919  bool wait(kmp_info_t *this_thr,
920  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
921  if (final_spin)
922  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
923  Sleepable>(
924  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
925  else
926  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
927  Sleepable>(
928  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
929  }
930  void release() { __kmp_release_template(this); }
931  flag_type get_ptr_type() { return atomic_flag64; }
932 };
933 
934 // Hierarchical 64-bit on-core barrier instantiation
935 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
936  kmp_uint32 offset;
937  bool flag_switch;
938  enum barrier_type bt;
940  kmp_info_t *this_thr = nullptr;
941 #if USE_ITT_BUILD
942  void *itt_sync_obj;
943 #endif
944  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
945  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
946  }
947 
948 public:
949  kmp_flag_oncore(volatile kmp_uint64 *p)
950  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
951  }
952  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
953  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
954  flag_switch(false),
955  bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
956  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
957  enum barrier_type bar_t,
958  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
959  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
960  flag_switch(false), bt(bar_t),
961  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
962  virtual ~kmp_flag_oncore() override {}
963  void *operator new(size_t size) { return __kmp_allocate(size); }
964  void operator delete(void *p) { __kmp_free(p); }
965  bool done_check_val(kmp_uint64 old_loc) override {
966  return byteref(&old_loc, offset) == checker;
967  }
968  bool done_check() override { return done_check_val(*get()); }
969  bool notdone_check() override {
970  // Calculate flag_switch
971  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
972  flag_switch = true;
973  if (byteref(get(), offset) != 1 && !flag_switch)
974  return true;
975  else if (flag_switch) {
976  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
977  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
978  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
979  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
980  }
981  return false;
982  }
983  void internal_release() {
984  // Other threads can write their own bytes simultaneously.
985  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
986  byteref(get(), offset) = 1;
987  } else {
988  kmp_uint64 mask = 0;
989  byteref(&mask, offset) = 1;
990  KMP_TEST_THEN_OR64(get(), mask);
991  }
992  }
993  void wait(kmp_info_t *this_thr, int final_spin) {
994  if (final_spin)
995  __kmp_wait_template<kmp_flag_oncore, TRUE>(
996  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
997  else
998  __kmp_wait_template<kmp_flag_oncore, FALSE>(
999  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
1000  }
1001  void release() { __kmp_release_template(this); }
1002  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
1003 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
1004  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
1005 #endif
1006  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1007  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1008  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1009  kmp_int32 is_constrained) {
1010 #if OMPD_SUPPORT
1011  int ret = __kmp_execute_tasks_oncore(
1012  this_thr, gtid, this, final_spin,
1013  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1014  if (ompd_state & OMPD_ENABLE_BP)
1015  ompd_bp_task_end();
1016  return ret;
1017 #else
1018  return __kmp_execute_tasks_oncore(
1019  this_thr, gtid, this, final_spin,
1020  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1021 #endif
1022  }
1023  enum barrier_type get_bt() { return bt; }
1024  flag_type get_ptr_type() { return flag_oncore; }
1025 };
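kmp_flag_oncore treats one 64-bit word as eight per-thread bytes: byteref() indexes a byte by offset, done_check_val() compares only that byte with the checker, and internal_release() either writes the byte directly (infinite blocktime) or builds a one-byte mask and ORs it in atomically so siblings writing other bytes are undisturbed. A standalone sketch of that packing; the names and the use of std::atomic in place of KMP_TEST_THEN_OR64 are illustrative.

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstring>

// One 64-bit word shared by up to eight children, one byte each.
static std::atomic<uint64_t> core_word{0};

// cf. internal_release() on the non-infinite-blocktime path:
// build a mask with a single byte set and OR it in atomically.
void release_child(unsigned offset) {
  uint64_t mask = 0;
  unsigned char one = 1;
  std::memcpy(reinterpret_cast<unsigned char *>(&mask) + offset, &one, 1);
  core_word.fetch_or(mask); // corresponds to KMP_TEST_THEN_OR64(get(), mask)
}

// cf. done_check_val(): examine only this child's byte of a sampled value.
bool child_done(uint64_t sampled, unsigned offset, unsigned char checker) {
  unsigned char byte;
  std::memcpy(&byte, reinterpret_cast<unsigned char *>(&sampled) + offset, 1);
  return byte == checker;
}

int main() {
  release_child(3);
  uint64_t v = core_word.load();
  assert(child_done(v, 3, 1));  // byte 3 was released
  assert(!child_done(v, 0, 1)); // byte 0 is untouched
  return 0;
}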
1026 
1027 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1028  int gtid = __kmp_gtid_from_thread(thr);
1029  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1030  flag_type type = thr->th.th_sleep_loc_type;
1031  if (!flag)
1032  return;
1033  // Attempt to wake up a thread: examine its type and call appropriate template
1034  switch (type) {
1035  case flag32:
1036  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1037  break;
1038  case flag64:
1039  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1040  break;
1041  case atomic_flag64:
1042  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1043  break;
1044  case flag_oncore:
1045  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1046  break;
1047  case flag_unset:
1048  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1049  break;
1050  }
1051 }
1052 
1057 #endif // KMP_WAIT_RELEASE_H