LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
102 
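// Illustrative sketch (not part of the original header): the traits above give
// the flag templates below a width-agnostic way to touch the flag word. The
// helper here is hypothetical (its name is invented for illustration); it just
// releases a raw flag location through the matching traits.
template <flag_type FlagType>
static inline void
__kmp_example_release_raw(volatile typename flag_traits<FlagType>::flag_t *f) {
  typedef flag_traits<FlagType> traits_type;
  // Bump the flag by 4, the same release step kmp_flag_native performs below.
  (void)traits_type::test_then_add4(f);
}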
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
107  kmp_info_t *waiting_threads[1];
108  kmp_uint32 num_waiting_threads;
109  std::atomic<bool> *sleepLoc;
110 
111 public:
112  typedef flag_traits<FlagType> traits_type;
113  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114  kmp_flag(int nwaiters)
115  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116  kmp_flag(std::atomic<bool> *sloc)
117  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
119  flag_type get_type() { return (flag_type)(t.type); }
120 
123  kmp_info_t *get_waiter(kmp_uint32 i) {
124  KMP_DEBUG_ASSERT(i < num_waiting_threads);
125  return waiting_threads[i];
126  }
128  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
131  void set_waiter(kmp_info_t *thr) {
132  waiting_threads[0] = thr;
133  num_waiting_threads = 1;
134  }
135  enum barrier_type get_bt() { return bs_last_barrier; }
136 };
137 
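// Illustrative sketch (not part of the original header): the base class only
// tracks which thread(s) are parked on the flag so a releasing thread can find
// and resume them. A hypothetical debugging helper (name invented for
// illustration) that walks the recorded waiters:
template <flag_type FlagType>
static inline void __kmp_example_dump_waiters(kmp_flag<FlagType> &flag) {
  for (kmp_uint32 i = 0; i < flag.get_num_waiters(); ++i) {
    kmp_info_t *waiter = flag.get_waiter(i); // asserts i < num_waiting_threads
    KA_TRACE(100, ("example: waiter %u is T#%d\n", i,
                   waiter->th.th_info.ds.ds_gtid));
  }
}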
139 template <typename PtrType, flag_type FlagType, bool Sleepable>
140 class kmp_flag_native : public kmp_flag<FlagType> {
141 protected:
142  volatile PtrType *loc;
143  PtrType checker;
144  typedef flag_traits<FlagType> traits_type;
145 
146 public:
147  typedef PtrType flag_t;
148  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150  : kmp_flag<FlagType>(1), loc(p) {
151  this->waiting_threads[0] = thr;
152  }
153  kmp_flag_native(volatile PtrType *p, PtrType c)
154  : kmp_flag<FlagType>(), loc(p), checker(c) {}
155  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157  virtual ~kmp_flag_native() {}
158  void *operator new(size_t size) { return __kmp_allocate(size); }
159  void operator delete(void *p) { __kmp_free(p); }
160  volatile PtrType *get() { return loc; }
161  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
162  void set(volatile PtrType *new_loc) { loc = new_loc; }
163  PtrType load() { return *loc; }
164  void store(PtrType val) { *loc = val; }
166  virtual bool done_check() {
167  if (Sleepable && !(this->sleepLoc))
168  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
169  checker;
170  else
171  return traits_type::tcr(*(this->get())) == checker;
172  }
175  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
181  virtual bool notdone_check() {
182  return traits_type::tcr(*(this->get())) != checker;
183  }
186  void internal_release() {
187  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
188  }
192  PtrType set_sleeping() {
193  if (this->sleepLoc) {
194  this->sleepLoc->store(true);
195  return *(this->get());
196  }
197  return traits_type::test_then_or((volatile PtrType *)this->get(),
198  KMP_BARRIER_SLEEP_STATE);
199  }
203  void unset_sleeping() {
204  if (this->sleepLoc) {
205  this->sleepLoc->store(false);
206  return;
207  }
208  traits_type::test_then_and((volatile PtrType *)this->get(),
209  ~KMP_BARRIER_SLEEP_STATE);
210  }
213  bool is_sleeping_val(PtrType old_loc) {
214  if (this->sleepLoc)
215  return this->sleepLoc->load();
216  return old_loc & KMP_BARRIER_SLEEP_STATE;
217  }
219  bool is_sleeping() {
220  if (this->sleepLoc)
221  return this->sleepLoc->load();
222  return is_sleeping_val(*(this->get()));
223  }
224  bool is_any_sleeping() {
225  if (this->sleepLoc)
226  return this->sleepLoc->load();
227  return is_sleeping_val(*(this->get()));
228  }
229  kmp_uint8 *get_stolen() { return NULL; }
230 };
231 
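// Illustrative sketch (not part of the original header): when no separate
// sleepLoc is supplied, the sleep indication lives in the flag word itself,
// which is why done_check() masks KMP_BARRIER_SLEEP_STATE off before comparing
// against the checker. The function below is hypothetical and only shows the
// round trip of that in-band sleep bit.
static inline void __kmp_example_native_sleep_bit() {
  static volatile kmp_uint64 go = 0;
  kmp_flag_native<kmp_uint64, flag64, /*Sleepable=*/true> f(&go, /*c=*/4ULL);
  kmp_uint64 old_val = f.set_sleeping();  // ORs KMP_BARRIER_SLEEP_STATE into go
  bool was_asleep = f.is_sleeping_val(old_val); // sleep bit of the *old* value
  f.unset_sleeping();                     // ANDs the sleep bit back out
  (void)was_asleep;
}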
233 template <typename PtrType, flag_type FlagType, bool Sleepable>
234 class kmp_flag_atomic : public kmp_flag<FlagType> {
235 protected:
236  std::atomic<PtrType> *loc;
237  PtrType checker;
238 public:
239  typedef flag_traits<FlagType> traits_type;
240  typedef PtrType flag_t;
241  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
242  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
243  : kmp_flag<FlagType>(1), loc(p) {
244  this->waiting_threads[0] = thr;
245  }
246  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
247  : kmp_flag<FlagType>(), loc(p), checker(c) {}
248  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
249  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
251  std::atomic<PtrType> *get() { return loc; }
253  void *get_void_p() { return RCAST(void *, loc); }
255  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
257  PtrType load() { return loc->load(std::memory_order_acquire); }
259  void store(PtrType val) { loc->store(val, std::memory_order_release); }
261  bool done_check() {
262  if (Sleepable && !(this->sleepLoc))
263  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
264  else
265  return this->load() == checker;
266  }
269  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
275  bool notdone_check() { return this->load() != checker; }
278  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
282  PtrType set_sleeping() {
283  if (this->sleepLoc) {
284  this->sleepLoc->store(true);
285  return *(this->get());
286  }
287  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
288  }
292  void unset_sleeping() {
293  if (this->sleepLoc) {
294  this->sleepLoc->store(false);
295  return;
296  }
297  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
298  }
301  bool is_sleeping_val(PtrType old_loc) {
302  if (this->sleepLoc)
303  return this->sleepLoc->load();
304  return old_loc & KMP_BARRIER_SLEEP_STATE;
305  }
307  bool is_sleeping() {
308  if (this->sleepLoc)
309  return this->sleepLoc->load();
310  return is_sleeping_val(this->load());
311  }
312  bool is_any_sleeping() {
313  if (this->sleepLoc)
314  return this->sleepLoc->load();
315  return is_sleeping_val(this->load());
316  }
317  kmp_uint8 *get_stolen() { return NULL; }
318 };
319 
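// Illustrative sketch (not part of the original header): the atomic variant
// exposes the same protocol as kmp_flag_native, but load()/store() go through
// std::atomic with acquire/release ordering instead of the volatile/TCR
// macros. The function below is hypothetical and only shows the basic
// release-then-check sequence.
static inline void __kmp_example_atomic_flag() {
  static std::atomic<kmp_uint64> go(0);
  kmp_flag_atomic<kmp_uint64, atomic_flag64, /*Sleepable=*/false> f(&go,
                                                                    /*c=*/4ULL);
  f.store(0);                     // release store of the initial value
  f.internal_release();           // atomically adds 4, so the checker matches
  bool released = f.done_check(); // acquire load, compared against the checker
  (void)released;
}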
320 #if OMPT_SUPPORT
321 OMPT_NOINLINE
322 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
323  ompt_state_t ompt_state,
324  ompt_data_t *tId) {
325  int ds_tid = this_thr->th.th_info.ds.ds_tid;
326  if (ompt_state == ompt_state_wait_barrier_implicit_parallel ||
327  ompt_state == ompt_state_wait_barrier_teams) {
328  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
329 #if OMPT_OPTIONAL
330  void *codeptr = NULL;
331  ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;
332  if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)
333  sync_kind = ompt_sync_region_barrier_teams;
334  if (ompt_enabled.ompt_callback_sync_region_wait) {
335  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
336  sync_kind, ompt_scope_end, NULL, tId, codeptr);
337  }
338  if (ompt_enabled.ompt_callback_sync_region) {
339  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
340  sync_kind, ompt_scope_end, NULL, tId, codeptr);
341  }
342 #endif
343  if (!KMP_MASTER_TID(ds_tid)) {
344  if (ompt_enabled.ompt_callback_implicit_task) {
345  int flags = this_thr->th.ompt_thread_info.parallel_flags;
346  flags = (flags & ompt_parallel_league) ? ompt_task_initial
347  : ompt_task_implicit;
348  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
349  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
350  }
351  // return to idle state
352  this_thr->th.ompt_thread_info.state = ompt_state_idle;
353  } else {
354  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
355  }
356  }
357 }
358 #endif
359 
360 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
361  __kmp_wait_* must make certain that another thread calls __kmp_release
362  to wake it back up to prevent deadlocks!
363 
364  NOTE: We may not belong to a team at this point. */
365 template <class C, bool final_spin, bool Cancellable = false,
366  bool Sleepable = true>
367 static inline bool
368 __kmp_wait_template(kmp_info_t *this_thr,
369  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
370 #if USE_ITT_BUILD && USE_ITT_NOTIFY
371  volatile void *spin = flag->get();
372 #endif
373  kmp_uint32 spins;
374  int th_gtid;
375  int tasks_completed = FALSE;
376 #if !KMP_USE_MONITOR
377  kmp_uint64 poll_count;
378  kmp_uint64 hibernate_goal;
379 #else
380  kmp_uint32 hibernate;
381 #endif
382  kmp_uint64 time;
383 
384  KMP_FSYNC_SPIN_INIT(spin, NULL);
385  if (flag->done_check()) {
386  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
387  return false;
388  }
389  th_gtid = this_thr->th.th_info.ds.ds_gtid;
390  if (Cancellable) {
391  kmp_team_t *team = this_thr->th.th_team;
392  if (team && team->t.t_cancel_request == cancel_parallel)
393  return true;
394  }
395 #if KMP_OS_UNIX
396  if (final_spin)
397  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
398 #endif
399  KA_TRACE(20,
400  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
401 #if KMP_STATS_ENABLED
402  stats_state_e thread_state = KMP_GET_THREAD_STATE();
403 #endif
404 
405 /* OMPT Behavior:
406 THIS function is called from
407  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
408  these have join / fork behavior
409 
410  In these cases, we don't change the state or trigger events in THIS
411 function.
412  Events are triggered in the calling code (__kmp_barrier):
413 
414  state := ompt_state_overhead
415  barrier-begin
416  barrier-wait-begin
417  state := ompt_state_wait_barrier
418  call join-barrier-implementation (finally arrive here)
419  {}
420  call fork-barrier-implementation (finally arrive here)
421  {}
422  state := ompt_state_overhead
423  barrier-wait-end
424  barrier-end
425  state := ompt_state_work_parallel
426 
427 
428  __kmp_fork_barrier (after thread creation, before executing implicit task)
429  call fork-barrier-implementation (finally arrive here)
430  {} // worker arrive here with state = ompt_state_idle
431 
432 
433  __kmp_join_barrier (implicit barrier at end of parallel region)
434  state := ompt_state_barrier_implicit
435  barrier-begin
436  barrier-wait-begin
437  call join-barrier-implementation (finally arrive here
438 final_spin=FALSE)
439  {
440  }
441  __kmp_fork_barrier (implicit barrier at end of parallel region)
442  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
443 
444  Worker after task-team is finished:
445  barrier-wait-end
446  barrier-end
447  implicit-task-end
448  idle-begin
449  state := ompt_state_idle
450 
451  Before leaving, if state = ompt_state_idle
452  idle-end
453  state := ompt_state_overhead
454 */
455 #if OMPT_SUPPORT
456  ompt_state_t ompt_entry_state;
457  ompt_data_t *tId;
458  if (ompt_enabled.enabled) {
459  ompt_entry_state = this_thr->th.ompt_thread_info.state;
460  if (!final_spin ||
461  (ompt_entry_state != ompt_state_wait_barrier_implicit_parallel &&
462  ompt_entry_state != ompt_state_wait_barrier_teams) ||
463  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
464  ompt_lw_taskteam_t *team = NULL;
465  if (this_thr->th.th_team)
466  team = this_thr->th.th_team->t.ompt_serialized_team_info;
467  if (team) {
468  tId = &(team->ompt_task_info.task_data);
469  } else {
470  tId = OMPT_CUR_TASK_DATA(this_thr);
471  }
472  } else {
473  tId = &(this_thr->th.ompt_thread_info.task_data);
474  }
475  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
476  this_thr->th.th_task_team == NULL)) {
477  // implicit task is done. Either no taskqueue, or task-team finished
478  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
479  }
480  }
481 #endif
482 
483  KMP_INIT_YIELD(spins); // Setup for waiting
484  KMP_INIT_BACKOFF(time);
485 
486  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
487  __kmp_pause_status == kmp_soft_paused) {
488 #if KMP_USE_MONITOR
489 // The worker threads cannot rely on the team struct existing at this point.
490 // Use the bt values cached in the thread struct instead.
491 #ifdef KMP_ADJUST_BLOCKTIME
492  if (__kmp_pause_status == kmp_soft_paused ||
493  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
494  // Force immediate suspend if not set by user and more threads than
495  // available procs
496  hibernate = 0;
497  else
498  hibernate = this_thr->th.th_team_bt_intervals;
499 #else
500  hibernate = this_thr->th.th_team_bt_intervals;
501 #endif /* KMP_ADJUST_BLOCKTIME */
502 
503  /* If the blocktime is nonzero, we want to make sure that we spin wait for
504  the entirety of the specified #intervals, plus up to one interval more.
505  This increment makes certain that this thread doesn't go to sleep too
506  soon. */
507  if (hibernate != 0)
508  hibernate++;
509 
510  // Add in the current time value.
511  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
512  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
513  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
514  hibernate - __kmp_global.g.g_time.dt.t_value));
515 #else
516  if (__kmp_pause_status == kmp_soft_paused) {
517  // Force immediate suspend
518  hibernate_goal = KMP_NOW();
519  } else
520  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
521  poll_count = 0;
522  (void)poll_count;
523 #endif // KMP_USE_MONITOR
524  }
525 
526  KMP_MB();
527 
528  // Main wait spin loop
529  while (flag->notdone_check()) {
530  kmp_task_team_t *task_team = NULL;
531  if (__kmp_tasking_mode != tskm_immediate_exec) {
532  task_team = this_thr->th.th_task_team;
533  /* If the thread's task team pointer is NULL, it means one of 3 things:
534  1) A newly-created thread is first being released by
535  __kmp_fork_barrier(), and its task team has not been set up yet.
536  2) All tasks have been executed to completion.
537  3) Tasking is off for this region. This could be because we are in a
538  serialized region (perhaps the outer one), or else tasking was manually
539  disabled (KMP_TASKING=0). */
540  if (task_team != NULL) {
541  if (TCR_SYNC_4(task_team->tt.tt_active)) {
542  if (KMP_TASKING_ENABLED(task_team)) {
543  flag->execute_tasks(
544  this_thr, th_gtid, final_spin,
545  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
546  } else
547  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
548  } else {
549  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
550 #if OMPT_SUPPORT
551  // task-team is done now, other cases should be caught above
552  if (final_spin && ompt_enabled.enabled)
553  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
554 #endif
555  this_thr->th.th_task_team = NULL;
556  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
557  }
558  } else {
559  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
560  } // if
561  } // if
562 
563  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
564  if (TCR_4(__kmp_global.g.g_done)) {
565  if (__kmp_global.g.g_abort)
566  __kmp_abort_thread();
567  break;
568  }
569 
570  // If we are oversubscribed, or have waited a bit (and
571  // KMP_LIBRARY=throughput), then yield
572  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
573 
574 #if KMP_STATS_ENABLED
575  // Check if thread has been signalled to idle state
576  // This indicates that the logical "join-barrier" has finished
577  if (this_thr->th.th_stats->isIdle() &&
578  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
579  KMP_SET_THREAD_STATE(IDLE);
580  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
581  }
582 #endif
583  // Check if the barrier surrounding this wait loop has been cancelled
584  if (Cancellable) {
585  kmp_team_t *team = this_thr->th.th_team;
586  if (team && team->t.t_cancel_request == cancel_parallel)
587  break;
588  }
589 
590  // For a hidden helper thread, task_team == nullptr means the main thread
591  // has not released the barrier yet. We cannot wait here, because once the
592  // main thread releases all children's barriers the hidden helper threads
593  // would still be sleeping, so the follow-up configuration (such as the
594  // task-team sync) would not be performed and this thread would be left
595  // without a task team. Usually that is not a problem. However, in the
596  // corner case where the first task encountered is an untied task, the
597  // check in __kmp_task_alloc crashes because it uses the task team pointer
598  // without checking whether it is nullptr; it presumably assumes the
599  // pointer has already been set up.
600  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
601  !TCR_4(__kmp_hidden_helper_team_done)) {
602  // If there are still hidden helper tasks to be executed, the hidden helper
603  // thread will not enter a waiting state.
604  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
605  __kmp_hidden_helper_worker_thread_wait();
606  }
607  continue;
608  }
609 
610  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
611  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
612  __kmp_pause_status != kmp_soft_paused)
613  continue;
614 
615  // Don't suspend if there is a likelihood of new tasks being spawned.
616  if (task_team != NULL && TCR_4(task_team->tt.tt_found_tasks) &&
617  !__kmp_wpolicy_passive)
618  continue;
619 
620 #if KMP_USE_MONITOR
621  // If we have waited a bit more, fall asleep
622  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
623  continue;
624 #else
625  if (KMP_BLOCKING(hibernate_goal, poll_count++))
626  continue;
627 #endif
628  // Don't suspend if wait loop designated non-sleepable
629  // in template parameters
630  if (!Sleepable)
631  continue;
632 
633 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
634  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
635  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
636  flag->mwait(th_gtid);
637  } else {
638 #endif
639  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
640 #if KMP_OS_UNIX
641  if (final_spin)
642  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
643 #endif
644  flag->suspend(th_gtid);
645 #if KMP_OS_UNIX
646  if (final_spin)
647  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
648 #endif
649 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
650  }
651 #endif
652 
653  if (TCR_4(__kmp_global.g.g_done)) {
654  if (__kmp_global.g.g_abort)
655  __kmp_abort_thread();
656  break;
657  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
658  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
659  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
660  }
661  // TODO: If thread is done with work and times out, disband/free
662  }
663 
664 #if OMPT_SUPPORT
665  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
666  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
667 #if OMPT_OPTIONAL
668  if (final_spin) {
669  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
670  ompt_exit_state = this_thr->th.ompt_thread_info.state;
671  }
672 #endif
673  if (ompt_exit_state == ompt_state_idle) {
674  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
675  }
676  }
677 #endif
678 #if KMP_STATS_ENABLED
679  // If we were put into idle state, pop that off the state stack
680  if (KMP_GET_THREAD_STATE() == IDLE) {
681  KMP_POP_PARTITIONED_TIMER();
682  KMP_SET_THREAD_STATE(thread_state);
683  this_thr->th.th_stats->resetIdleFlag();
684  }
685 #endif
686 
687 #if KMP_OS_UNIX
688  if (final_spin)
689  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
690 #endif
691  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
692  if (Cancellable) {
693  kmp_team_t *team = this_thr->th.th_team;
694  if (team && team->t.t_cancel_request == cancel_parallel) {
695  if (tasks_completed) {
696  // undo the previous decrement of unfinished_threads so that the
697  // thread can decrement at the join barrier with no problem
698  kmp_task_team_t *task_team = this_thr->th.th_task_team;
699  std::atomic<kmp_int32> *unfinished_threads =
700  &(task_team->tt.tt_unfinished_threads);
701  KMP_ATOMIC_INC(unfinished_threads);
702  }
703  return true;
704  }
705  }
706  return false;
707 }
708 
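// Illustrative sketch (not part of the original header): the boolean result of
// __kmp_wait_template is only meaningful for Cancellable instantiations, where
// true means the enclosing parallel region was cancelled and the caller should
// unwind instead of assuming the flag was released. The wrapper below is
// hypothetical; the concrete flag classes further down wrap the same pattern
// in their wait() members.
template <class C>
static inline void __kmp_example_cancellable_wait(kmp_info_t *this_thr,
                                                  C *flag) {
  bool cancelled =
      __kmp_wait_template<C, FALSE, /*Cancellable=*/true, /*Sleepable=*/true>(
          this_thr, flag USE_ITT_BUILD_ARG(NULL));
  if (cancelled)
    return; // the team is abandoning the region; skip post-wait bookkeeping
}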
709 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
710 // Set up a monitor on the flag variable causing the calling thread to wait in
711 // a less active state until the flag variable is modified.
712 template <class C>
713 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
714  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
715  kmp_info_t *th = __kmp_threads[th_gtid];
716 
717  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
718  flag->get()));
719 
720  // User-level mwait is available
721  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
722 
723  __kmp_suspend_initialize_thread(th);
724  __kmp_lock_suspend_mx(th);
725 
726  volatile void *spin = flag->get();
727  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
728 
729  if (!flag->done_check()) {
730  // Mark thread as no longer active
731  th->th.th_active = FALSE;
732  if (th->th.th_active_in_pool) {
733  th->th.th_active_in_pool = FALSE;
734  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
735  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
736  }
737  flag->set_sleeping();
738  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
739 #if KMP_HAVE_UMWAIT
740  if (__kmp_umwait_enabled) {
741  __kmp_umonitor(cacheline);
742  }
743 #elif KMP_HAVE_MWAIT
744  if (__kmp_mwait_enabled) {
745  __kmp_mm_monitor(cacheline, 0, 0);
746  }
747 #endif
748  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
749  // the address could happen after the last time we checked and before
750  // monitoring started, in which case monitor can't detect the change.
751  if (flag->done_check())
752  flag->unset_sleeping();
753  else {
754  // if flag changes here, wake-up happens immediately
755  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
756  th->th.th_sleep_loc_type = flag->get_type();
757  __kmp_unlock_suspend_mx(th);
758  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
759 #if KMP_HAVE_UMWAIT
760  if (__kmp_umwait_enabled) {
761  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
762  }
763 #elif KMP_HAVE_MWAIT
764  if (__kmp_mwait_enabled) {
765  __kmp_mm_mwait(0, __kmp_mwait_hints);
766  }
767 #endif
768  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
769  __kmp_lock_suspend_mx(th);
770  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
771  if (flag->is_sleeping())
772  flag->unset_sleeping();
773  TCW_PTR(th->th.th_sleep_loc, NULL);
774  th->th.th_sleep_loc_type = flag_unset;
775  }
776  // Mark thread as active again
777  th->th.th_active = TRUE;
778  if (TCR_4(th->th.th_in_pool)) {
779  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
780  th->th.th_active_in_pool = TRUE;
781  }
782  } // Drop out to main wait loop to check flag, handle tasks, etc.
783  __kmp_unlock_suspend_mx(th);
784  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
785 }
786 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
787 
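// Illustrative sketch (not part of the original header): the essential shape
// above is "publish the intent to sleep, arm the monitor, re-check the flag,
// and only then wait", which closes the window for a lost wakeup. The function
// below is hypothetical and substitutes a plain pause loop for the real
// umwait/mwait blocking step, so it only demonstrates the ordering.
template <class C>
static inline void __kmp_example_monitor_wait_shape(C *flag) {
  flag->set_sleeping(); // a releaser now knows it may have to wake us
  // Re-check after publishing: a release that slipped in between the last
  // check and this point must not put us to sleep.
  if (flag->done_check()) {
    flag->unset_sleeping();
    return;
  }
  // Stand-in for __kmp_umwait()/__kmp_mm_mwait(); wake on any flag change.
  while (!flag->done_check())
    KMP_CPU_PAUSE();
  flag->unset_sleeping();
}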
788 /* Release any threads recorded as waiting on the flag by moving the flag to
789  its released value, and resume a waiting thread if the sleep bit(s) show it
790  went to sleep. For every thread that blocks in __kmp_wait_template, some
791  other thread must call this function to wake it up and prevent deadlocks! */
792 template <class C> static inline void __kmp_release_template(C *flag) {
793 #ifdef KMP_DEBUG
794  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
795 #endif
796  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
797  KMP_DEBUG_ASSERT(flag->get());
798  KMP_FSYNC_RELEASING(flag->get_void_p());
799 
800  flag->internal_release();
801 
802  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
803  flag->load()));
804 
805  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
806  // Only need to check sleep stuff if infinite block time not set.
807  // Are *any* threads waiting on flag sleeping?
808  if (flag->is_any_sleeping()) {
809  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
810  // if sleeping waiter exists at i, sets current_waiter to i inside flag
811  kmp_info_t *waiter = flag->get_waiter(i);
812  if (waiter) {
813  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
814  // Wake up thread if needed
815  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
816  "flag(%p) set\n",
817  gtid, wait_gtid, flag->get()));
818  flag->resume(wait_gtid); // unsets flag's current_waiter when done
819  }
820  }
821  }
822  }
823 }
824 
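// Illustrative sketch (not part of the original header): wait and release come
// in pairs on the same flag. The two hypothetical helpers below (names
// invented for illustration) show the minimal contract; the waiter side uses
// the wait() member that the concrete flag classes below provide.
template <class C>
static inline void __kmp_example_wait_side(kmp_info_t *this_thr, C *flag) {
  // Spins, optionally runs tasks, and may sleep until the flag is released.
  flag->wait(this_thr, /*final_spin=*/FALSE USE_ITT_BUILD_ARG(NULL));
}
template <class C> static inline void __kmp_example_release_side(C *flag) {
  // Moves the flag to its released value and resumes any sleeping waiter.
  __kmp_release_template(flag);
}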
825 template <bool Cancellable = false, bool Sleepable = true>
826 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
827 public:
828  kmp_flag_32(std::atomic<kmp_uint32> *p)
829  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
830  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
831  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
832  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
833  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
834  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
835 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
836  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
837 #endif
838  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
839  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
840  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
841  kmp_int32 is_constrained) {
842  return __kmp_execute_tasks_32(
843  this_thr, gtid, this, final_spin,
844  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
845  }
846  bool wait(kmp_info_t *this_thr,
847  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
848  if (final_spin)
849  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
850  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
851  else
852  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
853  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
854  }
855  void release() { __kmp_release_template(this); }
856  flag_type get_ptr_type() { return flag32; }
857 };
858 
859 template <bool Cancellable = false, bool Sleepable = true>
860 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
861 public:
862  kmp_flag_64(volatile kmp_uint64 *p)
863  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
864  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
865  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
866  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
867  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
868  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
869  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
870  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
871 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
872  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
873 #endif
874  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
875  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
876  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
877  kmp_int32 is_constrained) {
878  return __kmp_execute_tasks_64(
879  this_thr, gtid, this, final_spin,
880  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
881  }
882  bool wait(kmp_info_t *this_thr,
883  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
884  if (final_spin)
885  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
886  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
887  else
888  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
889  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
890  }
891  void release() { __kmp_release_template(this); }
892  flag_type get_ptr_type() { return flag64; }
893 };
894 
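// Illustrative sketch (not part of the original header): a 64-bit "go" flag in
// point-to-point use, mirroring how kmp_flag_oncore::notdone_check() below
// builds a kmp_flag_64 over a thread's b_go word. Both helpers are
// hypothetical and exist only to show the pairing.
static inline void __kmp_example_park_on_b_go(kmp_info_t *this_thr,
                                              enum barrier_type bt) {
  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                     (kmp_uint64)KMP_BARRIER_STATE_BUMP);
  flag.wait(this_thr, /*final_spin=*/TRUE USE_ITT_BUILD_ARG(NULL));
}
static inline void __kmp_example_release_b_go(kmp_info_t *waiter,
                                              enum barrier_type bt) {
  // Registering the waiter lets __kmp_release_template resume it if asleep.
  kmp_flag_64<> flag(&waiter->th.th_bar[bt].bb.b_go, waiter);
  flag.release();
}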
895 template <bool Cancellable = false, bool Sleepable = true>
896 class kmp_atomic_flag_64
897  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
898 public:
899  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
900  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
901  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
902  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
903  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
904  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
905  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
906  std::atomic<bool> *loc)
907  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
908  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
909  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
910  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
911  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
912  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
913  kmp_int32 is_constrained) {
914  return __kmp_atomic_execute_tasks_64(
915  this_thr, gtid, this, final_spin,
916  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
917  }
918  bool wait(kmp_info_t *this_thr,
919  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
920  if (final_spin)
921  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
922  Sleepable>(
923  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
924  else
925  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
926  Sleepable>(
927  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
928  }
929  void release() { __kmp_release_template(this); }
930  flag_type get_ptr_type() { return atomic_flag64; }
931 };
932 
933 // Hierarchical 64-bit on-core barrier instantiation
934 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
935  kmp_uint32 offset;
936  bool flag_switch;
937  enum barrier_type bt;
938  kmp_info_t *this_thr;
939 #if USE_ITT_BUILD
940  void *itt_sync_obj;
941 #endif
942  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
943  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
944  }
945 
946 public:
947  kmp_flag_oncore(volatile kmp_uint64 *p)
948  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
949  }
950  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
951  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
952  flag_switch(false),
953  bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
954  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
955  enum barrier_type bar_t,
956  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
957  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
958  flag_switch(false), bt(bar_t),
959  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
960  virtual ~kmp_flag_oncore() override {}
961  void *operator new(size_t size) { return __kmp_allocate(size); }
962  void operator delete(void *p) { __kmp_free(p); }
963  bool done_check_val(kmp_uint64 old_loc) override {
964  return byteref(&old_loc, offset) == checker;
965  }
966  bool done_check() override { return done_check_val(*get()); }
967  bool notdone_check() override {
968  // Calculate flag_switch
969  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
970  flag_switch = true;
971  if (byteref(get(), offset) != 1 && !flag_switch)
972  return true;
973  else if (flag_switch) {
974  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
975  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
976  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
977  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
978  }
979  return false;
980  }
981  void internal_release() {
982  // Other threads can write their own bytes simultaneously.
983  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
984  byteref(get(), offset) = 1;
985  } else {
986  kmp_uint64 mask = 0;
987  byteref(&mask, offset) = 1;
988  KMP_TEST_THEN_OR64(get(), mask);
989  }
990  }
991  void wait(kmp_info_t *this_thr, int final_spin) {
992  if (final_spin)
993  __kmp_wait_template<kmp_flag_oncore, TRUE>(
994  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
995  else
996  __kmp_wait_template<kmp_flag_oncore, FALSE>(
997  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
998  }
999  void release() { __kmp_release_template(this); }
1000  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
1001 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
1002  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
1003 #endif
1004  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1005  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1006  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1007  kmp_int32 is_constrained) {
1008 #if OMPD_SUPPORT
1009  int ret = __kmp_execute_tasks_oncore(
1010  this_thr, gtid, this, final_spin,
1011  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1012  if (ompd_state & OMPD_ENABLE_BP)
1013  ompd_bp_task_end();
1014  return ret;
1015 #else
1016  return __kmp_execute_tasks_oncore(
1017  this_thr, gtid, this, final_spin,
1018  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1019 #endif
1020  }
1021  enum barrier_type get_bt() { return bt; }
1022  flag_type get_ptr_type() { return flag_oncore; }
1023 };
1024 
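// Illustrative sketch (not part of the original header): the on-core flag
// packs one child's "done" byte per position of a shared 64-bit word, so up to
// eight children can report through a single cache line. The hypothetical
// helper below only shows how a per-child mask is formed, the same way
// internal_release() does when it cannot write its byte non-atomically.
static inline kmp_uint64 __kmp_example_oncore_child_mask(size_t child_offset) {
  kmp_uint64 mask = 0;
  // Set byte number child_offset (0..7) of the mask to 1.
  (RCAST(unsigned char *, &mask))[child_offset] = 1;
  return mask; // OR-ing this into the flag word marks that child as done
}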
1025 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1026  int gtid = __kmp_gtid_from_thread(thr);
1027  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1028  flag_type type = thr->th.th_sleep_loc_type;
1029  if (!flag)
1030  return;
1031  // Attempt to wake up a thread: examine its type and call appropriate template
1032  switch (type) {
1033  case flag32:
1034  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1035  break;
1036  case flag64:
1037  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1038  break;
1039  case atomic_flag64:
1040  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1041  break;
1042  case flag_oncore:
1043  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1044  break;
1045  case flag_unset:
1046  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1047  break;
1048  }
1049 }
1050 
1055 #endif // KMP_WAIT_RELEASE_H