LLVM OpenMP* Runtime Library
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <stddef.h>
14 #include <atomic>
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if KMP_USE_FUTEX
25 #include <sys/syscall.h>
26 #include <unistd.h>
27 // We should really include <futex.h>, but that causes compatibility problems on
28 // different Linux* OS distributions that either require that you include (or
29 // break when you try to include) <pci/types.h>. Since all we need is the two
30 // macros below (which are part of the kernel ABI, so can't change) we just
31 // define the constants here and don't include <futex.h>
32 #ifndef FUTEX_WAIT
33 #define FUTEX_WAIT 0
34 #endif
35 #ifndef FUTEX_WAKE
36 #define FUTEX_WAKE 1
37 #endif
38 #endif
39 
40 /* Implement spin locks for internal library use. */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
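// [Illustrative sketch -- not part of the runtime, kept out of the build with
// #if 0.] A minimal ticket-style spin lock in plain C++ showing the bakery
// idea used by the ticket locks further below: take the next ticket, then
// spin until now_serving reaches it, which grants the lock in FIFO order.
// The toy_ticket_lock name is hypothetical.
#if 0
#include <atomic>

struct toy_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};

  void lock() {
    // fetch_add returns the previous value, i.e. our ticket.
    unsigned my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (now_serving.load(std::memory_order_acquire) != my_ticket)
      ; // spin; a real implementation yields / backs off here
  }

  void unlock() {
    // Only the holder writes now_serving, so a release increment suffices.
    now_serving.fetch_add(1, std::memory_order_release);
  }
};
#endif
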
43 void __kmp_validate_locks(void) {
44  int i;
45  kmp_uint32 x, y;
46 
47  /* Check to make sure unsigned arithmetic wraps properly */
48  x = ~((kmp_uint32)0) - 2;
49  y = x - 2;
50 
51  for (i = 0; i < 8; ++i, ++x, ++y) {
52  kmp_uint32 z = (x - y);
53  KMP_ASSERT(z == 2);
54  }
55 
56  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57 }
58 
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
61 
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66 //
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69 
70 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72 }
73 
74 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75  return lck->lk.depth_locked != -1;
76 }
77 
78 __forceinline static int
79 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80  KMP_MB();
81 
82 #ifdef USE_LOCK_PROFILE
83  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84  if ((curr != 0) && (curr != gtid + 1))
85  __kmp_printf("LOCK CONTENTION: %p\n", lck);
86 /* else __kmp_printf( "." );*/
87 #endif /* USE_LOCK_PROFILE */
88 
89  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91 
92  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93  __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94  KMP_FSYNC_ACQUIRED(lck);
95  return KMP_LOCK_ACQUIRED_FIRST;
96  }
97 
98  kmp_uint32 spins;
99  kmp_uint64 time;
100  KMP_FSYNC_PREPARE(lck);
101  KMP_INIT_YIELD(spins);
102  KMP_INIT_BACKOFF(time);
103  kmp_backoff_t backoff = __kmp_spin_backoff_params;
104  do {
105 #if !KMP_HAVE_UMWAIT
106  __kmp_spin_backoff(&backoff);
107 #else
108  if (!__kmp_tpause_enabled)
109  __kmp_spin_backoff(&backoff);
110 #endif
111  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
112  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
113  !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
114  KMP_FSYNC_ACQUIRED(lck);
115  return KMP_LOCK_ACQUIRED_FIRST;
116 }
117 
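// [Illustrative sketch -- not part of the runtime, kept out of the build with
// #if 0.] The acquire loop above is a test-and-test-and-set lock: read the
// word first, attempt the CAS only when the lock looks free, and back off
// between attempts (here via __kmp_spin_backoff / KMP_YIELD_OVERSUB_ELSE_SPIN).
// A plain-C++ version of the same structure, with hypothetical names and
// backoff constants:
#if 0
#include <atomic>
#include <thread>

struct toy_tas_lock {
  std::atomic<int> poll{0}; // 0 == free, otherwise owner gtid + 1

  void lock(int gtid) {
    int expected = 0;
    const int busy = gtid + 1;
    // Fast path: only attempt the acquire-CAS when the word looks free.
    if (poll.load(std::memory_order_relaxed) == 0 &&
        poll.compare_exchange_strong(expected, busy,
                                     std::memory_order_acquire))
      return;
    // Slow path: bounded exponential backoff to reduce cache-line traffic.
    unsigned delay = 1;
    for (;;) {
      for (unsigned i = 0; i < delay; ++i)
        std::this_thread::yield();
      if (delay < 1024)
        delay *= 2;
      expected = 0;
      if (poll.load(std::memory_order_relaxed) == 0 &&
          poll.compare_exchange_strong(expected, busy,
                                       std::memory_order_acquire))
        return;
    }
  }

  void unlock() { poll.store(0, std::memory_order_release); }
};
#endif
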
118 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
119  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
120  return retval;
121 }
122 
123 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
124  kmp_int32 gtid) {
125  char const *const func = "omp_set_lock";
126  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
127  __kmp_is_tas_lock_nestable(lck)) {
128  KMP_FATAL(LockNestableUsedAsSimple, func);
129  }
130  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
131  KMP_FATAL(LockIsAlreadyOwned, func);
132  }
133  return __kmp_acquire_tas_lock(lck, gtid);
134 }
135 
136 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
137  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
138  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
139  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
140  __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
141  KMP_FSYNC_ACQUIRED(lck);
142  return TRUE;
143  }
144  return FALSE;
145 }
146 
147 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
148  kmp_int32 gtid) {
149  char const *const func = "omp_test_lock";
150  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
151  __kmp_is_tas_lock_nestable(lck)) {
152  KMP_FATAL(LockNestableUsedAsSimple, func);
153  }
154  return __kmp_test_tas_lock(lck, gtid);
155 }
156 
157 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
158  KMP_MB(); /* Flush all pending memory write invalidates. */
159 
160  KMP_FSYNC_RELEASING(lck);
161  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
162  KMP_MB(); /* Flush all pending memory write invalidates. */
163 
164  KMP_YIELD_OVERSUB();
165  return KMP_LOCK_RELEASED;
166 }
167 
168 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
169  kmp_int32 gtid) {
170  char const *const func = "omp_unset_lock";
171  KMP_MB(); /* in case another processor initialized lock */
172  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
173  __kmp_is_tas_lock_nestable(lck)) {
174  KMP_FATAL(LockNestableUsedAsSimple, func);
175  }
176  if (__kmp_get_tas_lock_owner(lck) == -1) {
177  KMP_FATAL(LockUnsettingFree, func);
178  }
179  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
180  (__kmp_get_tas_lock_owner(lck) != gtid)) {
181  KMP_FATAL(LockUnsettingSetByAnother, func);
182  }
183  return __kmp_release_tas_lock(lck, gtid);
184 }
185 
186 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
187  lck->lk.poll = KMP_LOCK_FREE(tas);
188 }
189 
190 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
191 
192 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
193  char const *const func = "omp_destroy_lock";
194  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
195  __kmp_is_tas_lock_nestable(lck)) {
196  KMP_FATAL(LockNestableUsedAsSimple, func);
197  }
198  if (__kmp_get_tas_lock_owner(lck) != -1) {
199  KMP_FATAL(LockStillOwned, func);
200  }
201  __kmp_destroy_tas_lock(lck);
202 }
203 
204 // nested test and set locks
205 
206 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
207  KMP_DEBUG_ASSERT(gtid >= 0);
208 
209  if (__kmp_get_tas_lock_owner(lck) == gtid) {
210  lck->lk.depth_locked += 1;
211  return KMP_LOCK_ACQUIRED_NEXT;
212  } else {
213  __kmp_acquire_tas_lock_timed_template(lck, gtid);
214  lck->lk.depth_locked = 1;
215  return KMP_LOCK_ACQUIRED_FIRST;
216  }
217 }
218 
219 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
220  kmp_int32 gtid) {
221  char const *const func = "omp_set_nest_lock";
222  if (!__kmp_is_tas_lock_nestable(lck)) {
223  KMP_FATAL(LockSimpleUsedAsNestable, func);
224  }
225  return __kmp_acquire_nested_tas_lock(lck, gtid);
226 }
227 
228 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
229  int retval;
230 
231  KMP_DEBUG_ASSERT(gtid >= 0);
232 
233  if (__kmp_get_tas_lock_owner(lck) == gtid) {
234  retval = ++lck->lk.depth_locked;
235  } else if (!__kmp_test_tas_lock(lck, gtid)) {
236  retval = 0;
237  } else {
238  KMP_MB();
239  retval = lck->lk.depth_locked = 1;
240  }
241  return retval;
242 }
243 
244 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
245  kmp_int32 gtid) {
246  char const *const func = "omp_test_nest_lock";
247  if (!__kmp_is_tas_lock_nestable(lck)) {
248  KMP_FATAL(LockSimpleUsedAsNestable, func);
249  }
250  return __kmp_test_nested_tas_lock(lck, gtid);
251 }
252 
253 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
254  KMP_DEBUG_ASSERT(gtid >= 0);
255 
256  KMP_MB();
257  if (--(lck->lk.depth_locked) == 0) {
258  __kmp_release_tas_lock(lck, gtid);
259  return KMP_LOCK_RELEASED;
260  }
261  return KMP_LOCK_STILL_HELD;
262 }
263 
264 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
265  kmp_int32 gtid) {
266  char const *const func = "omp_unset_nest_lock";
267  KMP_MB(); /* in case another processor initialized lock */
268  if (!__kmp_is_tas_lock_nestable(lck)) {
269  KMP_FATAL(LockSimpleUsedAsNestable, func);
270  }
271  if (__kmp_get_tas_lock_owner(lck) == -1) {
272  KMP_FATAL(LockUnsettingFree, func);
273  }
274  if (__kmp_get_tas_lock_owner(lck) != gtid) {
275  KMP_FATAL(LockUnsettingSetByAnother, func);
276  }
277  return __kmp_release_nested_tas_lock(lck, gtid);
278 }
279 
280 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
281  __kmp_init_tas_lock(lck);
282  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
283 }
284 
285 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
286  __kmp_destroy_tas_lock(lck);
287  lck->lk.depth_locked = 0;
288 }
289 
290 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
291  char const *const func = "omp_destroy_nest_lock";
292  if (!__kmp_is_tas_lock_nestable(lck)) {
293  KMP_FATAL(LockSimpleUsedAsNestable, func);
294  }
295  if (__kmp_get_tas_lock_owner(lck) != -1) {
296  KMP_FATAL(LockStillOwned, func);
297  }
298  __kmp_destroy_nested_tas_lock(lck);
299 }
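
// [Illustrative sketch -- not part of the runtime, kept out of the build with
// #if 0.] How the depth_locked counter above surfaces through the standard
// OpenMP API: the owning thread may call omp_set_nest_lock repeatedly, and
// the lock is only released once every set has been matched by an unset.
#if 0
#include <omp.h>
#include <stdio.h>

static omp_nest_lock_t nlock;

static void update(int depth) {
  omp_set_nest_lock(&nlock); // re-acquired recursively by the owner
  if (depth > 0)
    update(depth - 1);
  omp_unset_nest_lock(&nlock); // freed only at the outermost unset
}

int main() {
  omp_init_nest_lock(&nlock);
#pragma omp parallel num_threads(4)
  update(3);
  omp_destroy_nest_lock(&nlock);
  printf("done\n");
  return 0;
}
#endif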
300 
301 #if KMP_USE_FUTEX
302 
303 /* ------------------------------------------------------------------------ */
304 /* futex locks */
305 
306 // futex locks are really just test and set locks, with a different method
307 // of handling contention. They take the same amount of space as test and
308 // set locks, and are allocated the same way (i.e. use the area allocated by
309 // the compiler for non-nested locks / allocate nested locks on the heap).
310 
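// [Illustrative sketch -- not part of the runtime, kept out of the build with
// #if 0, Linux only.] The classic three-state futex mutex (0 = free, 1 = held,
// 2 = held with waiters) shows the wait/wake protocol that the futex lock
// below implements with a (gtid + 1) << 1 encoding and the low bit as the
// "waiters present" flag. The toy_futex_lock name is hypothetical.
#if 0
#include <atomic>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static long futex(std::atomic<int> *addr, int op, int val) {
  return syscall(SYS_futex, reinterpret_cast<int *>(addr), op, val, NULL, NULL,
                 0);
}

struct toy_futex_lock {
  std::atomic<int> state{0}; // 0 = free, 1 = held, 2 = held with waiters

  void lock() {
    int c = 0;
    // Fast path: 0 -> 1 with no syscall.
    if (state.compare_exchange_strong(c, 1, std::memory_order_acquire))
      return;
    // Contended path: advertise a waiter (state = 2), then sleep until the
    // exchange observes the lock free.
    if (c != 2)
      c = state.exchange(2, std::memory_order_acquire);
    while (c != 0) {
      futex(&state, FUTEX_WAIT, 2); // sleep only while the value is still 2
      c = state.exchange(2, std::memory_order_acquire);
    }
  }

  void unlock() {
    if (state.exchange(0, std::memory_order_release) == 2)
      futex(&state, FUTEX_WAKE, 1); // someone may be asleep; wake one waiter
  }
};
#endif
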
311 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
312  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
313 }
314 
315 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
316  return lck->lk.depth_locked != -1;
317 }
318 
319 __forceinline static int
320 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
321  kmp_int32 gtid_code = (gtid + 1) << 1;
322 
323  KMP_MB();
324 
325 #ifdef USE_LOCK_PROFILE
326  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
327  if ((curr != 0) && (curr != gtid_code))
328  __kmp_printf("LOCK CONTENTION: %p\n", lck);
329 /* else __kmp_printf( "." );*/
330 #endif /* USE_LOCK_PROFILE */
331 
332  KMP_FSYNC_PREPARE(lck);
333  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
334  lck, lck->lk.poll, gtid));
335 
336  kmp_int32 poll_val;
337 
338  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
339  &(lck->lk.poll), KMP_LOCK_FREE(futex),
340  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
341 
342  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
343  KA_TRACE(
344  1000,
345  ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
346  lck, gtid, poll_val, cond));
347 
348  // NOTE: if you try to use the following condition for this branch
349  //
350  // if ( poll_val & 1 == 0 )
351  //
352  // Then the 12.0 compiler has a bug where the following block will
353  // always be skipped, regardless of the value of the LSB of poll_val.
354  if (!cond) {
355  // Try to set the lsb in the poll to indicate to the owner
356  // thread that they need to wake this thread up.
357  if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
358  poll_val | KMP_LOCK_BUSY(1, futex))) {
359  KA_TRACE(
360  1000,
361  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
362  lck, lck->lk.poll, gtid));
363  continue;
364  }
365  poll_val |= KMP_LOCK_BUSY(1, futex);
366 
367  KA_TRACE(1000,
368  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
369  lck->lk.poll, gtid));
370  }
371 
372  KA_TRACE(
373  1000,
374  ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
375  lck, gtid, poll_val));
376 
377  long rc;
378  if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
379  NULL, 0)) != 0) {
380  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
381  "failed (rc=%ld errno=%d)\n",
382  lck, gtid, poll_val, rc, errno));
383  continue;
384  }
385 
386  KA_TRACE(1000,
387  ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
388  lck, gtid, poll_val));
389  // This thread has now done a successful futex wait call and was entered on
390  // the OS futex queue. We must now perform a futex wake call when releasing
391  // the lock, as we have no idea how many other threads are in the queue.
392  gtid_code |= 1;
393  }
394 
395  KMP_FSYNC_ACQUIRED(lck);
396  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
397  lck->lk.poll, gtid));
398  return KMP_LOCK_ACQUIRED_FIRST;
399 }
400 
401 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
402  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
403  return retval;
404 }
405 
406 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
407  kmp_int32 gtid) {
408  char const *const func = "omp_set_lock";
409  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
410  __kmp_is_futex_lock_nestable(lck)) {
411  KMP_FATAL(LockNestableUsedAsSimple, func);
412  }
413  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
414  KMP_FATAL(LockIsAlreadyOwned, func);
415  }
416  return __kmp_acquire_futex_lock(lck, gtid);
417 }
418 
419 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
420  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
421  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
422  KMP_FSYNC_ACQUIRED(lck);
423  return TRUE;
424  }
425  return FALSE;
426 }
427 
428 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
429  kmp_int32 gtid) {
430  char const *const func = "omp_test_lock";
431  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
432  __kmp_is_futex_lock_nestable(lck)) {
433  KMP_FATAL(LockNestableUsedAsSimple, func);
434  }
435  return __kmp_test_futex_lock(lck, gtid);
436 }
437 
438 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
439  KMP_MB(); /* Flush all pending memory write invalidates. */
440 
441  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
442  lck, lck->lk.poll, gtid));
443 
444  KMP_FSYNC_RELEASING(lck);
445 
446  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
447 
448  KA_TRACE(1000,
449  ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450  lck, gtid, poll_val));
451 
452  if (KMP_LOCK_STRIP(poll_val) & 1) {
453  KA_TRACE(1000,
454  ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
455  lck, gtid));
456  syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
457  NULL, NULL, 0);
458  }
459 
460  KMP_MB(); /* Flush all pending memory write invalidates. */
461 
462  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
463  lck->lk.poll, gtid));
464 
465  KMP_YIELD_OVERSUB();
466  return KMP_LOCK_RELEASED;
467 }
468 
469 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
470  kmp_int32 gtid) {
471  char const *const func = "omp_unset_lock";
472  KMP_MB(); /* in case another processor initialized lock */
473  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
474  __kmp_is_futex_lock_nestable(lck)) {
475  KMP_FATAL(LockNestableUsedAsSimple, func);
476  }
477  if (__kmp_get_futex_lock_owner(lck) == -1) {
478  KMP_FATAL(LockUnsettingFree, func);
479  }
480  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
481  (__kmp_get_futex_lock_owner(lck) != gtid)) {
482  KMP_FATAL(LockUnsettingSetByAnother, func);
483  }
484  return __kmp_release_futex_lock(lck, gtid);
485 }
486 
487 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
488  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
489 }
490 
491 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
492 
493 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
494  char const *const func = "omp_destroy_lock";
495  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
496  __kmp_is_futex_lock_nestable(lck)) {
497  KMP_FATAL(LockNestableUsedAsSimple, func);
498  }
499  if (__kmp_get_futex_lock_owner(lck) != -1) {
500  KMP_FATAL(LockStillOwned, func);
501  }
502  __kmp_destroy_futex_lock(lck);
503 }
504 
505 // nested futex locks
506 
507 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
508  KMP_DEBUG_ASSERT(gtid >= 0);
509 
510  if (__kmp_get_futex_lock_owner(lck) == gtid) {
511  lck->lk.depth_locked += 1;
512  return KMP_LOCK_ACQUIRED_NEXT;
513  } else {
514  __kmp_acquire_futex_lock_timed_template(lck, gtid);
515  lck->lk.depth_locked = 1;
516  return KMP_LOCK_ACQUIRED_FIRST;
517  }
518 }
519 
520 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
521  kmp_int32 gtid) {
522  char const *const func = "omp_set_nest_lock";
523  if (!__kmp_is_futex_lock_nestable(lck)) {
524  KMP_FATAL(LockSimpleUsedAsNestable, func);
525  }
526  return __kmp_acquire_nested_futex_lock(lck, gtid);
527 }
528 
529 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
530  int retval;
531 
532  KMP_DEBUG_ASSERT(gtid >= 0);
533 
534  if (__kmp_get_futex_lock_owner(lck) == gtid) {
535  retval = ++lck->lk.depth_locked;
536  } else if (!__kmp_test_futex_lock(lck, gtid)) {
537  retval = 0;
538  } else {
539  KMP_MB();
540  retval = lck->lk.depth_locked = 1;
541  }
542  return retval;
543 }
544 
545 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
546  kmp_int32 gtid) {
547  char const *const func = "omp_test_nest_lock";
548  if (!__kmp_is_futex_lock_nestable(lck)) {
549  KMP_FATAL(LockSimpleUsedAsNestable, func);
550  }
551  return __kmp_test_nested_futex_lock(lck, gtid);
552 }
553 
554 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
555  KMP_DEBUG_ASSERT(gtid >= 0);
556 
557  KMP_MB();
558  if (--(lck->lk.depth_locked) == 0) {
559  __kmp_release_futex_lock(lck, gtid);
560  return KMP_LOCK_RELEASED;
561  }
562  return KMP_LOCK_STILL_HELD;
563 }
564 
565 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
566  kmp_int32 gtid) {
567  char const *const func = "omp_unset_nest_lock";
568  KMP_MB(); /* in case another processor initialized lock */
569  if (!__kmp_is_futex_lock_nestable(lck)) {
570  KMP_FATAL(LockSimpleUsedAsNestable, func);
571  }
572  if (__kmp_get_futex_lock_owner(lck) == -1) {
573  KMP_FATAL(LockUnsettingFree, func);
574  }
575  if (__kmp_get_futex_lock_owner(lck) != gtid) {
576  KMP_FATAL(LockUnsettingSetByAnother, func);
577  }
578  return __kmp_release_nested_futex_lock(lck, gtid);
579 }
580 
581 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
582  __kmp_init_futex_lock(lck);
583  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
584 }
585 
586 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
587  __kmp_destroy_futex_lock(lck);
588  lck->lk.depth_locked = 0;
589 }
590 
591 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
592  char const *const func = "omp_destroy_nest_lock";
593  if (!__kmp_is_futex_lock_nestable(lck)) {
594  KMP_FATAL(LockSimpleUsedAsNestable, func);
595  }
596  if (__kmp_get_futex_lock_owner(lck) != -1) {
597  KMP_FATAL(LockStillOwned, func);
598  }
599  __kmp_destroy_nested_futex_lock(lck);
600 }
601 
602 #endif // KMP_USE_FUTEX
603 
604 /* ------------------------------------------------------------------------ */
605 /* ticket (bakery) locks */
606 
607 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
608  return std::atomic_load_explicit(&lck->lk.owner_id,
609  std::memory_order_relaxed) -
610  1;
611 }
612 
613 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
614  return std::atomic_load_explicit(&lck->lk.depth_locked,
615  std::memory_order_relaxed) != -1;
616 }
617 
618 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
619  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
620  std::memory_order_acquire) == my_ticket;
621 }
622 
623 __forceinline static int
624 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
625  kmp_int32 gtid) {
626  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
627  &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
628 
629 #ifdef USE_LOCK_PROFILE
630  if (std::atomic_load_explicit(&lck->lk.now_serving,
631  std::memory_order_relaxed) != my_ticket)
632  __kmp_printf("LOCK CONTENTION: %p\n", lck);
633 /* else __kmp_printf( "." );*/
634 #endif /* USE_LOCK_PROFILE */
635 
636  if (std::atomic_load_explicit(&lck->lk.now_serving,
637  std::memory_order_acquire) == my_ticket) {
638  return KMP_LOCK_ACQUIRED_FIRST;
639  }
640  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
641  return KMP_LOCK_ACQUIRED_FIRST;
642 }
643 
644 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
645  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
646  return retval;
647 }
648 
649 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
650  kmp_int32 gtid) {
651  char const *const func = "omp_set_lock";
652 
653  if (!std::atomic_load_explicit(&lck->lk.initialized,
654  std::memory_order_relaxed)) {
655  KMP_FATAL(LockIsUninitialized, func);
656  }
657  if (lck->lk.self != lck) {
658  KMP_FATAL(LockIsUninitialized, func);
659  }
660  if (__kmp_is_ticket_lock_nestable(lck)) {
661  KMP_FATAL(LockNestableUsedAsSimple, func);
662  }
663  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
664  KMP_FATAL(LockIsAlreadyOwned, func);
665  }
666 
667  __kmp_acquire_ticket_lock(lck, gtid);
668 
669  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
670  std::memory_order_relaxed);
671  return KMP_LOCK_ACQUIRED_FIRST;
672 }
673 
674 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
675  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
676  std::memory_order_relaxed);
677 
678  if (std::atomic_load_explicit(&lck->lk.now_serving,
679  std::memory_order_relaxed) == my_ticket) {
680  kmp_uint32 next_ticket = my_ticket + 1;
681  if (std::atomic_compare_exchange_strong_explicit(
682  &lck->lk.next_ticket, &my_ticket, next_ticket,
683  std::memory_order_acquire, std::memory_order_acquire)) {
684  return TRUE;
685  }
686  }
687  return FALSE;
688 }
689 
690 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
691  kmp_int32 gtid) {
692  char const *const func = "omp_test_lock";
693 
694  if (!std::atomic_load_explicit(&lck->lk.initialized,
695  std::memory_order_relaxed)) {
696  KMP_FATAL(LockIsUninitialized, func);
697  }
698  if (lck->lk.self != lck) {
699  KMP_FATAL(LockIsUninitialized, func);
700  }
701  if (__kmp_is_ticket_lock_nestable(lck)) {
702  KMP_FATAL(LockNestableUsedAsSimple, func);
703  }
704 
705  int retval = __kmp_test_ticket_lock(lck, gtid);
706 
707  if (retval) {
708  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
709  std::memory_order_relaxed);
710  }
711  return retval;
712 }
713 
714 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
715  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
716  std::memory_order_release);
717 
718  return KMP_LOCK_RELEASED;
719 }
720 
721 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722  kmp_int32 gtid) {
723  char const *const func = "omp_unset_lock";
724 
725  if (!std::atomic_load_explicit(&lck->lk.initialized,
726  std::memory_order_relaxed)) {
727  KMP_FATAL(LockIsUninitialized, func);
728  }
729  if (lck->lk.self != lck) {
730  KMP_FATAL(LockIsUninitialized, func);
731  }
732  if (__kmp_is_ticket_lock_nestable(lck)) {
733  KMP_FATAL(LockNestableUsedAsSimple, func);
734  }
735  if (__kmp_get_ticket_lock_owner(lck) == -1) {
736  KMP_FATAL(LockUnsettingFree, func);
737  }
738  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739  (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740  KMP_FATAL(LockUnsettingSetByAnother, func);
741  }
742  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743  return __kmp_release_ticket_lock(lck, gtid);
744 }
745 
746 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
747  lck->lk.location = NULL;
748  lck->lk.self = lck;
749  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750  std::memory_order_relaxed);
751  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752  std::memory_order_relaxed);
753  std::atomic_store_explicit(
754  &lck->lk.owner_id, 0,
755  std::memory_order_relaxed); // no thread owns the lock.
756  std::atomic_store_explicit(
757  &lck->lk.depth_locked, -1,
758  std::memory_order_relaxed); // -1 => not a nested lock.
759  std::atomic_store_explicit(&lck->lk.initialized, true,
760  std::memory_order_release);
761 }
762 
763 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
764  std::atomic_store_explicit(&lck->lk.initialized, false,
765  std::memory_order_release);
766  lck->lk.self = NULL;
767  lck->lk.location = NULL;
768  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769  std::memory_order_relaxed);
770  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771  std::memory_order_relaxed);
772  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774  std::memory_order_relaxed);
775 }
776 
777 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778  char const *const func = "omp_destroy_lock";
779 
780  if (!std::atomic_load_explicit(&lck->lk.initialized,
781  std::memory_order_relaxed)) {
782  KMP_FATAL(LockIsUninitialized, func);
783  }
784  if (lck->lk.self != lck) {
785  KMP_FATAL(LockIsUninitialized, func);
786  }
787  if (__kmp_is_ticket_lock_nestable(lck)) {
788  KMP_FATAL(LockNestableUsedAsSimple, func);
789  }
790  if (__kmp_get_ticket_lock_owner(lck) != -1) {
791  KMP_FATAL(LockStillOwned, func);
792  }
793  __kmp_destroy_ticket_lock(lck);
794 }
795 
796 // nested ticket locks
797 
798 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799  KMP_DEBUG_ASSERT(gtid >= 0);
800 
801  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802  std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803  std::memory_order_relaxed);
804  return KMP_LOCK_ACQUIRED_NEXT;
805  } else {
806  __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808  std::memory_order_relaxed);
809  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810  std::memory_order_relaxed);
811  return KMP_LOCK_ACQUIRED_FIRST;
812  }
813 }
814 
815 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816  kmp_int32 gtid) {
817  char const *const func = "omp_set_nest_lock";
818 
819  if (!std::atomic_load_explicit(&lck->lk.initialized,
820  std::memory_order_relaxed)) {
821  KMP_FATAL(LockIsUninitialized, func);
822  }
823  if (lck->lk.self != lck) {
824  KMP_FATAL(LockIsUninitialized, func);
825  }
826  if (!__kmp_is_ticket_lock_nestable(lck)) {
827  KMP_FATAL(LockSimpleUsedAsNestable, func);
828  }
829  return __kmp_acquire_nested_ticket_lock(lck, gtid);
830 }
831 
832 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833  int retval;
834 
835  KMP_DEBUG_ASSERT(gtid >= 0);
836 
837  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838  retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839  std::memory_order_relaxed) +
840  1;
841  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842  retval = 0;
843  } else {
844  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845  std::memory_order_relaxed);
846  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847  std::memory_order_relaxed);
848  retval = 1;
849  }
850  return retval;
851 }
852 
853 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854  kmp_int32 gtid) {
855  char const *const func = "omp_test_nest_lock";
856 
857  if (!std::atomic_load_explicit(&lck->lk.initialized,
858  std::memory_order_relaxed)) {
859  KMP_FATAL(LockIsUninitialized, func);
860  }
861  if (lck->lk.self != lck) {
862  KMP_FATAL(LockIsUninitialized, func);
863  }
864  if (!__kmp_is_ticket_lock_nestable(lck)) {
865  KMP_FATAL(LockSimpleUsedAsNestable, func);
866  }
867  return __kmp_test_nested_ticket_lock(lck, gtid);
868 }
869 
870 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871  KMP_DEBUG_ASSERT(gtid >= 0);
872 
873  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874  std::memory_order_relaxed) -
875  1) == 0) {
876  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877  __kmp_release_ticket_lock(lck, gtid);
878  return KMP_LOCK_RELEASED;
879  }
880  return KMP_LOCK_STILL_HELD;
881 }
882 
883 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884  kmp_int32 gtid) {
885  char const *const func = "omp_unset_nest_lock";
886 
887  if (!std::atomic_load_explicit(&lck->lk.initialized,
888  std::memory_order_relaxed)) {
889  KMP_FATAL(LockIsUninitialized, func);
890  }
891  if (lck->lk.self != lck) {
892  KMP_FATAL(LockIsUninitialized, func);
893  }
894  if (!__kmp_is_ticket_lock_nestable(lck)) {
895  KMP_FATAL(LockSimpleUsedAsNestable, func);
896  }
897  if (__kmp_get_ticket_lock_owner(lck) == -1) {
898  KMP_FATAL(LockUnsettingFree, func);
899  }
900  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901  KMP_FATAL(LockUnsettingSetByAnother, func);
902  }
903  return __kmp_release_nested_ticket_lock(lck, gtid);
904 }
905 
906 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
907  __kmp_init_ticket_lock(lck);
908  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909  std::memory_order_relaxed);
910  // >= 0 for nestable locks, -1 for simple locks
911 }
912 
913 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914  __kmp_destroy_ticket_lock(lck);
915  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916  std::memory_order_relaxed);
917 }
918 
919 static void
920 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921  char const *const func = "omp_destroy_nest_lock";
922 
923  if (!std::atomic_load_explicit(&lck->lk.initialized,
924  std::memory_order_relaxed)) {
925  KMP_FATAL(LockIsUninitialized, func);
926  }
927  if (lck->lk.self != lck) {
928  KMP_FATAL(LockIsUninitialized, func);
929  }
930  if (!__kmp_is_ticket_lock_nestable(lck)) {
931  KMP_FATAL(LockSimpleUsedAsNestable, func);
932  }
933  if (__kmp_get_ticket_lock_owner(lck) != -1) {
934  KMP_FATAL(LockStillOwned, func);
935  }
936  __kmp_destroy_nested_ticket_lock(lck);
937 }
938 
939 // access functions to fields which don't exist for all lock kinds.
940 
941 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
942  return lck->lk.location;
943 }
944 
945 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
946  const ident_t *loc) {
947  lck->lk.location = loc;
948 }
949 
950 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
951  return lck->lk.flags;
952 }
953 
954 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
955  kmp_lock_flags_t flags) {
956  lck->lk.flags = flags;
957 }
958 
959 /* ------------------------------------------------------------------------ */
960 /* queuing locks */
961 
962 /* First the states
963  (head,tail) = 0, 0 means lock is unheld, nobody on queue
964  UINT_MAX or -1, 0 means lock is held, nobody on queue
965  h, h means lock held or about to transition,
966  1 element on queue
967  h, t h <> t, means lock is held or about to
968  transition, >1 elements on queue
969 
970  Now the transitions
971  Acquire(0,0) = -1 ,0
972  Release(0,0) = Error
973  Acquire(-1,0) = h ,h h > 0
974  Release(-1,0) = 0 ,0
975  Acquire(h,h) = h ,t h > 0, t > 0, h <> t
976  Release(h,h) = -1 ,0 h > 0
977  Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
978  Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
979 
980  And pictorially
981 
982  +-----+
983  | 0, 0|------- release -------> Error
984  +-----+
985  | ^
986  acquire| |release
987  | |
988  | |
989  v |
990  +-----+
991  |-1, 0|
992  +-----+
993  | ^
994  acquire| |release
995  | |
996  | |
997  v |
998  +-----+
999  | h, h|
1000  +-----+
1001  | ^
1002  acquire| |release
1003  | |
1004  | |
1005  v |
1006  +-----+
1007  | h, t|----- acquire, release loopback ---+
1008  +-----+ |
1009  ^ |
1010  | |
1011  +------------------------------------+
1012  */
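
// [Illustrative sketch -- not part of the runtime, kept out of the build with
// #if 0.] An MCS-style queue lock in plain C++, shown for comparison only:
// like the queuing lock below, each waiter spins on its own flag and the
// releaser hands the lock to the next waiter in FIFO order. The kmp
// implementation chains waiters through gtid indices and the th_next_waiting /
// th_spin_here fields instead of caller-provided nodes.
#if 0
#include <atomic>

struct mcs_node {
  std::atomic<mcs_node *> next{nullptr};
  std::atomic<bool> locked{false};
};

struct toy_mcs_lock {
  std::atomic<mcs_node *> tail{nullptr};

  void lock(mcs_node *me) {
    me->next.store(nullptr, std::memory_order_relaxed);
    me->locked.store(true, std::memory_order_relaxed);
    mcs_node *prev = tail.exchange(me, std::memory_order_acq_rel);
    if (prev == nullptr)
      return; // queue was empty: we hold the lock immediately
    prev->next.store(me, std::memory_order_release);
    while (me->locked.load(std::memory_order_acquire))
      ; // spin on our own flag (analogous to th_spin_here)
  }

  void unlock(mcs_node *me) {
    mcs_node *succ = me->next.load(std::memory_order_acquire);
    if (succ == nullptr) {
      mcs_node *expected = me;
      // No successor visible: try to swing the tail back to empty.
      if (tail.compare_exchange_strong(expected, nullptr,
                                       std::memory_order_acq_rel))
        return;
      // A successor is mid-enqueue; wait until it links itself in.
      while ((succ = me->next.load(std::memory_order_acquire)) == nullptr)
        ;
    }
    succ->locked.store(false, std::memory_order_release); // hand off the lock
  }
};
#endif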
1013 
1014 #ifdef DEBUG_QUEUING_LOCKS
1015 
1016 /* Stuff for circular trace buffer */
1017 #define TRACE_BUF_ELE 1024
1018 static char traces[TRACE_BUF_ELE][128] = {0};
1019 static int tc = 0;
1020 #define TRACE_LOCK(X, Y) \
1021  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022 #define TRACE_LOCK_T(X, Y, Z) \
1023  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024 #define TRACE_LOCK_HT(X, Y, Z, Q) \
1025  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1026  Z, Q);
1027 
1028 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1029  kmp_queuing_lock_t *lck, kmp_int32 head_id,
1030  kmp_int32 tail_id) {
1031  kmp_int32 t, i;
1032 
1033  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034 
1035  i = tc % TRACE_BUF_ELE;
1036  __kmp_printf_no_lock("%s\n", traces[i]);
1037  i = (i + 1) % TRACE_BUF_ELE;
1038  while (i != (tc % TRACE_BUF_ELE)) {
1039  __kmp_printf_no_lock("%s", traces[i]);
1040  i = (i + 1) % TRACE_BUF_ELE;
1041  }
1042  __kmp_printf_no_lock("\n");
1043 
1044  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045  "next_wait:%d, head_id:%d, tail_id:%d\n",
1046  gtid + 1, this_thr->th.th_spin_here,
1047  this_thr->th.th_next_waiting, head_id, tail_id);
1048 
1049  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050 
1051  if (lck->lk.head_id >= 1) {
1052  t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053  while (t > 0) {
1054  __kmp_printf_no_lock("-> %d ", t);
1055  t = __kmp_threads[t - 1]->th.th_next_waiting;
1056  }
1057  }
1058  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1059  __kmp_printf_no_lock("\n\n");
1060 }
1061 
1062 #endif /* DEBUG_QUEUING_LOCKS */
1063 
1064 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1065  return TCR_4(lck->lk.owner_id) - 1;
1066 }
1067 
1068 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1069  return lck->lk.depth_locked != -1;
1070 }
1071 
1072 /* Acquire a lock using the queuing lock implementation */
1073 template <bool takeTime>
1074 /* [TLW] The unused template above is left behind because of what BEB believes
1075  is a potential compiler problem with __forceinline. */
1076 __forceinline static int
1077 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1078  kmp_int32 gtid) {
1079  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1080  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1081  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1082  volatile kmp_uint32 *spin_here_p;
1083 
1084 #if OMPT_SUPPORT
1085  ompt_state_t prev_state = ompt_state_undefined;
1086 #endif
1087 
1088  KA_TRACE(1000,
1089  ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1090 
1091  KMP_FSYNC_PREPARE(lck);
1092  KMP_DEBUG_ASSERT(this_thr != NULL);
1093  spin_here_p = &this_thr->th.th_spin_here;
1094 
1095 #ifdef DEBUG_QUEUING_LOCKS
1096  TRACE_LOCK(gtid + 1, "acq ent");
1097  if (*spin_here_p)
1098  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1099  if (this_thr->th.th_next_waiting != 0)
1100  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1101 #endif
1102  KMP_DEBUG_ASSERT(!*spin_here_p);
1103  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1104 
1105  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1106  head_id_p that may follow, not just in execution order, but also in
1107  visibility order. This way, when a releasing thread observes the changes to
1108  the queue by this thread, it can rightly assume that spin_here_p has
1109  already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1110  not premature. If the releasing thread sets spin_here_p to FALSE before
1111  this thread sets it to TRUE, this thread will hang. */
1112  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1113 
1114  while (1) {
1115  kmp_int32 enqueued;
1116  kmp_int32 head;
1117  kmp_int32 tail;
1118 
1119  head = *head_id_p;
1120 
1121  switch (head) {
1122 
1123  case -1: {
1124 #ifdef DEBUG_QUEUING_LOCKS
1125  tail = *tail_id_p;
1126  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1127 #endif
1128  tail = 0; /* to make sure next link asynchronously read is not set
1129  accidentally; this assignment prevents us from entering the
1130  if ( t > 0 ) condition in the enqueued case below, which is not
1131  necessary for this state transition */
1132 
1133  /* try (-1,0)->(tid,tid) */
1134  enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1135  KMP_PACK_64(-1, 0),
1136  KMP_PACK_64(gtid + 1, gtid + 1));
1137 #ifdef DEBUG_QUEUING_LOCKS
1138  if (enqueued)
1139  TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1140 #endif
1141  } break;
1142 
1143  default: {
1144  tail = *tail_id_p;
1145  KMP_DEBUG_ASSERT(tail != gtid + 1);
1146 
1147 #ifdef DEBUG_QUEUING_LOCKS
1148  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1149 #endif
1150 
1151  if (tail == 0) {
1152  enqueued = FALSE;
1153  } else {
1154  /* try (h,t) or (h,h)->(h,tid) */
1155  enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1156 
1157 #ifdef DEBUG_QUEUING_LOCKS
1158  if (enqueued)
1159  TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1160 #endif
1161  }
1162  } break;
1163 
1164  case 0: /* empty queue */
1165  {
1166  kmp_int32 grabbed_lock;
1167 
1168 #ifdef DEBUG_QUEUING_LOCKS
1169  tail = *tail_id_p;
1170  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1171 #endif
1172  /* try (0,0)->(-1,0) */
1173 
1174  /* only legal transition out of head = 0 is head = -1 with no change to
1175  * tail */
1176  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1177 
1178  if (grabbed_lock) {
1179 
1180  *spin_here_p = FALSE;
1181 
1182  KA_TRACE(
1183  1000,
1184  ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1185  lck, gtid));
1186 #ifdef DEBUG_QUEUING_LOCKS
1187  TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1188 #endif
1189 
1190 #if OMPT_SUPPORT
1191  if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1192  /* change the state before clearing wait_id */
1193  this_thr->th.ompt_thread_info.state = prev_state;
1194  this_thr->th.ompt_thread_info.wait_id = 0;
1195  }
1196 #endif
1197 
1198  KMP_FSYNC_ACQUIRED(lck);
1199  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1200  }
1201  enqueued = FALSE;
1202  } break;
1203  }
1204 
1205 #if OMPT_SUPPORT
1206  if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1207  /* this thread will spin; set wait_id before entering wait state */
1208  prev_state = this_thr->th.ompt_thread_info.state;
1209  this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1210  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1211  }
1212 #endif
1213 
1214  if (enqueued) {
1215  if (tail > 0) {
1216  kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1217  KMP_ASSERT(tail_thr != NULL);
1218  tail_thr->th.th_next_waiting = gtid + 1;
1219  /* corresponding wait for this write in release code */
1220  }
1221  KA_TRACE(1000,
1222  ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1223  lck, gtid));
1224 
1225  KMP_MB();
1226  // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1227  KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1228  // Synchronize writes to both runtime thread structures
1229  // and writes in user code.
1230  KMP_MB();
1231 
1232 #ifdef DEBUG_QUEUING_LOCKS
1233  TRACE_LOCK(gtid + 1, "acq spin");
1234 
1235  if (this_thr->th.th_next_waiting != 0)
1236  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1237 #endif
1238  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1239  KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1240  "waiting on queue\n",
1241  lck, gtid));
1242 
1243 #ifdef DEBUG_QUEUING_LOCKS
1244  TRACE_LOCK(gtid + 1, "acq exit 2");
1245 #endif
1246 
1247 #if OMPT_SUPPORT
1248  /* change the state before clearing wait_id */
1249  this_thr->th.ompt_thread_info.state = prev_state;
1250  this_thr->th.ompt_thread_info.wait_id = 0;
1251 #endif
1252 
1253  /* got lock, we were dequeued by the thread that released lock */
1254  return KMP_LOCK_ACQUIRED_FIRST;
1255  }
1256 
1257  /* Yield if number of threads > number of logical processors */
1258  /* ToDo: Not sure why this should only be in oversubscription case,
1259  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1260  KMP_YIELD_OVERSUB();
1261 
1262 #ifdef DEBUG_QUEUING_LOCKS
1263  TRACE_LOCK(gtid + 1, "acq retry");
1264 #endif
1265  }
1266  KMP_ASSERT2(0, "should not get here");
1267  return KMP_LOCK_ACQUIRED_FIRST;
1268 }
1269 
1270 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271  KMP_DEBUG_ASSERT(gtid >= 0);
1272 
1273  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274  return retval;
1275 }
1276 
1277 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278  kmp_int32 gtid) {
1279  char const *const func = "omp_set_lock";
1280  if (lck->lk.initialized != lck) {
1281  KMP_FATAL(LockIsUninitialized, func);
1282  }
1283  if (__kmp_is_queuing_lock_nestable(lck)) {
1284  KMP_FATAL(LockNestableUsedAsSimple, func);
1285  }
1286  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287  KMP_FATAL(LockIsAlreadyOwned, func);
1288  }
1289 
1290  __kmp_acquire_queuing_lock(lck, gtid);
1291 
1292  lck->lk.owner_id = gtid + 1;
1293  return KMP_LOCK_ACQUIRED_FIRST;
1294 }
1295 
1296 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298  kmp_int32 head;
1299 #ifdef KMP_DEBUG
1300  kmp_info_t *this_thr;
1301 #endif
1302 
1303  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304  KMP_DEBUG_ASSERT(gtid >= 0);
1305 #ifdef KMP_DEBUG
1306  this_thr = __kmp_thread_from_gtid(gtid);
1307  KMP_DEBUG_ASSERT(this_thr != NULL);
1308  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309 #endif
1310 
1311  head = *head_id_p;
1312 
1313  if (head == 0) { /* nobody on queue, nobody holding */
1314  /* try (0,0)->(-1,0) */
1315  if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316  KA_TRACE(1000,
1317  ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318  KMP_FSYNC_ACQUIRED(lck);
1319  return TRUE;
1320  }
1321  }
1322 
1323  KA_TRACE(1000,
1324  ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325  return FALSE;
1326 }
1327 
1328 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329  kmp_int32 gtid) {
1330  char const *const func = "omp_test_lock";
1331  if (lck->lk.initialized != lck) {
1332  KMP_FATAL(LockIsUninitialized, func);
1333  }
1334  if (__kmp_is_queuing_lock_nestable(lck)) {
1335  KMP_FATAL(LockNestableUsedAsSimple, func);
1336  }
1337 
1338  int retval = __kmp_test_queuing_lock(lck, gtid);
1339 
1340  if (retval) {
1341  lck->lk.owner_id = gtid + 1;
1342  }
1343  return retval;
1344 }
1345 
1346 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1347  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1348  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1349 
1350  KA_TRACE(1000,
1351  ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1352  KMP_DEBUG_ASSERT(gtid >= 0);
1353 #if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1354  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1355 #endif
1356  KMP_DEBUG_ASSERT(this_thr != NULL);
1357 #ifdef DEBUG_QUEUING_LOCKS
1358  TRACE_LOCK(gtid + 1, "rel ent");
1359 
1360  if (this_thr->th.th_spin_here)
1361  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1362  if (this_thr->th.th_next_waiting != 0)
1363  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1364 #endif
1365  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1366  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1367 
1368  KMP_FSYNC_RELEASING(lck);
1369 
1370  while (1) {
1371  kmp_int32 dequeued;
1372  kmp_int32 head;
1373  kmp_int32 tail;
1374 
1375  head = *head_id_p;
1376 
1377 #ifdef DEBUG_QUEUING_LOCKS
1378  tail = *tail_id_p;
1379  TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1380  if (head == 0)
1381  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1382 #endif
1383  KMP_DEBUG_ASSERT(head !=
1384  0); /* holding the lock, head must be -1 or queue head */
1385 
1386  if (head == -1) { /* nobody on queue */
1387  /* try (-1,0)->(0,0) */
1388  if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1389  KA_TRACE(
1390  1000,
1391  ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1392  lck, gtid));
1393 #ifdef DEBUG_QUEUING_LOCKS
1394  TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1395 #endif
1396 
1397 #if OMPT_SUPPORT
1398 /* nothing to do - no other thread is trying to shift blame */
1399 #endif
1400  return KMP_LOCK_RELEASED;
1401  }
1402  dequeued = FALSE;
1403  } else {
1404  KMP_MB();
1405  tail = *tail_id_p;
1406  if (head == tail) { /* only one thread on the queue */
1407 #ifdef DEBUG_QUEUING_LOCKS
1408  if (head <= 0)
1409  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1410 #endif
1411  KMP_DEBUG_ASSERT(head > 0);
1412 
1413  /* try (h,h)->(-1,0) */
1414  dequeued = KMP_COMPARE_AND_STORE_REL64(
1415  RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1416  KMP_PACK_64(-1, 0));
1417 #ifdef DEBUG_QUEUING_LOCKS
1418  TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1419 #endif
1420 
1421  } else {
1422  volatile kmp_int32 *waiting_id_p;
1423  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1424  KMP_DEBUG_ASSERT(head_thr != NULL);
1425  waiting_id_p = &head_thr->th.th_next_waiting;
1426 
1427 /* Does this require synchronous reads? */
1428 #ifdef DEBUG_QUEUING_LOCKS
1429  if (head <= 0 || tail <= 0)
1430  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1431 #endif
1432  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1433 
1434  /* try (h,t)->(h',t) or (t,t) */
1435  KMP_MB();
1436  /* make sure enqueuing thread has time to update next waiting thread
1437  * field */
1438  *head_id_p =
1439  KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1440 #ifdef DEBUG_QUEUING_LOCKS
1441  TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1442 #endif
1443  dequeued = TRUE;
1444  }
1445  }
1446 
1447  if (dequeued) {
1448  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1449  KMP_DEBUG_ASSERT(head_thr != NULL);
1450 
1451 /* Does this require synchronous reads? */
1452 #ifdef DEBUG_QUEUING_LOCKS
1453  if (head <= 0 || tail <= 0)
1454  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1455 #endif
1456  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1457 
1458  /* For clean code only. Thread not released until next statement prevents
1459  race with acquire code. */
1460  head_thr->th.th_next_waiting = 0;
1461 #ifdef DEBUG_QUEUING_LOCKS
1462  TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1463 #endif
1464 
1465  KMP_MB();
1466  /* reset spin value */
1467  head_thr->th.th_spin_here = FALSE;
1468 
1469  KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1470  "dequeuing\n",
1471  lck, gtid));
1472 #ifdef DEBUG_QUEUING_LOCKS
1473  TRACE_LOCK(gtid + 1, "rel exit 2");
1474 #endif
1475  return KMP_LOCK_RELEASED;
1476  }
1477  /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1478  threads */
1479 
1480 #ifdef DEBUG_QUEUING_LOCKS
1481  TRACE_LOCK(gtid + 1, "rel retry");
1482 #endif
1483 
1484  } /* while */
1485  KMP_ASSERT2(0, "should not get here");
1486  return KMP_LOCK_RELEASED;
1487 }
1488 
1489 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1490  kmp_int32 gtid) {
1491  char const *const func = "omp_unset_lock";
1492  KMP_MB(); /* in case another processor initialized lock */
1493  if (lck->lk.initialized != lck) {
1494  KMP_FATAL(LockIsUninitialized, func);
1495  }
1496  if (__kmp_is_queuing_lock_nestable(lck)) {
1497  KMP_FATAL(LockNestableUsedAsSimple, func);
1498  }
1499  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1500  KMP_FATAL(LockUnsettingFree, func);
1501  }
1502  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1503  KMP_FATAL(LockUnsettingSetByAnother, func);
1504  }
1505  lck->lk.owner_id = 0;
1506  return __kmp_release_queuing_lock(lck, gtid);
1507 }
1508 
1509 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1510  lck->lk.location = NULL;
1511  lck->lk.head_id = 0;
1512  lck->lk.tail_id = 0;
1513  lck->lk.next_ticket = 0;
1514  lck->lk.now_serving = 0;
1515  lck->lk.owner_id = 0; // no thread owns the lock.
1516  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1517  lck->lk.initialized = lck;
1518 
1519  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1520 }
1521 
1522 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1523  lck->lk.initialized = NULL;
1524  lck->lk.location = NULL;
1525  lck->lk.head_id = 0;
1526  lck->lk.tail_id = 0;
1527  lck->lk.next_ticket = 0;
1528  lck->lk.now_serving = 0;
1529  lck->lk.owner_id = 0;
1530  lck->lk.depth_locked = -1;
1531 }
1532 
1533 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1534  char const *const func = "omp_destroy_lock";
1535  if (lck->lk.initialized != lck) {
1536  KMP_FATAL(LockIsUninitialized, func);
1537  }
1538  if (__kmp_is_queuing_lock_nestable(lck)) {
1539  KMP_FATAL(LockNestableUsedAsSimple, func);
1540  }
1541  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1542  KMP_FATAL(LockStillOwned, func);
1543  }
1544  __kmp_destroy_queuing_lock(lck);
1545 }
1546 
1547 // nested queuing locks
1548 
1549 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1550  KMP_DEBUG_ASSERT(gtid >= 0);
1551 
1552  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1553  lck->lk.depth_locked += 1;
1554  return KMP_LOCK_ACQUIRED_NEXT;
1555  } else {
1556  __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1557  KMP_MB();
1558  lck->lk.depth_locked = 1;
1559  KMP_MB();
1560  lck->lk.owner_id = gtid + 1;
1561  return KMP_LOCK_ACQUIRED_FIRST;
1562  }
1563 }
1564 
1565 static int
1566 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1567  kmp_int32 gtid) {
1568  char const *const func = "omp_set_nest_lock";
1569  if (lck->lk.initialized != lck) {
1570  KMP_FATAL(LockIsUninitialized, func);
1571  }
1572  if (!__kmp_is_queuing_lock_nestable(lck)) {
1573  KMP_FATAL(LockSimpleUsedAsNestable, func);
1574  }
1575  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1576 }
1577 
1578 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1579  int retval;
1580 
1581  KMP_DEBUG_ASSERT(gtid >= 0);
1582 
1583  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1584  retval = ++lck->lk.depth_locked;
1585  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1586  retval = 0;
1587  } else {
1588  KMP_MB();
1589  retval = lck->lk.depth_locked = 1;
1590  KMP_MB();
1591  lck->lk.owner_id = gtid + 1;
1592  }
1593  return retval;
1594 }
1595 
1596 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1597  kmp_int32 gtid) {
1598  char const *const func = "omp_test_nest_lock";
1599  if (lck->lk.initialized != lck) {
1600  KMP_FATAL(LockIsUninitialized, func);
1601  }
1602  if (!__kmp_is_queuing_lock_nestable(lck)) {
1603  KMP_FATAL(LockSimpleUsedAsNestable, func);
1604  }
1605  return __kmp_test_nested_queuing_lock(lck, gtid);
1606 }
1607 
1608 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1609  KMP_DEBUG_ASSERT(gtid >= 0);
1610 
1611  KMP_MB();
1612  if (--(lck->lk.depth_locked) == 0) {
1613  KMP_MB();
1614  lck->lk.owner_id = 0;
1615  __kmp_release_queuing_lock(lck, gtid);
1616  return KMP_LOCK_RELEASED;
1617  }
1618  return KMP_LOCK_STILL_HELD;
1619 }
1620 
1621 static int
1622 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1623  kmp_int32 gtid) {
1624  char const *const func = "omp_unset_nest_lock";
1625  KMP_MB(); /* in case another processor initialized lock */
1626  if (lck->lk.initialized != lck) {
1627  KMP_FATAL(LockIsUninitialized, func);
1628  }
1629  if (!__kmp_is_queuing_lock_nestable(lck)) {
1630  KMP_FATAL(LockSimpleUsedAsNestable, func);
1631  }
1632  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1633  KMP_FATAL(LockUnsettingFree, func);
1634  }
1635  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1636  KMP_FATAL(LockUnsettingSetByAnother, func);
1637  }
1638  return __kmp_release_nested_queuing_lock(lck, gtid);
1639 }
1640 
1641 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1642  __kmp_init_queuing_lock(lck);
1643  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1644 }
1645 
1646 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1647  __kmp_destroy_queuing_lock(lck);
1648  lck->lk.depth_locked = 0;
1649 }
1650 
1651 static void
1652 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1653  char const *const func = "omp_destroy_nest_lock";
1654  if (lck->lk.initialized != lck) {
1655  KMP_FATAL(LockIsUninitialized, func);
1656  }
1657  if (!__kmp_is_queuing_lock_nestable(lck)) {
1658  KMP_FATAL(LockSimpleUsedAsNestable, func);
1659  }
1660  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1661  KMP_FATAL(LockStillOwned, func);
1662  }
1663  __kmp_destroy_nested_queuing_lock(lck);
1664 }
1665 
1666 // access functions to fields which don't exist for all lock kinds.
1667 
1668 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1669  return lck->lk.location;
1670 }
1671 
1672 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1673  const ident_t *loc) {
1674  lck->lk.location = loc;
1675 }
1676 
1677 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1678  return lck->lk.flags;
1679 }
1680 
1681 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1682  kmp_lock_flags_t flags) {
1683  lck->lk.flags = flags;
1684 }
1685 
1686 #if KMP_USE_ADAPTIVE_LOCKS
1687 
1688 /* RTM Adaptive locks */
1689 
1690 #if KMP_HAVE_RTM_INTRINSICS
1691 #include <immintrin.h>
1692 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1693 
1694 #else
1695 
1696 // Values from the status register after failed speculation.
1697 #define _XBEGIN_STARTED (~0u)
1698 #define _XABORT_EXPLICIT (1 << 0)
1699 #define _XABORT_RETRY (1 << 1)
1700 #define _XABORT_CONFLICT (1 << 2)
1701 #define _XABORT_CAPACITY (1 << 3)
1702 #define _XABORT_DEBUG (1 << 4)
1703 #define _XABORT_NESTED (1 << 5)
1704 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1705 
1706 // Aborts for which it's worth trying again immediately
1707 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1708 
1709 #define STRINGIZE_INTERNAL(arg) #arg
1710 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1711 
1712 // Access to RTM instructions
1713 /*A version of XBegin which returns -1 on speculation, and the value of EAX on
1714  an abort. This is the same definition as the compiler intrinsic that will be
1715  supported at some point. */
1716 static __inline int _xbegin() {
1717  int res = -1;
1718 
1719 #if KMP_OS_WINDOWS
1720 #if KMP_ARCH_X86_64
1721  _asm {
1722  _emit 0xC7
1723  _emit 0xF8
1724  _emit 2
1725  _emit 0
1726  _emit 0
1727  _emit 0
1728  jmp L2
1729  mov res, eax
1730  L2:
1731  }
1732 #else /* IA32 */
1733  _asm {
1734  _emit 0xC7
1735  _emit 0xF8
1736  _emit 2
1737  _emit 0
1738  _emit 0
1739  _emit 0
1740  jmp L2
1741  mov res, eax
1742  L2:
1743  }
1744 #endif // KMP_ARCH_X86_64
1745 #else
1746  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1747  returned in %eax(%rax) on abort. Other register values are restored, so
1748  don't need to be killed.
1749 
1750  We must also mark 'res' as an input and an output, since otherwise
1751  'res=-1' may be dropped as being dead, whereas we do need the assignment on
1752  the successful (i.e., non-abort) path. */
1753  __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1754  " .long 1f-1b-6\n"
1755  " jmp 2f\n"
1756  "1: movl %%eax,%0\n"
1757  "2:"
1758  : "+r"(res)::"memory", "%eax");
1759 #endif // KMP_OS_WINDOWS
1760  return res;
1761 }
1762 
1763 /* Transaction end */
1764 static __inline void _xend() {
1765 #if KMP_OS_WINDOWS
1766  __asm {
1767  _emit 0x0f
1768  _emit 0x01
1769  _emit 0xd5
1770  }
1771 #else
1772  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1773 #endif
1774 }
1775 
1776 /* This is a macro, the argument must be a single byte constant which can be
1777  evaluated by the inline assembler, since it is emitted as a byte into the
1778  assembly code. */
1779 // clang-format off
1780 #if KMP_OS_WINDOWS
1781 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1782 #else
1783 #define _xabort(ARG) \
1784  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1785 #endif
1786 // clang-format on
1787 #endif // KMP_HAVE_RTM_INTRINSICS
1788 
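// [Editorial example] Canonical usage of the RTM primitives defined (or
// included) above, and the shape followed by the adaptive and rtm_* locks
// below: start a transaction, commit on success, and consult the abort status
// to decide between retrying and falling back to a real lock. The fallback_*
// callbacks and protected_data here are hypothetical placeholders.
#if 0 // illustrative sketch only; not part of the runtime
static void rtm_pattern_example(void (*fallback_acquire)(void),
                                void (*fallback_release)(void),
                                int *protected_data) {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    *protected_data += 1; // speculative critical section
    _xend();              // commit the transaction
  } else if (status & SOFT_ABORT_MASK) {
    // Transient abort (retry/conflict/explicit): the caller may try again.
  } else {
    fallback_acquire(); // hard abort: take the real lock instead
    *protected_data += 1;
    fallback_release();
  }
}
#endif
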
1789 // Statistics are collected for testing purposes.
1790 #if KMP_DEBUG_ADAPTIVE_LOCKS
1791 
1792 // We accumulate speculative lock statistics when the lock is destroyed. We
1793 // keep locks that haven't been destroyed in the liveLocks list so that we can
1794 // grab their statistics too.
1795 static kmp_adaptive_lock_statistics_t destroyedStats;
1796 
1797 // To hold the list of live locks.
1798 static kmp_adaptive_lock_info_t liveLocks;
1799 
1800 // A lock so we can safely update the list of locks.
1801 static kmp_bootstrap_lock_t chain_lock =
1802  KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1803 
1804 // Initialize the list of stats.
1805 void __kmp_init_speculative_stats() {
1806  kmp_adaptive_lock_info_t *lck = &liveLocks;
1807 
1808  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1809  sizeof(lck->stats));
1810  lck->stats.next = lck;
1811  lck->stats.prev = lck;
1812 
1813  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1814  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1815 
1816  __kmp_init_bootstrap_lock(&chain_lock);
1817 }
1818 
1819 // Insert the lock into the circular list
1820 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1821  __kmp_acquire_bootstrap_lock(&chain_lock);
1822 
1823  lck->stats.next = liveLocks.stats.next;
1824  lck->stats.prev = &liveLocks;
1825 
1826  liveLocks.stats.next = lck;
1827  lck->stats.next->stats.prev = lck;
1828 
1829  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1830  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1831 
1832  __kmp_release_bootstrap_lock(&chain_lock);
1833 }
1834 
1835 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1836  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838 
1839  kmp_adaptive_lock_info_t *n = lck->stats.next;
1840  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1841 
1842  n->stats.prev = p;
1843  p->stats.next = n;
1844 }
1845 
1846 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1847  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1848  sizeof(lck->stats));
1849  __kmp_remember_lock(lck);
1850 }
1851 
1852 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1853  kmp_adaptive_lock_info_t *lck) {
1854  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1855 
1856  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1857  t->successfulSpeculations += s->successfulSpeculations;
1858  t->hardFailedSpeculations += s->hardFailedSpeculations;
1859  t->softFailedSpeculations += s->softFailedSpeculations;
1860  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1861  t->lemmingYields += s->lemmingYields;
1862 }
1863 
1864 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1865  __kmp_acquire_bootstrap_lock(&chain_lock);
1866 
1867  __kmp_add_stats(&destroyedStats, lck);
1868  __kmp_forget_lock(lck);
1869 
1870  __kmp_release_bootstrap_lock(&chain_lock);
1871 }
1872 
1873 static float percent(kmp_uint32 count, kmp_uint32 total) {
1874  return (total == 0) ? 0.0 : (100.0 * count) / total;
1875 }
1876 
1877 void __kmp_print_speculative_stats() {
1878  kmp_adaptive_lock_statistics_t total = destroyedStats;
1879  kmp_adaptive_lock_info_t *lck;
1880 
1881  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1882  __kmp_add_stats(&total, lck);
1883  }
1884  kmp_adaptive_lock_statistics_t *t = &total;
1885  kmp_uint32 totalSections =
1886  t->nonSpeculativeAcquires + t->successfulSpeculations;
1887  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1888  t->hardFailedSpeculations +
1889  t->softFailedSpeculations;
1890  if (totalSections <= 0)
1891  return;
1892 
1893  kmp_safe_raii_file_t statsFile;
1894  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1895  statsFile.set_stdout();
1896  } else {
1897  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1898  char buffer[buffLen];
1899  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1900  (kmp_int32)getpid());
1901  statsFile.open(buffer, "w");
1902  }
1903 
1904  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1905  fprintf(statsFile,
1906  " Lock parameters: \n"
1907  " max_soft_retries : %10d\n"
1908  " max_badness : %10d\n",
1909  __kmp_adaptive_backoff_params.max_soft_retries,
1910  __kmp_adaptive_backoff_params.max_badness);
1911  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1912  t->nonSpeculativeAcquireAttempts);
1913  fprintf(statsFile, " Total critical sections : %10d\n",
1914  totalSections);
1915  fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1916  t->successfulSpeculations,
1917  percent(t->successfulSpeculations, totalSections));
1918  fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1919  t->nonSpeculativeAcquires,
1920  percent(t->nonSpeculativeAcquires, totalSections));
1921  fprintf(statsFile, " Lemming yields : %10d\n\n",
1922  t->lemmingYields);
1923 
1924  fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1925  totalSpeculations);
1926  fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1927  t->successfulSpeculations,
1928  percent(t->successfulSpeculations, totalSpeculations));
1929  fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1930  t->softFailedSpeculations,
1931  percent(t->softFailedSpeculations, totalSpeculations));
1932  fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1933  t->hardFailedSpeculations,
1934  percent(t->hardFailedSpeculations, totalSpeculations));
1935 }
1936 
1937 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1938 #else
1939 #define KMP_INC_STAT(lck, stat)
1940 
1941 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1942 
1943 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1944  // It is enough to check that the head_id is zero.
1945  // We don't also need to check the tail.
1946  bool res = lck->lk.head_id == 0;
1947 
1948 // We need a fence here, since we must ensure that no memory operations
1949 // from later in this thread float above that read.
1950 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1951  _mm_mfence();
1952 #else
1953  __sync_synchronize();
1954 #endif
1955 
1956  return res;
1957 }
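
// [Editorial note] The full fence above (mfence / __sync_synchronize) keeps
// reads issued later in this thread, in particular loads inside a following
// transaction, from being hoisted above the head_id load. In portable C++11
// terms roughly the same ordering could be expressed as:
#if 0 // illustrative sketch only; not part of the runtime
#include <atomic>
static void full_fence_equivalent(void) {
  std::atomic_thread_fence(std::memory_order_seq_cst); // full barrier
}
#endif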
1958 
1959 // Functions for manipulating the badness
1960 static __inline void
1961 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1962  // Reset the badness to zero so we eagerly try to speculate again
1963  lck->lk.adaptive.badness = 0;
1964  KMP_INC_STAT(lck, successfulSpeculations);
1965 }
1966 
1967 // Create a bit mask with one more set bit.
1968 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1969  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1970  if (newBadness > lck->lk.adaptive.max_badness) {
1971  return;
1972  } else {
1973  lck->lk.adaptive.badness = newBadness;
1974  }
1975 }
1976 
1977 // Check whether speculation should be attempted.
1978 KMP_ATTRIBUTE_TARGET_RTM
1979 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1980  kmp_int32 gtid) {
1981  kmp_uint32 badness = lck->lk.adaptive.badness;
1982  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1983  int res = (attempts & badness) == 0;
1984  return res;
1985 }
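
// [Editorial note] A self-contained model of the badness-based throttling used
// above. The toy_* names are illustrative only. After each failed speculation
// __kmp_step_badness shifts one more set bit into the mask, and
// __kmp_should_speculate only speculates when the attempt counter has no bits
// in common with it, so speculation is retried with exponentially decreasing
// frequency until a success resets the mask to zero.
#if 0 // illustrative sketch only; not part of the runtime
#include <cstdio>

static unsigned toy_badness = 0;
static const unsigned toy_max_badness = 16;

static bool toy_should_speculate(unsigned attempts) {
  return (attempts & toy_badness) == 0; // mirrors __kmp_should_speculate
}
static void toy_step_badness(void) { // mirrors __kmp_step_badness
  unsigned newBadness = (toy_badness << 1) | 1;
  if (newBadness <= toy_max_badness)
    toy_badness = newBadness;
}

int main() {
  // Mask growth after repeated failures: prints 0 1 3 7 15 15
  for (int i = 0; i < 6; ++i) {
    printf("%u ", toy_badness);
    toy_step_badness();
  }
  // With the mask stuck at 15, only one attempt in 16 tries speculation.
  printf("\ngate(16)=%d gate(17)=%d\n", (int)toy_should_speculate(16),
         (int)toy_should_speculate(17));
  return 0;
}
#endif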
1986 
1987 // Attempt to acquire only the speculative lock.
1988 // Does not back off to the non-speculative lock.
1989 KMP_ATTRIBUTE_TARGET_RTM
1990 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1991  kmp_int32 gtid) {
1992  int retries = lck->lk.adaptive.max_soft_retries;
1993 
1994  // We don't explicitly count the start of speculation, rather we record the
1995  // results (success, hard fail, soft fail). The sum of all of those is the
1996  // total number of times we started speculation since all speculations must
1997  // end one of those ways.
1998  do {
1999  kmp_uint32 status = _xbegin();
2000  // Switch this in to disable actual speculation but exercise at least some
2001  // of the rest of the code. Useful for debugging...
2002  // kmp_uint32 status = _XABORT_NESTED;
2003 
2004  if (status == _XBEGIN_STARTED) {
2005  /* We have successfully started speculation. Check that no-one acquired
2006  the lock for real between when we last looked and now. This also gets
2007  the lock cache line into our read-set, which we need so that we'll
2008  abort if anyone later claims it for real. */
2009  if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2010  // Lock is now visibly acquired, so someone beat us to it. Abort the
2011  // transaction so we'll restart from _xbegin with the failure status.
2012  _xabort(0x01);
2013  KMP_ASSERT2(0, "should not get here");
2014  }
2015  return 1; // Lock has been acquired (speculatively)
2016  } else {
2017  // We have aborted, update the statistics
2018  if (status & SOFT_ABORT_MASK) {
2019  KMP_INC_STAT(lck, softFailedSpeculations);
2020  // and loop round to retry.
2021  } else {
2022  KMP_INC_STAT(lck, hardFailedSpeculations);
2023  // Give up if we had a hard failure.
2024  break;
2025  }
2026  }
2027  } while (retries--); // Loop while we have retries, and didn't fail hard.
2028 
2029  // Either we had a hard failure or we didn't succeed softly after
2030  // the full set of attempts, so back off the badness.
2031  __kmp_step_badness(lck);
2032  return 0;
2033 }
2034 
2035 // Attempt to acquire the speculative lock, or back off to the non-speculative
2036 // one if the speculative lock cannot be acquired.
2037 // We can succeed speculatively, non-speculatively, or fail.
2038 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2039  // First try to acquire the lock speculatively
2040  if (__kmp_should_speculate(lck, gtid) &&
2041  __kmp_test_adaptive_lock_only(lck, gtid))
2042  return 1;
2043 
2044  // Speculative acquisition failed, so try to acquire it non-speculatively.
2045  // Count the non-speculative acquire attempt
2046  lck->lk.adaptive.acquire_attempts++;
2047 
2048  // Use base, non-speculative lock.
2049  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2050  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2051  return 1; // Lock is acquired (non-speculatively)
2052  } else {
2053  return 0; // Failed to acquire the lock, it's already visibly locked.
2054  }
2055 }
2056 
2057 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2058  kmp_int32 gtid) {
2059  char const *const func = "omp_test_lock";
2060  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2061  KMP_FATAL(LockIsUninitialized, func);
2062  }
2063 
2064  int retval = __kmp_test_adaptive_lock(lck, gtid);
2065 
2066  if (retval) {
2067  lck->lk.qlk.owner_id = gtid + 1;
2068  }
2069  return retval;
2070 }
2071 
2072 // Block until we can acquire a speculative, adaptive lock. We check whether we
2073 // should be trying to speculate. If we should be, we check the real lock to see
2074 // if it is free, and, if not, pause without attempting to acquire it until it
2075 // is. Then we try the speculative acquire. This means that although we suffer
2076 // from lemmings a little (because we can't acquire the lock speculatively
2077 // until the queue of threads waiting has cleared), we don't get into a state
2078 // where we can never acquire the lock speculatively (because we force the queue
2079 // to clear by preventing new arrivals from entering the queue). This does mean
2080 // that when we're trying to break lemmings, the lock is no longer fair. However,
2081 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2082 // problem.
2083 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2084  kmp_int32 gtid) {
2085  if (__kmp_should_speculate(lck, gtid)) {
2086  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2087  if (__kmp_test_adaptive_lock_only(lck, gtid))
2088  return;
2089  // We tried speculation and failed, so give up.
2090  } else {
2091  // We can't try speculation until the lock is free, so we pause here
2092  // (without suspending on the queuing lock) to allow it to drain, then
2093  // try again. All other threads will also see the same result for
2094  // __kmp_should_speculate, so they will be doing the same if they try to
2095  // claim the lock from now on.
2096  while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2097  KMP_INC_STAT(lck, lemmingYields);
2098  KMP_YIELD(TRUE);
2099  }
2100 
2101  if (__kmp_test_adaptive_lock_only(lck, gtid))
2102  return;
2103  }
2104  }
2105 
2106  // Speculative acquisition failed, so acquire it non-speculatively.
2107  // Count the non-speculative acquire attempt
2108  lck->lk.adaptive.acquire_attempts++;
2109 
2110  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2111  // We have acquired the base lock, so count that.
2112  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2113 }
2114 
2115 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2116  kmp_int32 gtid) {
2117  char const *const func = "omp_set_lock";
2118  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2119  KMP_FATAL(LockIsUninitialized, func);
2120  }
2121  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2122  KMP_FATAL(LockIsAlreadyOwned, func);
2123  }
2124 
2125  __kmp_acquire_adaptive_lock(lck, gtid);
2126 
2127  lck->lk.qlk.owner_id = gtid + 1;
2128 }
2129 
2130 KMP_ATTRIBUTE_TARGET_RTM
2131 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2132  kmp_int32 gtid) {
2133  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2134  lck))) { // If the lock doesn't look claimed we must be speculating.
2135  // (Or the user's code is buggy and they're releasing without locking;
2136  // if we had XTEST we'd be able to check that case...)
2137  _xend(); // Exit speculation
2138  __kmp_update_badness_after_success(lck);
2139  } else { // Since the lock *is* visibly locked we're not speculating,
2140  // so should use the underlying lock's release scheme.
2141  __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2142  }
2143  return KMP_LOCK_RELEASED;
2144 }
2145 
2146 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2147  kmp_int32 gtid) {
2148  char const *const func = "omp_unset_lock";
2149  KMP_MB(); /* in case another processor initialized lock */
2150  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2151  KMP_FATAL(LockIsUninitialized, func);
2152  }
2153  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2154  KMP_FATAL(LockUnsettingFree, func);
2155  }
2156  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2157  KMP_FATAL(LockUnsettingSetByAnother, func);
2158  }
2159  lck->lk.qlk.owner_id = 0;
2160  __kmp_release_adaptive_lock(lck, gtid);
2161  return KMP_LOCK_RELEASED;
2162 }
2163 
2164 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2165  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2166  lck->lk.adaptive.badness = 0;
2167  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2168  lck->lk.adaptive.max_soft_retries =
2169  __kmp_adaptive_backoff_params.max_soft_retries;
2170  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2171 #if KMP_DEBUG_ADAPTIVE_LOCKS
2172  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2173 #endif
2174  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2175 }
2176 
2177 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2178 #if KMP_DEBUG_ADAPTIVE_LOCKS
2179  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2180 #endif
2181  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2182  // Nothing needed for the speculative part.
2183 }
2184 
2185 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2186  char const *const func = "omp_destroy_lock";
2187  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2188  KMP_FATAL(LockIsUninitialized, func);
2189  }
2190  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2191  KMP_FATAL(LockStillOwned, func);
2192  }
2193  __kmp_destroy_adaptive_lock(lck);
2194 }
2195 
2196 #endif // KMP_USE_ADAPTIVE_LOCKS
2197 
2198 /* ------------------------------------------------------------------------ */
2199 /* DRDPA ticket locks */
2200 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2201 
2202 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2203  return lck->lk.owner_id - 1;
2204 }
2205 
2206 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2207  return lck->lk.depth_locked != -1;
2208 }
2209 
2210 __forceinline static int
2211 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2212  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2213  kmp_uint64 mask = lck->lk.mask; // atomic load
2214  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2215 
2216 #ifdef USE_LOCK_PROFILE
2217  if (polls[ticket & mask] != ticket)
2218  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2219 /* else __kmp_printf( "." );*/
2220 #endif /* USE_LOCK_PROFILE */
2221 
2222  // Now spin-wait, but reload the polls pointer and mask, in case the
2223  // polling area has been reconfigured. Unless it is reconfigured, the
2224  // reloads stay in L1 cache and are cheap.
2225  //
2226  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2227  // The current implementation of KMP_WAIT doesn't allow for mask
2228  // and poll to be re-read every spin iteration.
2229  kmp_uint32 spins;
2230  kmp_uint64 time;
2231  KMP_FSYNC_PREPARE(lck);
2232  KMP_INIT_YIELD(spins);
2233  KMP_INIT_BACKOFF(time);
2234  while (polls[ticket & mask] < ticket) { // atomic load
2235  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
2236  // Re-read the mask and the poll pointer from the lock structure.
2237  //
2238  // Make certain that "mask" is read before "polls" !!!
2239  //
2240  // If another thread reconfigures the polling area and updates these
2241  // values, and we get the new value of mask and the old polls pointer, we
2242  // could access memory beyond the end of the old polling area.
2243  mask = lck->lk.mask; // atomic load
2244  polls = lck->lk.polls; // atomic load
2245  }
2246 
2247  // Critical section starts here
2248  KMP_FSYNC_ACQUIRED(lck);
2249  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2250  ticket, lck));
2251  lck->lk.now_serving = ticket; // non-volatile store
2252 
2253  // Deallocate a garbage polling area if we know that we are the last
2254  // thread that could possibly access it.
2255  //
2256  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2257  // ticket.
2258  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2259  __kmp_free(lck->lk.old_polls);
2260  lck->lk.old_polls = NULL;
2261  lck->lk.cleanup_ticket = 0;
2262  }
2263 
2264  // Check to see if we should reconfigure the polling area.
2265  // If there is still a garbage polling area to be deallocated from a
2266  // previous reconfiguration, let a later thread reconfigure it.
2267  if (lck->lk.old_polls == NULL) {
2268  bool reconfigure = false;
2269  std::atomic<kmp_uint64> *old_polls = polls;
2270  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2271 
2272  if (TCR_4(__kmp_nth) >
2273  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2274  // We are in oversubscription mode. Contract the polling area
2275  // down to a single location, if that hasn't been done already.
2276  if (num_polls > 1) {
2277  reconfigure = true;
2278  num_polls = TCR_4(lck->lk.num_polls);
2279  mask = 0;
2280  num_polls = 1;
2281  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2282  sizeof(*polls));
2283  polls[0] = ticket;
2284  }
2285  } else {
2286  // We are in under/fully subscribed mode. Check the number of
2287  // threads waiting on the lock. The size of the polling area
2288  // should be at least the number of threads waiting.
2289  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2290  if (num_waiting > num_polls) {
2291  kmp_uint32 old_num_polls = num_polls;
2292  reconfigure = true;
2293  do {
2294  mask = (mask << 1) | 1;
2295  num_polls *= 2;
2296  } while (num_polls <= num_waiting);
2297 
2298  // Allocate the new polling area, and copy the relevant portion
2299  // of the old polling area to the new area. __kmp_allocate()
2300  // zeroes the memory it allocates, and most of the old area is
2301  // just zero padding, so we only copy the release counters.
2302  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2303  sizeof(*polls));
2304  kmp_uint32 i;
2305  for (i = 0; i < old_num_polls; i++) {
2306  polls[i].store(old_polls[i]);
2307  }
2308  }
2309  }
2310 
2311  if (reconfigure) {
2312  // Now write the updated fields back to the lock structure.
2313  //
2314  // Make certain that "polls" is written before "mask" !!!
2315  //
2316  // If another thread picks up the new value of mask and the old polls
2317  // pointer, it could access memory beyond the end of the old polling
2318  // area.
2319  //
2320  // On x86, we need memory fences.
2321  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2322  "lock %p to %d polls\n",
2323  ticket, lck, num_polls));
2324 
2325  lck->lk.old_polls = old_polls;
2326  lck->lk.polls = polls; // atomic store
2327 
2328  KMP_MB();
2329 
2330  lck->lk.num_polls = num_polls;
2331  lck->lk.mask = mask; // atomic store
2332 
2333  KMP_MB();
2334 
2335  // Only after the new polling area and mask have been flushed
2336  // to main memory can we update the cleanup ticket field.
2337  //
2338  // volatile load / non-volatile store
2339  lck->lk.cleanup_ticket = lck->lk.next_ticket;
2340  }
2341  }
2342  return KMP_LOCK_ACQUIRED_FIRST;
2343 }
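
// [Editorial note] Stripped of the reconfiguration logic, the DRDPA lock above
// is a ticket lock in which each waiter spins on its own slot of a polling
// array: acquire takes a ticket and waits for polls[ticket & mask] to reach
// that ticket, and release publishes now_serving + 1 into the matching slot
// (see __kmp_release_drdpa_lock below). A toy, single-threaded model of just
// that ticket arithmetic:
#if 0 // illustrative sketch only; not part of the runtime
#include <cassert>
#include <cstdint>

int main() {
  enum { NUM_POLLS = 4 };          // must stay a power of two
  uint64_t polls[NUM_POLLS] = {0}; // zero-filled, like __kmp_allocate()
  uint64_t mask = NUM_POLLS - 1;
  uint64_t next_ticket = 0, now_serving = 0;

  uint64_t ticket = next_ticket++;        // acquire: take a ticket
  assert(polls[ticket & mask] >= ticket); // our slot already shows our ticket
  now_serving = ticket;                   // ... critical section ...
  polls[(now_serving + 1) & mask] = now_serving + 1; // release: next ticket
  assert(polls[1 & mask] == 1);           // ticket #1 would stop spinning now
  return 0;
}
#endif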
2344 
2345 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2346  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2347  return retval;
2348 }
2349 
2350 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2351  kmp_int32 gtid) {
2352  char const *const func = "omp_set_lock";
2353  if (lck->lk.initialized != lck) {
2354  KMP_FATAL(LockIsUninitialized, func);
2355  }
2356  if (__kmp_is_drdpa_lock_nestable(lck)) {
2357  KMP_FATAL(LockNestableUsedAsSimple, func);
2358  }
2359  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2360  KMP_FATAL(LockIsAlreadyOwned, func);
2361  }
2362 
2363  __kmp_acquire_drdpa_lock(lck, gtid);
2364 
2365  lck->lk.owner_id = gtid + 1;
2366  return KMP_LOCK_ACQUIRED_FIRST;
2367 }
2368 
2369 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2370  // First get a ticket, then read the polls pointer and the mask.
2371  // The polls pointer must be read before the mask!!! (See above)
2372  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2373  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2374  kmp_uint64 mask = lck->lk.mask; // atomic load
2375  if (polls[ticket & mask] == ticket) {
2376  kmp_uint64 next_ticket = ticket + 1;
2377  if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2378  next_ticket)) {
2379  KMP_FSYNC_ACQUIRED(lck);
2380  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2381  ticket, lck));
2382  lck->lk.now_serving = ticket; // non-volatile store
2383 
2384  // Since no threads are waiting, there is no possibility that we would
2385  // want to reconfigure the polling area. We might have the cleanup ticket
2386  // value (which says that it is now safe to deallocate old_polls), but
2387  // we'll let a later thread which calls __kmp_acquire_lock do that - this
2388  // routine isn't supposed to block, and we would risk blocking if we called
2389  // __kmp_free() to do the deallocation.
2390  return TRUE;
2391  }
2392  }
2393  return FALSE;
2394 }
2395 
2396 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2397  kmp_int32 gtid) {
2398  char const *const func = "omp_test_lock";
2399  if (lck->lk.initialized != lck) {
2400  KMP_FATAL(LockIsUninitialized, func);
2401  }
2402  if (__kmp_is_drdpa_lock_nestable(lck)) {
2403  KMP_FATAL(LockNestableUsedAsSimple, func);
2404  }
2405 
2406  int retval = __kmp_test_drdpa_lock(lck, gtid);
2407 
2408  if (retval) {
2409  lck->lk.owner_id = gtid + 1;
2410  }
2411  return retval;
2412 }
2413 
2414 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2415  // Read the ticket value from the lock data struct, then the polls pointer and
2416  // the mask. The polls pointer must be read before the mask!!! (See above)
2417  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2418  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2419  kmp_uint64 mask = lck->lk.mask; // atomic load
2420  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2421  ticket - 1, lck));
2422  KMP_FSYNC_RELEASING(lck);
2423  polls[ticket & mask] = ticket; // atomic store
2424  return KMP_LOCK_RELEASED;
2425 }
2426 
2427 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2428  kmp_int32 gtid) {
2429  char const *const func = "omp_unset_lock";
2430  KMP_MB(); /* in case another processor initialized lock */
2431  if (lck->lk.initialized != lck) {
2432  KMP_FATAL(LockIsUninitialized, func);
2433  }
2434  if (__kmp_is_drdpa_lock_nestable(lck)) {
2435  KMP_FATAL(LockNestableUsedAsSimple, func);
2436  }
2437  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2438  KMP_FATAL(LockUnsettingFree, func);
2439  }
2440  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2441  (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2442  KMP_FATAL(LockUnsettingSetByAnother, func);
2443  }
2444  lck->lk.owner_id = 0;
2445  return __kmp_release_drdpa_lock(lck, gtid);
2446 }
2447 
2448 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2449  lck->lk.location = NULL;
2450  lck->lk.mask = 0;
2451  lck->lk.num_polls = 1;
2452  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2453  lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2454  lck->lk.cleanup_ticket = 0;
2455  lck->lk.old_polls = NULL;
2456  lck->lk.next_ticket = 0;
2457  lck->lk.now_serving = 0;
2458  lck->lk.owner_id = 0; // no thread owns the lock.
2459  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2460  lck->lk.initialized = lck;
2461 
2462  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2463 }
2464 
2465 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2466  lck->lk.initialized = NULL;
2467  lck->lk.location = NULL;
2468  if (lck->lk.polls.load() != NULL) {
2469  __kmp_free(lck->lk.polls.load());
2470  lck->lk.polls = NULL;
2471  }
2472  if (lck->lk.old_polls != NULL) {
2473  __kmp_free(lck->lk.old_polls);
2474  lck->lk.old_polls = NULL;
2475  }
2476  lck->lk.mask = 0;
2477  lck->lk.num_polls = 0;
2478  lck->lk.cleanup_ticket = 0;
2479  lck->lk.next_ticket = 0;
2480  lck->lk.now_serving = 0;
2481  lck->lk.owner_id = 0;
2482  lck->lk.depth_locked = -1;
2483 }
2484 
2485 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2486  char const *const func = "omp_destroy_lock";
2487  if (lck->lk.initialized != lck) {
2488  KMP_FATAL(LockIsUninitialized, func);
2489  }
2490  if (__kmp_is_drdpa_lock_nestable(lck)) {
2491  KMP_FATAL(LockNestableUsedAsSimple, func);
2492  }
2493  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2494  KMP_FATAL(LockStillOwned, func);
2495  }
2496  __kmp_destroy_drdpa_lock(lck);
2497 }
2498 
2499 // nested drdpa ticket locks
2500 
2501 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2502  KMP_DEBUG_ASSERT(gtid >= 0);
2503 
2504  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2505  lck->lk.depth_locked += 1;
2506  return KMP_LOCK_ACQUIRED_NEXT;
2507  } else {
2508  __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2509  KMP_MB();
2510  lck->lk.depth_locked = 1;
2511  KMP_MB();
2512  lck->lk.owner_id = gtid + 1;
2513  return KMP_LOCK_ACQUIRED_FIRST;
2514  }
2515 }
2516 
2517 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2518  kmp_int32 gtid) {
2519  char const *const func = "omp_set_nest_lock";
2520  if (lck->lk.initialized != lck) {
2521  KMP_FATAL(LockIsUninitialized, func);
2522  }
2523  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2524  KMP_FATAL(LockSimpleUsedAsNestable, func);
2525  }
2526  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2527 }
2528 
2529 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2530  int retval;
2531 
2532  KMP_DEBUG_ASSERT(gtid >= 0);
2533 
2534  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2535  retval = ++lck->lk.depth_locked;
2536  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2537  retval = 0;
2538  } else {
2539  KMP_MB();
2540  retval = lck->lk.depth_locked = 1;
2541  KMP_MB();
2542  lck->lk.owner_id = gtid + 1;
2543  }
2544  return retval;
2545 }
2546 
2547 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2548  kmp_int32 gtid) {
2549  char const *const func = "omp_test_nest_lock";
2550  if (lck->lk.initialized != lck) {
2551  KMP_FATAL(LockIsUninitialized, func);
2552  }
2553  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2554  KMP_FATAL(LockSimpleUsedAsNestable, func);
2555  }
2556  return __kmp_test_nested_drdpa_lock(lck, gtid);
2557 }
2558 
2559 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2560  KMP_DEBUG_ASSERT(gtid >= 0);
2561 
2562  KMP_MB();
2563  if (--(lck->lk.depth_locked) == 0) {
2564  KMP_MB();
2565  lck->lk.owner_id = 0;
2566  __kmp_release_drdpa_lock(lck, gtid);
2567  return KMP_LOCK_RELEASED;
2568  }
2569  return KMP_LOCK_STILL_HELD;
2570 }
2571 
2572 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2573  kmp_int32 gtid) {
2574  char const *const func = "omp_unset_nest_lock";
2575  KMP_MB(); /* in case another processor initialized lock */
2576  if (lck->lk.initialized != lck) {
2577  KMP_FATAL(LockIsUninitialized, func);
2578  }
2579  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2580  KMP_FATAL(LockSimpleUsedAsNestable, func);
2581  }
2582  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2583  KMP_FATAL(LockUnsettingFree, func);
2584  }
2585  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2586  KMP_FATAL(LockUnsettingSetByAnother, func);
2587  }
2588  return __kmp_release_nested_drdpa_lock(lck, gtid);
2589 }
2590 
2591 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2592  __kmp_init_drdpa_lock(lck);
2593  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2594 }
2595 
2596 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2597  __kmp_destroy_drdpa_lock(lck);
2598  lck->lk.depth_locked = 0;
2599 }
2600 
2601 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2602  char const *const func = "omp_destroy_nest_lock";
2603  if (lck->lk.initialized != lck) {
2604  KMP_FATAL(LockIsUninitialized, func);
2605  }
2606  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2607  KMP_FATAL(LockSimpleUsedAsNestable, func);
2608  }
2609  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2610  KMP_FATAL(LockStillOwned, func);
2611  }
2612  __kmp_destroy_nested_drdpa_lock(lck);
2613 }
2614 
2615 // access functions to fields which don't exist for all lock kinds.
2616 
2617 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2618  return lck->lk.location;
2619 }
2620 
2621 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2622  const ident_t *loc) {
2623  lck->lk.location = loc;
2624 }
2625 
2626 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2627  return lck->lk.flags;
2628 }
2629 
2630 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2631  kmp_lock_flags_t flags) {
2632  lck->lk.flags = flags;
2633 }
2634 
2635 // Time stamp counter
2636 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2637 #define __kmp_tsc() __kmp_hardware_timestamp()
2638 // Runtime's default backoff parameters
2639 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2640 #else
2641 // Use nanoseconds for other platforms
2642 extern kmp_uint64 __kmp_now_nsec();
2643 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2644 #define __kmp_tsc() __kmp_now_nsec()
2645 #endif
2646 
2647 // A useful predicate for dealing with timestamps that may wrap.
2648 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2649 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2650 // Times where going clockwise is less distance than going anti-clockwise
2651 // are in the future, others are in the past. E.g. with a = MAX-1 and b =
2652 // MAX+1 (= 0 after wrapping), unsigned a > b is true but a has not reached b;
2653 // the signed difference b - a = +2 captures the actual relationship.
2654 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2655  return ((kmp_int64)b - (kmp_int64)a) > 0;
2656 }
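
// [Editorial example] Wrap-around behaviour of before(): with timestamps just
// below the wrap point, a plain unsigned comparison gives the wrong answer,
// while the signed-difference test above stays correct.
#if 0 // illustrative sketch only; not part of the runtime
#include <cassert>
#include <cstdint>

static void before_wrap_example(void) {
  uint64_t a = UINT64_MAX - 1; // "now", just before the counter wraps
  uint64_t b = a + 2;          // two ticks later; wraps around to 0
  assert(!(a < b));            // unsigned compare claims b is in the past
  assert(before(a, b));        // signed difference: b is still in the future
}
#endif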
2657 
2658 // Truncated binary exponential backoff function
2659 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2660  // We could flatten this loop, but making it a nested loop gives better results.
2661  kmp_uint32 i;
2662  for (i = boff->step; i > 0; i--) {
2663  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2664 #if KMP_HAVE_UMWAIT
2665  if (__kmp_umwait_enabled) {
2666  __kmp_tpause(0, boff->min_tick);
2667  } else {
2668 #endif
2669  do {
2670  KMP_CPU_PAUSE();
2671  } while (before(__kmp_tsc(), goal));
2672 #if KMP_HAVE_UMWAIT
2673  }
2674 #endif
2675  }
2676  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2677 }
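
// [Editorial note] The step update above implements the truncated binary
// exponential backoff: each call roughly doubles the number of min_tick-long
// pause rounds, and the mask (max_backoff - 1) caps the step once it reaches
// max_backoff - 1 (max_backoff is a power of two, 4096 by default on x86). A
// standalone trace of the step sequence with a smaller cap:
#if 0 // illustrative sketch only; not part of the runtime
#include <cstdio>

int main() {
  unsigned step = 1, max_backoff = 16; // smaller cap than the default
  for (int i = 0; i < 8; ++i) {
    printf("%u ", step);                        // prints: 1 3 7 15 15 15 15 15
    step = (step << 1 | 1) & (max_backoff - 1); // same update as above
  }
  printf("\n");
  return 0;
}
#endif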
2678 
2679 #if KMP_USE_DYNAMIC_LOCK
2680 
2681 // Direct lock initializer. It simply writes a tag to the low 8 bits of the
2682 // lock word.
2683 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2684  kmp_dyna_lockseq_t seq) {
2685  TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));
2686  KA_TRACE(
2687  20,
2688  ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2689 }
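
// [Editorial note] The dyna lock word written above is self-describing: a
// direct lock keeps its (odd) tag in the low bits, while an indirect lock
// stores an even value, namely its table index shifted left by one (see
// __kmp_allocate_indirect_lock below). That is why the direct jump tables
// later in this file fill in only the odd slots and put the indirect entry
// point at slot 0. A hypothetical dispatcher, for illustration only:
#if 0 // illustrative sketch only; not part of the runtime
static int toy_direct_table_slot(kmp_uint32 lock_word) {
  // Odd words select a direct-lock function; even words (including 0) fall
  // through to the indirect-lock entry at slot 0 of the same table.
  return (lock_word & 1) ? (int)(lock_word & 0xFF) : 0;
}
#endif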
2690 
2691 #if KMP_USE_TSX
2692 
2693 // HLE lock functions - imported from the testbed runtime.
2694 #define HLE_ACQUIRE ".byte 0xf2;"
2695 #define HLE_RELEASE ".byte 0xf3;"
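
// [Editorial note] 0xf2 and 0xf3 are the XACQUIRE and XRELEASE legacy prefixes:
// on HLE-capable hardware they let the prefixed xchg / mov elide the lock and
// run the critical section transactionally, while on older CPUs they are
// simply ignored (treated as REPNE / REP hints), so the code below stays
// correct there as well.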
2696 
2697 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2698  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2699  return v;
2700 }
2701 
2702 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2703 
2704 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2705  TCW_4(*lck, 0);
2706 }
2707 
2708 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2709  // Use gtid for KMP_LOCK_BUSY if necessary
2710  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2711  int delay = 1;
2712  do {
2713  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2714  for (int i = delay; i != 0; --i)
2715  KMP_CPU_PAUSE();
2716  delay = ((delay << 1) | 1) & 7;
2717  }
2718  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2719  }
2720 }
2721 
2722 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2723  kmp_int32 gtid) {
2724  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2725 }
2726 
2727 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2728  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2729  : "=m"(*lck)
2730  : "r"(KMP_LOCK_FREE(hle))
2731  : "memory");
2732  return KMP_LOCK_RELEASED;
2733 }
2734 
2735 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2736  kmp_int32 gtid) {
2737  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2738 }
2739 
2740 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2741  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2742 }
2743 
2744 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2745  kmp_int32 gtid) {
2746  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2747 }
2748 
2749 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2750  __kmp_init_queuing_lock(lck);
2751 }
2752 
2753 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2754  __kmp_destroy_queuing_lock(lck);
2755 }
2756 
2757 static void
2758 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2759  __kmp_destroy_queuing_lock_with_checks(lck);
2760 }
2761 
2762 KMP_ATTRIBUTE_TARGET_RTM
2763 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2764  kmp_int32 gtid) {
2765  unsigned retries = 3, status;
2766  do {
2767  status = _xbegin();
2768  if (status == _XBEGIN_STARTED) {
2769  if (__kmp_is_unlocked_queuing_lock(lck))
2770  return;
2771  _xabort(0xff);
2772  }
2773  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2774  // Wait until lock becomes free
2775  while (!__kmp_is_unlocked_queuing_lock(lck)) {
2776  KMP_YIELD(TRUE);
2777  }
2778  } else if (!(status & _XABORT_RETRY))
2779  break;
2780  } while (retries--);
2781 
2782  // Fall-back non-speculative lock (xchg)
2783  __kmp_acquire_queuing_lock(lck, gtid);
2784 }
2785 
2786 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2787  kmp_int32 gtid) {
2788  __kmp_acquire_rtm_queuing_lock(lck, gtid);
2789 }
2790 
2791 KMP_ATTRIBUTE_TARGET_RTM
2792 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2793  kmp_int32 gtid) {
2794  if (__kmp_is_unlocked_queuing_lock(lck)) {
2795  // Releasing from speculation
2796  _xend();
2797  } else {
2798  // Releasing from a real lock
2799  __kmp_release_queuing_lock(lck, gtid);
2800  }
2801  return KMP_LOCK_RELEASED;
2802 }
2803 
2804 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2805  kmp_int32 gtid) {
2806  return __kmp_release_rtm_queuing_lock(lck, gtid);
2807 }
2808 
2809 KMP_ATTRIBUTE_TARGET_RTM
2810 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2811  kmp_int32 gtid) {
2812  unsigned retries = 3, status;
2813  do {
2814  status = _xbegin();
2815  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2816  return 1;
2817  }
2818  if (!(status & _XABORT_RETRY))
2819  break;
2820  } while (retries--);
2821 
2822  return __kmp_test_queuing_lock(lck, gtid);
2823 }
2824 
2825 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2826  kmp_int32 gtid) {
2827  return __kmp_test_rtm_queuing_lock(lck, gtid);
2828 }
2829 
2830 // Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2831 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2832 
2833 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2834  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2835 }
2836 
2837 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2838  __kmp_destroy_rtm_spin_lock(lck);
2839 }
2840 
2841 KMP_ATTRIBUTE_TARGET_RTM
2842 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2843  kmp_int32 gtid) {
2844  unsigned retries = 3, status;
2845  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2846  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2847  do {
2848  status = _xbegin();
2849  if (status == _XBEGIN_STARTED) {
2850  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2851  return KMP_LOCK_ACQUIRED_FIRST;
2852  _xabort(0xff);
2853  }
2854  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2855  // Wait until lock becomes free
2856  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2857  KMP_YIELD(TRUE);
2858  }
2859  } else if (!(status & _XABORT_RETRY))
2860  break;
2861  } while (retries--);
2862 
2863  // Fall-back spin lock
2864  KMP_FSYNC_PREPARE(lck);
2865  kmp_backoff_t backoff = __kmp_spin_backoff_params;
2866  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2867  !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2868  __kmp_spin_backoff(&backoff);
2869  }
2870  KMP_FSYNC_ACQUIRED(lck);
2871  return KMP_LOCK_ACQUIRED_FIRST;
2872 }
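
// [Editorial note] The fall-back path above is a test-and-test-and-set spin:
// cheap relaxed loads while the lock word looks busy, and an acquire
// compare-exchange only once it looks free. The same shape written against
// plain std::atomic (ttas_acquire and its parameters are illustrative, not
// runtime APIs):
#if 0 // illustrative sketch only; not part of the runtime
#include <atomic>

static void ttas_acquire(std::atomic<int> &word, int free_val, int busy_val) {
  int expected = free_val;
  while (word.load(std::memory_order_relaxed) != free_val ||
         !word.compare_exchange_strong(expected, busy_val,
                                       std::memory_order_acquire,
                                       std::memory_order_relaxed)) {
    expected = free_val; // a failed CAS overwrites 'expected'
    // A real implementation would back off here (cf. __kmp_spin_backoff).
  }
}
#endif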
2873 
2874 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2875  kmp_int32 gtid) {
2876  return __kmp_acquire_rtm_spin_lock(lck, gtid);
2877 }
2878 
2879 KMP_ATTRIBUTE_TARGET_RTM
2880 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2881  kmp_int32 gtid) {
2882  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2883  // Releasing from speculation
2884  _xend();
2885  } else {
2886  // Releasing from a real lock
2887  KMP_FSYNC_RELEASING(lck);
2888  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2889  }
2890  return KMP_LOCK_RELEASED;
2891 }
2892 
2893 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2894  kmp_int32 gtid) {
2895  return __kmp_release_rtm_spin_lock(lck, gtid);
2896 }
2897 
2898 KMP_ATTRIBUTE_TARGET_RTM
2899 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2900  unsigned retries = 3, status;
2901  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2902  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2903  do {
2904  status = _xbegin();
2905  if (status == _XBEGIN_STARTED &&
2906  KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2907  return TRUE;
2908  }
2909  if (!(status & _XABORT_RETRY))
2910  break;
2911  } while (retries--);
2912 
2913  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2914  __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2915  KMP_FSYNC_ACQUIRED(lck);
2916  return TRUE;
2917  }
2918  return FALSE;
2919 }
2920 
2921 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2922  kmp_int32 gtid) {
2923  return __kmp_test_rtm_spin_lock(lck, gtid);
2924 }
2925 
2926 #endif // KMP_USE_TSX
2927 
2928 // Entry functions for indirect locks (first element of direct lock jump tables)
2929 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2930  kmp_dyna_lockseq_t tag);
2931 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2932 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2933 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2934 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2935 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2936  kmp_int32);
2937 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2938  kmp_int32);
2939 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2940  kmp_int32);
2941 
2942 // Lock function definitions for the union parameter type
2943 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2944 
2945 #define expand1(lk, op) \
2946  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2947  __kmp_##op##_##lk##_##lock(&lock->lk); \
2948  }
2949 #define expand2(lk, op) \
2950  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2951  kmp_int32 gtid) { \
2952  return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2953  }
2954 #define expand3(lk, op) \
2955  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2956  kmp_lock_flags_t flags) { \
2957  __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2958  }
2959 #define expand4(lk, op) \
2960  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2961  const ident_t *loc) { \
2962  __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2963  }
2964 
2965 KMP_FOREACH_LOCK_KIND(expand1, init)
2966 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2967 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2968 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2969 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2970 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2971 KMP_FOREACH_LOCK_KIND(expand2, release)
2972 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2973 KMP_FOREACH_LOCK_KIND(expand2, test)
2974 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2975 KMP_FOREACH_LOCK_KIND(expand3, )
2976 KMP_FOREACH_LOCK_KIND(expand4, )
2977 
2978 #undef expand1
2979 #undef expand2
2980 #undef expand3
2981 #undef expand4
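
// [Editorial note] For reference, expand1(ticket, init) above token-pastes a
// wrapper of roughly this shape, forwarding the generic kmp_user_lock_p to the
// type-specific overload through the matching union member (hand-expanded
// here; the member name follows the lock kind):
#if 0 // illustrative sketch only; not part of the runtime
static void __kmp_init_ticket_lock(kmp_user_lock_p lock) {
  __kmp_init_ticket_lock(&lock->ticket); // resolves to the kmp_ticket_lock_t overload
}
#endif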
2982 
2983 // Jump tables for the direct lock functions
2984 // Only fill in the odd entries; that avoids the need to shift out the low bit.
2985 
2986 // init functions
2987 #define expand(l, op) 0, __kmp_init_direct_lock,
2988 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2989  __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2990 #undef expand
2991 
2992 // destroy functions
2993 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2994 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2995  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2996 #undef expand
2997 #define expand(l, op) \
2998  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2999 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
3000  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3001 #undef expand
3002 
3003 // set/acquire functions
3004 #define expand(l, op) \
3005  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3006 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
3007  __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3008 #undef expand
3009 #define expand(l, op) \
3010  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3011 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3012  __kmp_set_indirect_lock_with_checks, 0,
3013  KMP_FOREACH_D_LOCK(expand, acquire)};
3014 #undef expand
3015 
3016 // unset/release and test functions
3017 #define expand(l, op) \
3018  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3019 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3020  __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3021 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3022  __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3023 #undef expand
3024 #define expand(l, op) \
3025  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3026 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3027  __kmp_unset_indirect_lock_with_checks, 0,
3028  KMP_FOREACH_D_LOCK(expand, release)};
3029 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3030  __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3031 #undef expand
3032 
3033 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3034 void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3035 int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3036 int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3037 int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3038 
3039 // Jump tables for the indirect lock functions
3040 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3041 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3042  KMP_FOREACH_I_LOCK(expand, init)};
3043 #undef expand
3044 
3045 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3046 static void (*indirect_destroy[])(kmp_user_lock_p) = {
3047  KMP_FOREACH_I_LOCK(expand, destroy)};
3048 #undef expand
3049 #define expand(l, op) \
3050  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3051 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3052  KMP_FOREACH_I_LOCK(expand, destroy)};
3053 #undef expand
3054 
3055 // set/acquire functions
3056 #define expand(l, op) \
3057  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3058 static int (*indirect_set[])(kmp_user_lock_p,
3059  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3060 #undef expand
3061 #define expand(l, op) \
3062  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3063 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3064  KMP_FOREACH_I_LOCK(expand, acquire)};
3065 #undef expand
3066 
3067 // unset/release and test functions
3068 #define expand(l, op) \
3069  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3070 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3071  KMP_FOREACH_I_LOCK(expand, release)};
3072 static int (*indirect_test[])(kmp_user_lock_p,
3073  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3074 #undef expand
3075 #define expand(l, op) \
3076  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3077 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3078  KMP_FOREACH_I_LOCK(expand, release)};
3079 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3080  KMP_FOREACH_I_LOCK(expand, test)};
3081 #undef expand
3082 
3083 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3084 void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3085 int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3086 int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3087 int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3088 
3089 // Lock index table.
3090 kmp_indirect_lock_table_t __kmp_i_lock_table;
3091 
3092 // Size of indirect locks.
3093 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3094 
3095 // Jump tables for lock accessor/modifier.
3096 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3097  const ident_t *) = {0};
3098 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3099  kmp_lock_flags_t) = {0};
3100 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3101  kmp_user_lock_p) = {0};
3102 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3103  kmp_user_lock_p) = {0};
3104 
3105 // Use different lock pools for different lock types.
3106 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3107 
3108 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3109 // the indirect lock table holds the address and type of the allocated indirect
3110 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3111 // full. A destroyed indirect lock object is returned to the reusable pool of
3112 // locks, unique to each lock type.
3113 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3114  kmp_int32 gtid,
3115  kmp_indirect_locktag_t tag) {
3116  kmp_indirect_lock_t *lck;
3117  kmp_lock_index_t idx, table_idx;
3118 
3119  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3120 
3121  if (__kmp_indirect_lock_pool[tag] != NULL) {
3122  // Reuse the allocated and destroyed lock object
3123  lck = __kmp_indirect_lock_pool[tag];
3124  if (OMP_LOCK_T_SIZE < sizeof(void *))
3125  idx = lck->lock->pool.index;
3126  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3127  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3128  lck));
3129  } else {
3130  kmp_uint32 row, col;
3131  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3132  idx = 0;
3133  // Find location in list of lock tables to put new lock
3134  while (1) {
3135  table_idx = lock_table->next; // index within this table
3136  idx += lock_table->next; // global index within list of tables
3137  if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3138  row = table_idx / KMP_I_LOCK_CHUNK;
3139  col = table_idx % KMP_I_LOCK_CHUNK;
3140  // Allocate a new row of locks if necessary
3141  if (!lock_table->table[row]) {
3142  lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3143  sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3144  }
3145  break;
3146  }
3147  // Allocate a new lock table if necessary with double the capacity
3148  if (!lock_table->next_table) {
3149  kmp_indirect_lock_table_t *next_table =
3150  (kmp_indirect_lock_table_t *)__kmp_allocate(
3151  sizeof(kmp_indirect_lock_table_t));
3152  next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3153  sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3154  next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3155  next_table->next = 0;
3156  next_table->next_table = nullptr;
3157  lock_table->next_table = next_table;
3158  }
3159  lock_table = lock_table->next_table;
3160  KMP_ASSERT(lock_table);
3161  }
3162  lock_table->next++;
3163 
3164  lck = &lock_table->table[row][col];
3165  // Allocate a new base lock object
3166  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3167  KA_TRACE(20,
3168  ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3169  }
3170 
3171  __kmp_release_lock(&__kmp_global_lock, gtid);
3172 
3173  lck->type = tag;
3174 
3175  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3176  *(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =
3177  idx << 1; // indirect lock word must be even
3178  } else {
3179  *((kmp_indirect_lock_t **)user_lock) = lck;
3180  }
3181 
3182  return lck;
3183 }
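
// [Editorial note] The lock table above is a chain of two-level tables: each
// table owns nrow_ptrs row pointers, every row holds KMP_I_LOCK_CHUNK locks
// and is allocated lazily, and a full table links to a successor with twice as
// many row pointers. A toy trace of how capacity grows as tables are chained
// (the chunk size and initial row count here are illustrative):
#if 0 // illustrative sketch only; not part of the runtime
#include <cstdio>

int main() {
  const unsigned chunk = 1024; // stand-in for KMP_I_LOCK_CHUNK
  unsigned nrow_ptrs = 1;
  unsigned total_capacity = 0;
  for (int table = 0; table < 4; ++table) {
    total_capacity += nrow_ptrs * chunk;
    printf("table %d: %u rows, cumulative capacity %u locks\n", table,
           nrow_ptrs, total_capacity);
    nrow_ptrs *= 2; // next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs
  }
  return 0;
}
#endif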
3184 
3185 // User lock lookup for dynamically dispatched locks.
3186 static __forceinline kmp_indirect_lock_t *
3187 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3188  if (__kmp_env_consistency_check) {
3189  kmp_indirect_lock_t *lck = NULL;
3190  if (user_lock == NULL) {
3191  KMP_FATAL(LockIsUninitialized, func);
3192  }
3193  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3194  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3195  lck = __kmp_get_i_lock(idx);
3196  } else {
3197  lck = *((kmp_indirect_lock_t **)user_lock);
3198  }
3199  if (lck == NULL) {
3200  KMP_FATAL(LockIsUninitialized, func);
3201  }
3202  return lck;
3203  } else {
3204  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3205  return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3206  } else {
3207  return *((kmp_indirect_lock_t **)user_lock);
3208  }
3209  }
3210 }
3211 
3212 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3213  kmp_dyna_lockseq_t seq) {
3214 #if KMP_USE_ADAPTIVE_LOCKS
3215  if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3216  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3217  seq = lockseq_queuing;
3218  }
3219 #endif
3220 #if KMP_USE_TSX
3221  if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3222  seq = lockseq_queuing;
3223  }
3224 #endif
3225  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3226  kmp_indirect_lock_t *l =
3227  __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3228  KMP_I_LOCK_FUNC(l, init)(l->lock);
3229  KA_TRACE(
3230  20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3231  seq));
3232 }
3233 
3234 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3235  kmp_uint32 gtid = __kmp_entry_gtid();
3236  kmp_indirect_lock_t *l =
3237  __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3238  if (l == nullptr)
3239  return; // avoid segv if lock already destroyed
3240  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3241  kmp_indirect_locktag_t tag = l->type;
3242 
3243  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3244 
3245  // Use the base lock's space to keep the pool chain.
3246  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3247  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3248  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3249  }
3250  __kmp_indirect_lock_pool[tag] = l;
3251 
3252  __kmp_release_lock(&__kmp_global_lock, gtid);
3253 }
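// Note: destroying an indirect lock does not free its memory; the lock object
// is chained onto __kmp_indirect_lock_pool[tag] (reusing the base lock's own
// storage for the pool links) so that a later allocation with the same tag can
// reuse it. The memory is released in __kmp_cleanup_indirect_user_locks.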
3254 
3255 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3256  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3257  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3258 }
3259 
3260 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3261  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3262  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3263 }
3264 
3265 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3266  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3267  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3268 }
3269 
3270 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3271  kmp_int32 gtid) {
3272  kmp_indirect_lock_t *l =
3273  __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3274  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3275 }
3276 
3277 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3278  kmp_int32 gtid) {
3279  kmp_indirect_lock_t *l =
3280  __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3281  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3282 }
3283 
3284 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3285  kmp_int32 gtid) {
3286  kmp_indirect_lock_t *l =
3287  __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3288  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3289 }
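// The *_with_checks variants above differ from the unchecked ones only in
// resolving the lock through __kmp_lookup_indirect_lock, which reports a fatal
// error for NULL or uninitialized user locks when consistency checking is on.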
3290 
3291 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3292 
3293 // This is used only in kmp_error.cpp when consistency checking is on.
3294 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3295  switch (seq) {
3296  case lockseq_tas:
3297  case lockseq_nested_tas:
3298  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3299 #if KMP_USE_FUTEX
3300  case lockseq_futex:
3301  case lockseq_nested_futex:
3302  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3303 #endif
3304  case lockseq_ticket:
3305  case lockseq_nested_ticket:
3306  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3307  case lockseq_queuing:
3308  case lockseq_nested_queuing:
3309 #if KMP_USE_ADAPTIVE_LOCKS
3310  case lockseq_adaptive:
3311 #endif
3312  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3313  case lockseq_drdpa:
3314  case lockseq_nested_drdpa:
3315  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3316  default:
3317  return 0;
3318  }
3319 }
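// Note: nested lock sequences share the owner query with their non-nested
// counterparts, and adaptive locks report the owner of the underlying queuing
// lock.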
3320 
3321 // Initializes data for dynamic user locks.
3322 void __kmp_init_dynamic_user_locks() {
3323  // Initialize the jump tables for the lock functions
3324  if (__kmp_env_consistency_check) {
3325  __kmp_direct_set = direct_set_check;
3326  __kmp_direct_unset = direct_unset_check;
3327  __kmp_direct_test = direct_test_check;
3328  __kmp_direct_destroy = direct_destroy_check;
3329  __kmp_indirect_set = indirect_set_check;
3330  __kmp_indirect_unset = indirect_unset_check;
3331  __kmp_indirect_test = indirect_test_check;
3332  __kmp_indirect_destroy = indirect_destroy_check;
3333  } else {
3334  __kmp_direct_set = direct_set;
3335  __kmp_direct_unset = direct_unset;
3336  __kmp_direct_test = direct_test;
3337  __kmp_direct_destroy = direct_destroy;
3338  __kmp_indirect_set = indirect_set;
3339  __kmp_indirect_unset = indirect_unset;
3340  __kmp_indirect_test = indirect_test;
3341  __kmp_indirect_destroy = indirect_destroy;
3342  }
3343  // If the user locks have already been initialized, then return. Allow
3344  // switching between different KMP_CONSISTENCY_CHECK values, but do not
3345  // allocate new lock tables if they have already been allocated.
3346  if (__kmp_init_user_locks)
3347  return;
3348 
3349  // Initialize lock index table
3350  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
3351  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
3352  sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
3353  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3354  KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3355  __kmp_i_lock_table.next = 0;
3356  __kmp_i_lock_table.next_table = nullptr;
3357 
3358  // Indirect lock sizes
3359  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3360  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3361 #if KMP_USE_ADAPTIVE_LOCKS
3362  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3363 #endif
3364  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3365 #if KMP_USE_TSX
3366  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3367 #endif
3368  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3369 #if KMP_USE_FUTEX
3370  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3371 #endif
3372  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3373  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3374  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3375 
3376 // Initialize lock accessor/modifier
3377 #define fill_jumps(table, expand, sep) \
3378  { \
3379  table[locktag##sep##ticket] = expand(ticket); \
3380  table[locktag##sep##queuing] = expand(queuing); \
3381  table[locktag##sep##drdpa] = expand(drdpa); \
3382  }
3383 
3384 #if KMP_USE_ADAPTIVE_LOCKS
3385 #define fill_table(table, expand) \
3386  { \
3387  fill_jumps(table, expand, _); \
3388  table[locktag_adaptive] = expand(queuing); \
3389  fill_jumps(table, expand, _nested_); \
3390  }
3391 #else
3392 #define fill_table(table, expand) \
3393  { \
3394  fill_jumps(table, expand, _); \
3395  fill_jumps(table, expand, _nested_); \
3396  }
3397 #endif // KMP_USE_ADAPTIVE_LOCKS
3398 
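// For illustration, with the first 'expand' below fill_table turns each entry
// into something like
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))
//           __kmp_set_ticket_lock_location;
// so each indirect lock tag gets the accessor/modifier of its underlying lock
// type (adaptive locks reuse the queuing-lock versions).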
3399 #define expand(l) \
3400  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3401  fill_table(__kmp_indirect_set_location, expand);
3402 #undef expand
3403 #define expand(l) \
3404  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3405  fill_table(__kmp_indirect_set_flags, expand);
3406 #undef expand
3407 #define expand(l) \
3408  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3409  fill_table(__kmp_indirect_get_location, expand);
3410 #undef expand
3411 #define expand(l) \
3412  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3413  fill_table(__kmp_indirect_get_flags, expand);
3414 #undef expand
3415 
3416  __kmp_init_user_locks = TRUE;
3417 }
3418 
3419 // Clean up the lock table.
3420 void __kmp_cleanup_indirect_user_locks() {
3421  int k;
3422 
3423  // Clean up locks in the pools first (they were already destroyed before going
3424  // into the pools).
3425  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3426  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3427  while (l != NULL) {
3428  kmp_indirect_lock_t *ll = l;
3429  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3430  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3431  ll));
3432  __kmp_free(ll->lock);
3433  ll->lock = NULL;
3434  }
3435  __kmp_indirect_lock_pool[k] = NULL;
3436  }
3437  // Clean up the remaining undestroyed locks.
3438  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
3439  while (ptr) {
3440  for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
3441  if (!ptr->table[row])
3442  continue;
3443  for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
3444  kmp_indirect_lock_t *l = &ptr->table[row][col];
3445  if (l->lock) {
3446  // Locks not destroyed explicitly need to be destroyed here.
3447  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3448  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3449  "from table\n",
3450  l));
3451  __kmp_free(l->lock);
3452  }
3453  }
3454  __kmp_free(ptr->table[row]);
3455  }
3456  __kmp_free(ptr->table);
3457  kmp_indirect_lock_table_t *next_table = ptr->next_table;
3458  if (ptr != &__kmp_i_lock_table)
3459  __kmp_free(ptr);
3460  ptr = next_table;
3461  }
3462 
3463  __kmp_init_user_locks = FALSE;
3464 }
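// Note: only the chained next_table instances are freed above; the head table
// (__kmp_i_lock_table) is a global object, so just its row storage is freed.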
3465 
3466 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3467 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3468 
3469 #else // KMP_USE_DYNAMIC_LOCK
3470 
3471 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3472  __kmp_init_tas_lock(lck);
3473 }
3474 
3475 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3476  __kmp_init_nested_tas_lock(lck);
3477 }
3478 
3479 #if KMP_USE_FUTEX
3480 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3481  __kmp_init_futex_lock(lck);
3482 }
3483 
3484 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3485  __kmp_init_nested_futex_lock(lck);
3486 }
3487 #endif
3488 
3489 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3490  return lck == lck->lk.self;
3491 }
3492 
3493 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3494  __kmp_init_ticket_lock(lck);
3495 }
3496 
3497 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3498  __kmp_init_nested_ticket_lock(lck);
3499 }
3500 
3501 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3502  return lck == lck->lk.initialized;
3503 }
3504 
3505 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3506  __kmp_init_queuing_lock(lck);
3507 }
3508 
3509 static void
3510 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3511  __kmp_init_nested_queuing_lock(lck);
3512 }
3513 
3514 #if KMP_USE_ADAPTIVE_LOCKS
3515 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3516  __kmp_init_adaptive_lock(lck);
3517 }
3518 #endif
3519 
3520 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3521  return lck == lck->lk.initialized;
3522 }
3523 
3524 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3525  __kmp_init_drdpa_lock(lck);
3526 }
3527 
3528 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3529  __kmp_init_nested_drdpa_lock(lck);
3530 }
3531 
3532 /* user locks
3533  * They are implemented as a table of function pointers which are set to the
3534  * lock functions of the appropriate kind, once that has been determined. */
3535 
3536 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3537 
3538 size_t __kmp_base_user_lock_size = 0;
3539 size_t __kmp_user_lock_size = 0;
3540 
3541 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3542 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3543  kmp_int32 gtid) = NULL;
3544 
3545 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3546  kmp_int32 gtid) = NULL;
3547 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3548  kmp_int32 gtid) = NULL;
3549 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3550 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3551 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3552 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3553  kmp_int32 gtid) = NULL;
3554 
3555 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3556  kmp_int32 gtid) = NULL;
3557 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3558  kmp_int32 gtid) = NULL;
3559 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3560 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3561 
3562 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3563 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3564 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3565  const ident_t *loc) = NULL;
3566 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3567 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3568  kmp_lock_flags_t flags) = NULL;
3569 
3570 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3571  switch (user_lock_kind) {
3572  case lk_default:
3573  default:
3574  KMP_ASSERT(0);
3575 
3576  case lk_tas: {
3577  __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3578  __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3579 
3580  __kmp_get_user_lock_owner_ =
3581  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3582 
3583  if (__kmp_env_consistency_check) {
3584  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3585  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3586  } else {
3587  KMP_BIND_USER_LOCK(tas);
3588  KMP_BIND_NESTED_USER_LOCK(tas);
3589  }
3590 
3591  __kmp_destroy_user_lock_ =
3592  (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3593 
3594  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3595 
3596  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3597 
3598  __kmp_set_user_lock_location_ =
3599  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3600 
3601  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3602 
3603  __kmp_set_user_lock_flags_ =
3604  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3605  } break;
3606 
3607 #if KMP_USE_FUTEX
3608 
3609  case lk_futex: {
3610  __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3611  __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3612 
3613  __kmp_get_user_lock_owner_ =
3614  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3615 
3616  if (__kmp_env_consistency_check) {
3617  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3618  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3619  } else {
3620  KMP_BIND_USER_LOCK(futex);
3621  KMP_BIND_NESTED_USER_LOCK(futex);
3622  }
3623 
3624  __kmp_destroy_user_lock_ =
3625  (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3626 
3627  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3628 
3629  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3630 
3631  __kmp_set_user_lock_location_ =
3632  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3633 
3634  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3635 
3636  __kmp_set_user_lock_flags_ =
3637  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3638  } break;
3639 
3640 #endif // KMP_USE_FUTEX
3641 
3642  case lk_ticket: {
3643  __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3644  __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3645 
3646  __kmp_get_user_lock_owner_ =
3647  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3648 
3649  if (__kmp_env_consistency_check) {
3650  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3651  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3652  } else {
3653  KMP_BIND_USER_LOCK(ticket);
3654  KMP_BIND_NESTED_USER_LOCK(ticket);
3655  }
3656 
3657  __kmp_destroy_user_lock_ =
3658  (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3659 
3660  __kmp_is_user_lock_initialized_ =
3661  (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3662 
3663  __kmp_get_user_lock_location_ =
3664  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3665 
3666  __kmp_set_user_lock_location_ = (void (*)(
3667  kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3668 
3669  __kmp_get_user_lock_flags_ =
3670  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3671 
3672  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3673  &__kmp_set_ticket_lock_flags);
3674  } break;
3675 
3676  case lk_queuing: {
3677  __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3678  __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3679 
3680  __kmp_get_user_lock_owner_ =
3681  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3682 
3683  if (__kmp_env_consistency_check) {
3684  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3685  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3686  } else {
3687  KMP_BIND_USER_LOCK(queuing);
3688  KMP_BIND_NESTED_USER_LOCK(queuing);
3689  }
3690 
3691  __kmp_destroy_user_lock_ =
3692  (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3693 
3694  __kmp_is_user_lock_initialized_ =
3695  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3696 
3697  __kmp_get_user_lock_location_ =
3698  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3699 
3700  __kmp_set_user_lock_location_ = (void (*)(
3701  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3702 
3703  __kmp_get_user_lock_flags_ =
3704  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3705 
3706  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3707  &__kmp_set_queuing_lock_flags);
3708  } break;
3709 
3710 #if KMP_USE_ADAPTIVE_LOCKS
3711  case lk_adaptive: {
3712  __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3713  __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3714 
3715  __kmp_get_user_lock_owner_ =
3716  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3717 
3718  if (__kmp_env_consistency_check) {
3719  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3720  } else {
3721  KMP_BIND_USER_LOCK(adaptive);
3722  }
3723 
3724  __kmp_destroy_user_lock_ =
3725  (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3726 
3727  __kmp_is_user_lock_initialized_ =
3728  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3729 
3730  __kmp_get_user_lock_location_ =
3731  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3732 
3733  __kmp_set_user_lock_location_ = (void (*)(
3734  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3735 
3736  __kmp_get_user_lock_flags_ =
3737  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3738 
3739  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3740  &__kmp_set_queuing_lock_flags);
3741 
3742  } break;
3743 #endif // KMP_USE_ADAPTIVE_LOCKS
3744 
3745  case lk_drdpa: {
3746  __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3747  __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3748 
3749  __kmp_get_user_lock_owner_ =
3750  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3751 
3752  if (__kmp_env_consistency_check) {
3753  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3754  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3755  } else {
3756  KMP_BIND_USER_LOCK(drdpa);
3757  KMP_BIND_NESTED_USER_LOCK(drdpa);
3758  }
3759 
3760  __kmp_destroy_user_lock_ =
3761  (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3762 
3763  __kmp_is_user_lock_initialized_ =
3764  (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3765 
3766  __kmp_get_user_lock_location_ =
3767  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3768 
3769  __kmp_set_user_lock_location_ = (void (*)(
3770  kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3771 
3772  __kmp_get_user_lock_flags_ =
3773  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3774 
3775  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3776  &__kmp_set_drdpa_lock_flags);
3777  } break;
3778  }
3779 }
3780 
3781 // ----------------------------------------------------------------------------
3782 // User lock table & lock allocation
3783 
3784 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3785 kmp_user_lock_p __kmp_lock_pool = NULL;
3786 
3787 // Lock block-allocation support.
3788 kmp_block_of_locks *__kmp_lock_blocks = NULL;
3789 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3790 
3791 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3792  // Assume that kmp_global_lock is held upon entry/exit.
3793  kmp_lock_index_t index;
3794  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3795  kmp_lock_index_t size;
3796  kmp_user_lock_p *table;
3797  // Reallocate lock table.
3798  if (__kmp_user_lock_table.allocated == 0) {
3799  size = 1024;
3800  } else {
3801  size = __kmp_user_lock_table.allocated * 2;
3802  }
3803  table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3804  KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3805  sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3806  table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3807  // We cannot free the previous table now, since it may be in use by other
3808  // threads. So save the pointer to the previous table in the first
3809  // element of the new table. All the tables are organized into a list and
3810  // can be freed when the library is shutting down.
3811  __kmp_user_lock_table.table = table;
3812  __kmp_user_lock_table.allocated = size;
3813  }
3814  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3815  __kmp_user_lock_table.allocated);
3816  index = __kmp_user_lock_table.used;
3817  __kmp_user_lock_table.table[index] = lck;
3818  ++__kmp_user_lock_table.used;
3819  return index;
3820 }
3821 
3822 static kmp_user_lock_p __kmp_lock_block_allocate() {
3823  // Assume that kmp_global_lock is held upon entry/exit.
3824  static int last_index = 0;
3825  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3826  // Restart the index.
3827  last_index = 0;
3828  // Need to allocate a new block.
3829  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3830  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3831  char *buffer =
3832  (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3833  // Set up the new block.
3834  kmp_block_of_locks *new_block =
3835  (kmp_block_of_locks *)(&buffer[space_for_locks]);
3836  new_block->next_block = __kmp_lock_blocks;
3837  new_block->locks = (void *)buffer;
3838  // Publish the new block.
3839  KMP_MB();
3840  __kmp_lock_blocks = new_block;
3841  }
3842  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3843  ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3844  last_index++;
3845  return ret;
3846 }
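// Note: each block is a single allocation holding __kmp_num_locks_in_block
// lock slots followed by the kmp_block_of_locks header at the end of the
// buffer, which is why __kmp_cleanup_user_locks frees only block_ptr->locks
// rather than the header separately.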
3847 
3848 // Get memory for a lock. It may be freshly allocated memory or reused memory
3849 // from the lock pool.
3850 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3851  kmp_lock_flags_t flags) {
3852  kmp_user_lock_p lck;
3853  kmp_lock_index_t index;
3854  KMP_DEBUG_ASSERT(user_lock);
3855 
3856  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3857 
3858  if (__kmp_lock_pool == NULL) {
3859  // Lock pool is empty. Allocate new memory.
3860 
3861  if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3862  lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3863  } else {
3864  lck = __kmp_lock_block_allocate();
3865  }
3866 
3867  // Insert the lock in the table so that it can be freed in __kmp_cleanup,
3868  // and the debugger has info on all allocated locks.
3869  index = __kmp_lock_table_insert(lck);
3870  } else {
3871  // Pick up a lock from the pool.
3872  lck = __kmp_lock_pool;
3873  index = __kmp_lock_pool->pool.index;
3874  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3875  }
3876 
3877  // We could potentially differentiate between nested and regular locks
3878  // here, and do the lock table lookup for regular locks only.
3879  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3880  *((kmp_lock_index_t *)user_lock) = index;
3881  } else {
3882  *((kmp_user_lock_p *)user_lock) = lck;
3883  }
3884 
3885  // Mark the lock if it is a critical section lock.
3886  __kmp_set_user_lock_flags(lck, flags);
3887 
3888  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line earlier
3889 
3890  return lck;
3891 }
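// Note: as with the dynamic-lock path, when omp_lock_t cannot hold a pointer
// the user's lock word stores the table index, and __kmp_lookup_user_lock maps
// it back through __kmp_user_lock_table; otherwise the kmp_user_lock_p is
// stored in the lock word directly.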
3892 
3893 // Return the lock's memory to the pool for reuse.
3894 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3895  kmp_user_lock_p lck) {
3896  KMP_DEBUG_ASSERT(user_lock != NULL);
3897  KMP_DEBUG_ASSERT(lck != NULL);
3898 
3899  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3900 
3901  lck->pool.next = __kmp_lock_pool;
3902  __kmp_lock_pool = lck;
3903  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3904  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3905  KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3906  lck->pool.index = index;
3907  }
3908 
3909  __kmp_release_lock(&__kmp_global_lock, gtid);
3910 }
3911 
3912 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3913  kmp_user_lock_p lck = NULL;
3914 
3915  if (__kmp_env_consistency_check) {
3916  if (user_lock == NULL) {
3917  KMP_FATAL(LockIsUninitialized, func);
3918  }
3919  }
3920 
3921  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3922  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3923  if (__kmp_env_consistency_check) {
3924  if (!(0 < index && index < __kmp_user_lock_table.used)) {
3925  KMP_FATAL(LockIsUninitialized, func);
3926  }
3927  }
3928  KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3929  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3930  lck = __kmp_user_lock_table.table[index];
3931  } else {
3932  lck = *((kmp_user_lock_p *)user_lock);
3933  }
3934 
3935  if (__kmp_env_consistency_check) {
3936  if (lck == NULL) {
3937  KMP_FATAL(LockIsUninitialized, func);
3938  }
3939  }
3940 
3941  return lck;
3942 }
3943 
3944 void __kmp_cleanup_user_locks(void) {
3945  // Reset the lock pool. Don't worry about the locks in the pool -- we will free
3946  // them when iterating through the lock table (it includes all locks, dead or alive).
3947  __kmp_lock_pool = NULL;
3948 
3949 #define IS_CRITICAL(lck) \
3950  ((__kmp_get_user_lock_flags_ != NULL) && \
3951  ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3952 
3953  // Loop through the lock table, freeing all locks.
3954  // Do not free item [0]; it is reserved for the lock tables list.
3955  //
3956  // FIXME - we are iterating through a list of (pointers to) objects of type
3957  // union kmp_user_lock, but we have no way of knowing whether the base type is
3958  // currently "pool" or whatever the global user lock type is.
3959  //
3960  // We are relying on the fact that for all of the user lock types
3961  // (except "tas"), the first field in the lock struct is the "initialized"
3962  // field, which is set to the address of the lock object itself when
3963  // the lock is initialized. When the union is of type "pool", the
3964  // first field is a pointer to the next object in the free list, which
3965  // will not be the same address as the object itself.
3966  //
3967  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3968  // for "pool" objects on the free list. This must happen as the "location"
3969  // field of real user locks overlaps the "index" field of "pool" objects.
3970  //
3971  // It would be better to run through the free list, and remove all "pool"
3972  // objects from the lock table before executing this loop. However,
3973  // "pool" objects do not always have their index field set (only on
3974  // lin_32e), and I don't want to search the lock table for the address
3975  // of every "pool" object on the free list.
3976  while (__kmp_user_lock_table.used > 1) {
3977  const ident *loc;
3978 
3979  // Reduce __kmp_user_lock_table.used before freeing the lock,
3980  // so that the state of the locks stays consistent.
3981  kmp_user_lock_p lck =
3982  __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3983 
3984  if ((__kmp_is_user_lock_initialized_ != NULL) &&
3985  (*__kmp_is_user_lock_initialized_)(lck)) {
3986  // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is initialized AND
3987  // it is NOT a critical section (the user is not responsible for destroying
3988  // criticals) AND we know the source location to report.
3989  if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3990  ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3991  (loc->psource != NULL)) {
3992  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3993  KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3994  __kmp_str_loc_free(&str_loc);
3995  }
3996 
3997 #ifdef KMP_DEBUG
3998  if (IS_CRITICAL(lck)) {
3999  KA_TRACE(
4000  20,
4001  ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
4002  lck, *(void **)lck));
4003  } else {
4004  KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
4005  *(void **)lck));
4006  }
4007 #endif // KMP_DEBUG
4008 
4009  // Clean up the lock's internal dynamic resources (particularly for drdpa locks).
4010  __kmp_destroy_user_lock(lck);
4011  }
4012 
4013  // Free the lock if block allocation of locks is not used.
4014  if (__kmp_lock_blocks == NULL) {
4015  __kmp_free(lck);
4016  }
4017  }
4018 
4019 #undef IS_CRITICAL
4020 
4021  // Delete the lock table(s).
4022  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4023  __kmp_user_lock_table.table = NULL;
4024  __kmp_user_lock_table.allocated = 0;
4025 
4026  while (table_ptr != NULL) {
4027  // In the first element we saved the pointer to the previous
4028  // (smaller) lock table.
4029  kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
4030  __kmp_free(table_ptr);
4031  table_ptr = next;
4032  }
4033 
4034  // Free buffers allocated for blocks of locks.
4035  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4036  __kmp_lock_blocks = NULL;
4037 
4038  while (block_ptr != NULL) {
4039  kmp_block_of_locks_t *next = block_ptr->next_block;
4040  __kmp_free(block_ptr->locks);
4041  // *block_ptr itself was allocated at the end of the locks vector.
4042  block_ptr = next;
4043  }
4044 
4045  TCW_4(__kmp_init_user_locks, FALSE);
4046 }
4047 
4048 #endif // KMP_USE_DYNAMIC_LOCK