LLVM OpenMP* Runtime Library
kmp_alloc.cpp
1 /*
2  * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_io.h"
15 #include "kmp_wrapper_malloc.h"
16 
17 #if KMP_USE_HWLOC
18 #if HWLOC_API_VERSION > 0x00020300
19 #define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20 #elif HWLOC_API_VERSION == 0x00020300
21 #define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22  hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
23 #else
24 enum hwloc_memattr_id_e {
25  HWLOC_MEMATTR_ID_BANDWIDTH,
26  HWLOC_MEMATTR_ID_CAPACITY
27 };
28 #endif
29 #endif // KMP_USE_HWLOC
30 
31 // Disable bget when it is not used
32 #if KMP_USE_BGET
33 
34 /* Thread private buffer management code */
35 
36 typedef int (*bget_compact_t)(size_t, int);
37 typedef void *(*bget_acquire_t)(size_t);
38 typedef void (*bget_release_t)(void *);
39 
40 /* NOTE: bufsize must be a signed datatype */
41 
42 #if KMP_OS_WINDOWS
43 #if KMP_ARCH_X86 || KMP_ARCH_ARM
44 typedef kmp_int32 bufsize;
45 #else
46 typedef kmp_int64 bufsize;
47 #endif
48 #else
49 typedef ssize_t bufsize;
50 #endif // KMP_OS_WINDOWS
51 
52 /* The three modes of operation are fifo search, lifo search, and best-fit. */
53 
54 typedef enum bget_mode {
55  bget_mode_fifo = 0,
56  bget_mode_lifo = 1,
57  bget_mode_best = 2
58 } bget_mode_t;
59 
60 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
61 static void *bget(kmp_info_t *th, bufsize size);
62 static void *bgetz(kmp_info_t *th, bufsize size);
63 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
64 static void brel(kmp_info_t *th, void *buf);
65 static void bectl(kmp_info_t *th, bget_compact_t compact,
66  bget_acquire_t acquire, bget_release_t release,
67  bufsize pool_incr);
68 
69 /* BGET CONFIGURATION */
70 /* Buffer allocation size quantum: all buffers allocated are a
71  multiple of this size. This MUST be a power of two. */
72 
73 /* On IA-32 architecture with Linux* OS, malloc() does not
74  ensure 16 byte alignment */
75 
76 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
77 
78 #define SizeQuant 8
79 #define AlignType double
80 
81 #else
82 
83 #define SizeQuant 16
84 #define AlignType _Quad
85 
86 #endif
87 
88 // Define this symbol to enable the bstats() function which calculates the
89 // total free space in the buffer pool, the largest available buffer, and the
90 // total space currently allocated.
91 #define BufStats 1
92 
93 #ifdef KMP_DEBUG
94 
95 // Define this symbol to enable the bpoold() function which dumps the buffers
96 // in a buffer pool.
97 #define BufDump 1
98 
99 // Define this symbol to enable the bpoolv() function for validating a buffer
100 // pool.
101 #define BufValid 1
102 
103 // Define this symbol to enable the bufdump() function which allows dumping the
104 // contents of an allocated or free buffer.
105 #define DumpData 1
106 
107 #ifdef NOT_USED_NOW
108 
109 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
110 // who attempt to use pointers into released buffers.
111 #define FreeWipe 1
112 
113 // Use a best fit algorithm when searching for space for an allocation request.
114 // This uses memory more efficiently, but allocation will be much slower.
115 #define BestFit 1
116 
117 #endif /* NOT_USED_NOW */
118 #endif /* KMP_DEBUG */
119 
120 static bufsize bget_bin_size[] = {
121  0,
122  // 1 << 6, /* .5 Cache line */
123  1 << 7, /* 1 Cache line, new */
124  1 << 8, /* 2 Cache lines */
125  1 << 9, /* 4 Cache lines, new */
126  1 << 10, /* 8 Cache lines */
127  1 << 11, /* 16 Cache lines, new */
128  1 << 12, 1 << 13, /* new */
129  1 << 14, 1 << 15, /* new */
130  1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
131  1 << 21, /* 2MB */
132  1 << 22, /* 4MB */
133  1 << 23, /* 8MB */
134  1 << 24, /* 16MB */
135  1 << 25, /* 32MB */
136 };
137 
138 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
139 
140 struct bfhead;
141 
142 // Declare the interface, including the requested buffer size type, bufsize.
143 
144 /* Queue links */
145 typedef struct qlinks {
146  struct bfhead *flink; /* Forward link */
147  struct bfhead *blink; /* Backward link */
148 } qlinks_t;
149 
150 /* Header in allocated and free buffers */
151 typedef struct bhead2 {
152  kmp_info_t *bthr; /* The thread which owns the buffer pool */
153  bufsize prevfree; /* Relative link back to previous free buffer in memory or
154  0 if previous buffer is allocated. */
155  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
156 } bhead2_t;
157 
158 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
159 typedef union bhead {
160  KMP_ALIGN(SizeQuant)
161  AlignType b_align;
162  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
163  bhead2_t bb;
164 } bhead_t;
165 #define BH(p) ((bhead_t *)(p))
166 
167 /* Header in directly allocated buffers (by acqfcn) */
168 typedef struct bdhead {
169  bufsize tsize; /* Total size, including overhead */
170  bhead_t bh; /* Common header */
171 } bdhead_t;
172 #define BDH(p) ((bdhead_t *)(p))
173 
174 /* Header in free buffers */
175 typedef struct bfhead {
176  bhead_t bh; /* Common allocated/free header */
177  qlinks_t ql; /* Links on free list */
178 } bfhead_t;
179 #define BFH(p) ((bfhead_t *)(p))
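/* How the headers above are used (following the sign conventions documented in
   bhead2_t):
   - a free buffer is a bfhead_t: the common header (bsize > 0) plus the
     qlinks_t that thread it onto one of the freelist bins;
   - an allocated buffer carries only a bhead_t with bsize < 0, and the next
     header's prevfree is 0;
   - a buffer obtained directly through acqfcn uses bdhead_t and never appears
     on a free list (its bsize is 0);
   - the dummy header that ends a pool block has bsize == ESent.
   prevfree holds the size of an immediately preceding free buffer, or 0 when
   the preceding buffer is allocated, so backward coalescing in brel() is a
   single pointer adjustment. */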
180 
181 typedef struct thr_data {
182  bfhead_t freelist[MAX_BGET_BINS];
183 #if BufStats
184  size_t totalloc; /* Total space currently allocated */
185  long numget, numrel; /* Number of bget() and brel() calls */
186  long numpblk; /* Number of pool blocks */
187  long numpget, numprel; /* Number of block gets and rels */
188  long numdget, numdrel; /* Number of direct gets and rels */
189 #endif /* BufStats */
190 
191  /* Automatic expansion block management functions */
192  bget_compact_t compfcn;
193  bget_acquire_t acqfcn;
194  bget_release_t relfcn;
195 
196  bget_mode_t mode; /* what allocation mode to use? */
197 
198  bufsize exp_incr; /* Expansion block size */
199  bufsize pool_len; /* 0: no bpool calls have been made
200  -1: not all pool blocks are the same size
201  >0: (common) block size for all bpool calls made so far
202  */
203  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
204 } thr_data_t;
205 
206 /* Minimum allocation quantum: */
207 #define QLSize (sizeof(qlinks_t))
208 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
209 #define MaxSize \
210  (bufsize)( \
211  ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
212 // Maximum for the requested size.
213 
214 /* End sentinel: value placed in bsize field of dummy block delimiting
215  end of pool block. The most negative number which will fit in a
216  bufsize, defined in a way that the compiler will accept. */
217 
218 #define ESent \
219  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
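/* Note: for a 64-bit bufsize the expression above evaluates to
   -((2^62 - 1) * 2) - 2 == -2^63, i.e. the minimum representable value,
   written so that no intermediate step overflows. */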
220 
221 /* Thread Data management routines */
222 static int bget_get_bin(bufsize size) {
223  // binary chop bins
224  int lo = 0, hi = MAX_BGET_BINS - 1;
225 
226  KMP_DEBUG_ASSERT(size > 0);
227 
228  while ((hi - lo) > 1) {
229  int mid = (lo + hi) >> 1;
230  if (size < bget_bin_size[mid])
231  hi = mid - 1;
232  else
233  lo = mid;
234  }
235 
236  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
237 
238  return lo;
239 }
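/* Example: bget_get_bin(300) selects the 256-byte bin, while
   bget_get_bin(70000) selects the 32768-byte bin (the chop is approximate,
   which only affects where the upward scan starts, not correctness).
   Because bget() picks the first bin of its search with this same function
   used by __kmp_bget_insert_into_freelist(), and the mapping never shrinks
   as the size grows, any free buffer large enough for a request is reachable
   from that request's bin scan. */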
240 
241 static void set_thr_data(kmp_info_t *th) {
242  int i;
243  thr_data_t *data;
244 
245  data = (thr_data_t *)((!th->th.th_local.bget_data)
246  ? __kmp_allocate(sizeof(*data))
247  : th->th.th_local.bget_data);
248 
249  memset(data, '\0', sizeof(*data));
250 
251  for (i = 0; i < MAX_BGET_BINS; ++i) {
252  data->freelist[i].ql.flink = &data->freelist[i];
253  data->freelist[i].ql.blink = &data->freelist[i];
254  }
255 
256  th->th.th_local.bget_data = data;
257  th->th.th_local.bget_list = 0;
258 #if !USE_CMP_XCHG_FOR_BGET
259 #ifdef USE_QUEUING_LOCK_FOR_BGET
260  __kmp_init_lock(&th->th.th_local.bget_lock);
261 #else
262  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
263 #endif /* USE_QUEUING_LOCK_FOR_BGET */
264 #endif /* ! USE_CMP_XCHG_FOR_BGET */
265 }
266 
267 static thr_data_t *get_thr_data(kmp_info_t *th) {
268  thr_data_t *data;
269 
270  data = (thr_data_t *)th->th.th_local.bget_data;
271 
272  KMP_DEBUG_ASSERT(data != 0);
273 
274  return data;
275 }
276 
277 /* Walk the free list and release the enqueued buffers */
278 static void __kmp_bget_dequeue(kmp_info_t *th) {
279  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
280 
281  if (p != 0) {
282 #if USE_CMP_XCHG_FOR_BGET
283  {
284  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
285  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
286  CCAST(void *, old_value), nullptr)) {
287  KMP_CPU_PAUSE();
288  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
289  }
290  p = CCAST(void *, old_value);
291  }
292 #else /* ! USE_CMP_XCHG_FOR_BGET */
293 #ifdef USE_QUEUING_LOCK_FOR_BGET
294  __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
295 #else
296  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
297 #endif /* USE_QUEUING_LOCK_FOR_BGET */
298 
299  p = (void *)th->th.th_local.bget_list;
300  th->th.th_local.bget_list = 0;
301 
302 #ifdef USE_QUEUING_LOCK_FOR_BGET
303  __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
304 #else
305  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
306 #endif
307 #endif /* USE_CMP_XCHG_FOR_BGET */
308 
309  /* Check again to make sure the list is not empty */
310  while (p != 0) {
311  void *buf = p;
312  bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
313 
314  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
315  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
316  (kmp_uintptr_t)th); // clear possible mark
317  KMP_DEBUG_ASSERT(b->ql.blink == 0);
318 
319  p = (void *)b->ql.flink;
320 
321  brel(th, buf);
322  }
323  }
324 }
325 
326 /* Chain together the free buffers by using the thread owner field */
327 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
328 #ifdef USE_QUEUING_LOCK_FOR_BGET
329  ,
330  kmp_int32 rel_gtid
331 #endif
332 ) {
333  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
334 
335  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
336  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
337  (kmp_uintptr_t)th); // clear possible mark
338 
339  b->ql.blink = 0;
340 
341  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
342  __kmp_gtid_from_thread(th)));
343 
344 #if USE_CMP_XCHG_FOR_BGET
345  {
346  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
347  /* the next pointer must be set before setting bget_list to buf to avoid
348  exposing a broken list to other threads, even for an instant. */
349  b->ql.flink = BFH(CCAST(void *, old_value));
350 
351  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
352  CCAST(void *, old_value), buf)) {
353  KMP_CPU_PAUSE();
354  old_value = TCR_PTR(th->th.th_local.bget_list);
355  /* the next pointer must be set before setting bget_list to buf to avoid
356  exposing a broken list to other threads, even for an instant. */
357  b->ql.flink = BFH(CCAST(void *, old_value));
358  }
359  }
360 #else /* ! USE_CMP_XCHG_FOR_BGET */
361 #ifdef USE_QUEUING_LOCK_FOR_BGET
362  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
363 #else
364  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
365 #endif
366 
367  b->ql.flink = BFH(th->th.th_local.bget_list);
368  th->th.th_local.bget_list = (void *)buf;
369 
370 #ifdef USE_QUEUING_LOCK_FOR_BGET
371  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
372 #else
373  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
374 #endif
375 #endif /* USE_CMP_XCHG_FOR_BGET */
376 }
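/* Cross-thread frees: when brel() sees a buffer owned by another thread it
   calls __kmp_bget_enqueue(), which pushes the buffer onto the owner's
   th_local.bget_list -- with a compare-and-swap loop when
   USE_CMP_XCHG_FOR_BGET is enabled, otherwise under the bget lock.  The
   owning thread drains that list via __kmp_bget_dequeue(), called at the top
   of bget() and bpool() and from several kmpc_* entry points, so the
   per-thread free lists themselves are only ever manipulated by their
   owner. */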
377 
378 /* insert buffer back onto a new freelist */
379 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
380  int bin;
381 
382  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
383  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
384 
385  bin = bget_get_bin(b->bh.bb.bsize);
386 
387  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
388  &thr->freelist[bin]);
389  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
390  &thr->freelist[bin]);
391 
392  b->ql.flink = &thr->freelist[bin];
393  b->ql.blink = thr->freelist[bin].ql.blink;
394 
395  thr->freelist[bin].ql.blink = b;
396  b->ql.blink->ql.flink = b;
397 }
398 
399 /* unlink the buffer from the old freelist */
400 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
401  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
402  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
403 
404  b->ql.blink->ql.flink = b->ql.flink;
405  b->ql.flink->ql.blink = b->ql.blink;
406 }
407 
408 /* GET STATS -- check info on free list */
409 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
410  thr_data_t *thr = get_thr_data(th);
411  int bin;
412 
413  *total_free = *max_free = 0;
414 
415  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
416  bfhead_t *b, *best;
417 
418  best = &thr->freelist[bin];
419  b = best->ql.flink;
420 
421  while (b != &thr->freelist[bin]) {
422  *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
423  if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
424  best = b;
425 
426  /* Link to next buffer */
427  b = b->ql.flink;
428  }
429 
430  if (*max_free < best->bh.bb.bsize)
431  *max_free = best->bh.bb.bsize;
432  }
433 
434  if (*max_free > (bufsize)sizeof(bhead_t))
435  *max_free -= sizeof(bhead_t);
436 }
437 
438 /* BGET -- Allocate a buffer. */
439 static void *bget(kmp_info_t *th, bufsize requested_size) {
440  thr_data_t *thr = get_thr_data(th);
441  bufsize size = requested_size;
442  bfhead_t *b;
443  void *buf;
444  int compactseq = 0;
445  int use_blink = 0;
446  /* For BestFit */
447  bfhead_t *best;
448 
449  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
450  return NULL;
451  }
452 
453  __kmp_bget_dequeue(th); /* Release any queued buffers */
454 
455  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
456  size = SizeQ;
457  }
458 #if defined(SizeQuant) && (SizeQuant > 1)
459  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
460 #endif
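  /* Example: with SizeQuant == 16 a 100-byte request is rounded up to 112
     bytes here; adding the header below keeps the total a multiple of
     SizeQuant, since sizeof(bhead_t) is itself padded to SizeQuant. */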
461 
462  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
463  KMP_DEBUG_ASSERT(size >= 0);
464  KMP_DEBUG_ASSERT(size % SizeQuant == 0);
465 
466  use_blink = (thr->mode == bget_mode_lifo);
467 
468  /* If a compact function was provided in the call to bectl(), wrap
469  a loop around the allocation process to allow compaction to
470  intervene in case we don't find a suitable buffer in the chain. */
471 
472  for (;;) {
473  int bin;
474 
475  for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
476  /* Link to next buffer */
477  b = (use_blink ? thr->freelist[bin].ql.blink
478  : thr->freelist[bin].ql.flink);
479 
480  if (thr->mode == bget_mode_best) {
481  best = &thr->freelist[bin];
482 
483  /* Scan the free list searching for the first buffer big enough
484  to hold the requested size buffer. */
485  while (b != &thr->freelist[bin]) {
486  if (b->bh.bb.bsize >= (bufsize)size) {
487  if ((best == &thr->freelist[bin]) ||
488  (b->bh.bb.bsize < best->bh.bb.bsize)) {
489  best = b;
490  }
491  }
492 
493  /* Link to next buffer */
494  b = (use_blink ? b->ql.blink : b->ql.flink);
495  }
496  b = best;
497  }
498 
499  while (b != &thr->freelist[bin]) {
500  if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
501 
502  // Buffer is big enough to satisfy the request. Allocate it to the
503  // caller. We must decide whether the buffer is large enough to split
504  // into the part given to the caller and a free buffer that remains
505  // on the free list, or whether the entire buffer should be removed
506  // from the free list and given to the caller in its entirety. We
507  // only split the buffer if enough room remains for a header plus the
508  // minimum quantum of allocation.
509  if ((b->bh.bb.bsize - (bufsize)size) >
510  (bufsize)(SizeQ + (sizeof(bhead_t)))) {
511  bhead_t *ba, *bn;
512 
513  ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
514  bn = BH(((char *)ba) + size);
515 
516  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
517 
518  /* Subtract size from length of free block. */
519  b->bh.bb.bsize -= (bufsize)size;
520 
521  /* Link allocated buffer to the previous free buffer. */
522  ba->bb.prevfree = b->bh.bb.bsize;
523 
524  /* Plug negative size into user buffer. */
525  ba->bb.bsize = -size;
526 
527  /* Mark this buffer as owned by this thread. */
528  TCW_PTR(ba->bb.bthr,
529  th); // not an allocated address (do not mark it)
530  /* Mark buffer after this one not preceded by free block. */
531  bn->bb.prevfree = 0;
532 
533  // unlink buffer from old freelist, and reinsert into new freelist
534  __kmp_bget_remove_from_freelist(b);
535  __kmp_bget_insert_into_freelist(thr, b);
536 #if BufStats
537  thr->totalloc += (size_t)size;
538  thr->numget++; /* Increment number of bget() calls */
539 #endif
540  buf = (void *)((((char *)ba) + sizeof(bhead_t)));
541  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
542  return buf;
543  } else {
544  bhead_t *ba;
545 
546  ba = BH(((char *)b) + b->bh.bb.bsize);
547 
548  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
549 
550  /* The buffer isn't big enough to split. Give the whole
551  shebang to the caller and remove it from the free list. */
552 
553  __kmp_bget_remove_from_freelist(b);
554 #if BufStats
555  thr->totalloc += (size_t)b->bh.bb.bsize;
556  thr->numget++; /* Increment number of bget() calls */
557 #endif
558  /* Negate size to mark buffer allocated. */
559  b->bh.bb.bsize = -(b->bh.bb.bsize);
560 
561  /* Mark this buffer as owned by this thread. */
562  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
563  /* Zero the back pointer in the next buffer in memory
564  to indicate that this buffer is allocated. */
565  ba->bb.prevfree = 0;
566 
567  /* Give user buffer starting at queue links. */
568  buf = (void *)&(b->ql);
569  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
570  return buf;
571  }
572  }
573 
574  /* Link to next buffer */
575  b = (use_blink ? b->ql.blink : b->ql.flink);
576  }
577  }
578 
579  /* We failed to find a buffer. If there's a compact function defined,
580  notify it of the size requested. If it returns TRUE, try the allocation
581  again. */
582 
583  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
584  break;
585  }
586  }
587 
588  /* No buffer available with requested size free. */
589 
590  /* Don't give up yet -- look in the reserve supply. */
591  if (thr->acqfcn != 0) {
592  if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
593  /* Request is too large to fit in a single expansion block.
594  Try to satisfy it by a direct buffer acquisition. */
595  bdhead_t *bdh;
596 
597  size += sizeof(bdhead_t) - sizeof(bhead_t);
598 
599  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
600 
601  /* richryan */
602  bdh = BDH((*thr->acqfcn)((bufsize)size));
603  if (bdh != NULL) {
604 
605  // Mark the buffer special by setting size field of its header to zero.
606  bdh->bh.bb.bsize = 0;
607 
608  /* Mark this buffer as owned by this thread. */
609  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
610  // because direct buffer never goes to free list
611  bdh->bh.bb.prevfree = 0;
612  bdh->tsize = size;
613 #if BufStats
614  thr->totalloc += (size_t)size;
615  thr->numget++; /* Increment number of bget() calls */
616  thr->numdget++; /* Direct bget() call count */
617 #endif
618  buf = (void *)(bdh + 1);
619  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
620  return buf;
621  }
622 
623  } else {
624 
625  /* Try to obtain a new expansion block */
626  void *newpool;
627 
628  KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
629 
630  /* richryan */
631  newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
632  KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
633  if (newpool != NULL) {
634  bpool(th, newpool, thr->exp_incr);
635  buf = bget(
636  th, requested_size); /* This can't, I say, can't get into a loop. */
637  return buf;
638  }
639  }
640  }
641 
642  /* Still no buffer available */
643 
644  return NULL;
645 }
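/* Summary of the bget() search: the adjusted request starts at its bin and
   walks the bins upward.  A free buffer that fits is either split -- the
   allocated piece is carved from its high-address end and the remainder stays
   free -- or handed out whole when splitting would leave less than a header
   plus the minimum quantum.  Only when every bin fails does control reach the
   compaction callback and then the acquisition path handled just above. */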
646 
647 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
648  the entire contents of the buffer to zero, not just the
649  region requested by the caller. */
650 
651 static void *bgetz(kmp_info_t *th, bufsize size) {
652  char *buf = (char *)bget(th, size);
653 
654  if (buf != NULL) {
655  bhead_t *b;
656  bufsize rsize;
657 
658  b = BH(buf - sizeof(bhead_t));
659  rsize = -(b->bb.bsize);
660  if (rsize == 0) {
661  bdhead_t *bd;
662 
663  bd = BDH(buf - sizeof(bdhead_t));
664  rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
665  } else {
666  rsize -= sizeof(bhead_t);
667  }
668 
669  KMP_DEBUG_ASSERT(rsize >= size);
670 
671  (void)memset(buf, 0, (bufsize)rsize);
672  }
673  return ((void *)buf);
674 }
675 
676 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
677  simply in terms of brel() and bget(). It could be
678  enhanced to allow the buffer to grow into adjacent free
679  blocks and to avoid moving data unnecessarily. */
680 
681 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
682  void *nbuf;
683  bufsize osize; /* Old size of buffer */
684  bhead_t *b;
685 
686  nbuf = bget(th, size);
687  if (nbuf == NULL) { /* Acquire new buffer */
688  return NULL;
689  }
690  if (buf == NULL) {
691  return nbuf;
692  }
693  b = BH(((char *)buf) - sizeof(bhead_t));
694  osize = -b->bb.bsize;
695  if (osize == 0) {
696  /* Buffer acquired directly through acqfcn. */
697  bdhead_t *bd;
698 
699  bd = BDH(((char *)buf) - sizeof(bdhead_t));
700  osize = bd->tsize - (bufsize)sizeof(bdhead_t);
701  } else {
702  osize -= sizeof(bhead_t);
703  }
704 
705  KMP_DEBUG_ASSERT(osize > 0);
706 
707  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
708  (size_t)((size < osize) ? size : osize));
709  brel(th, buf);
710 
711  return nbuf;
712 }
713 
714 /* BREL -- Release a buffer. */
715 static void brel(kmp_info_t *th, void *buf) {
716  thr_data_t *thr = get_thr_data(th);
717  bfhead_t *b, *bn;
718  kmp_info_t *bth;
719 
720  KMP_DEBUG_ASSERT(buf != NULL);
721  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
722 
723  b = BFH(((char *)buf) - sizeof(bhead_t));
724 
725  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
726  bdhead_t *bdh;
727 
728  bdh = BDH(((char *)buf) - sizeof(bdhead_t));
729  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
730 #if BufStats
731  thr->totalloc -= (size_t)bdh->tsize;
732  thr->numdrel++; /* Number of direct releases */
733  thr->numrel++; /* Increment number of brel() calls */
734 #endif /* BufStats */
735 #ifdef FreeWipe
736  (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
737 #endif /* FreeWipe */
738 
739  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
740 
741  KMP_DEBUG_ASSERT(thr->relfcn != 0);
742  (*thr->relfcn)((void *)bdh); /* Release it directly. */
743  return;
744  }
745 
746  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
747  ~1); // clear possible mark before comparison
748  if (bth != th) {
749  /* Add this buffer to be released by the owning thread later */
750  __kmp_bget_enqueue(bth, buf
751 #ifdef USE_QUEUING_LOCK_FOR_BGET
752  ,
753  __kmp_gtid_from_thread(th)
754 #endif
755  );
756  return;
757  }
758 
759  /* Buffer size must be negative, indicating that the buffer is allocated. */
760  if (b->bh.bb.bsize >= 0) {
761  bn = NULL;
762  }
763  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
764 
765  /* Back pointer in next buffer must be zero, indicating the same thing: */
766 
767  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
768 
769 #if BufStats
770  thr->numrel++; /* Increment number of brel() calls */
771  thr->totalloc += (size_t)b->bh.bb.bsize;
772 #endif
773 
774  /* If the back link is nonzero, the previous buffer is free. */
775 
776  if (b->bh.bb.prevfree != 0) {
777  /* The previous buffer is free. Consolidate this buffer with it by adding
778  the length of this buffer to the previous free buffer. Note that we
779  subtract the size in the buffer being released, since it's negative to
780  indicate that the buffer is allocated. */
781  bufsize size = b->bh.bb.bsize;
782 
783  /* Make the previous buffer the one we're working on. */
784  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
785  b->bh.bb.prevfree);
786  b = BFH(((char *)b) - b->bh.bb.prevfree);
787  b->bh.bb.bsize -= size;
788 
789  /* unlink the buffer from the old freelist */
790  __kmp_bget_remove_from_freelist(b);
791  } else {
 792  /* The previous buffer is allocated (prevfree is zero), so no backward
 793  coalescing is possible. Mark this buffer size as positive (i.e. free) and
 794  fall through to place the buffer on the free list as an isolated free block. */
795  b->bh.bb.bsize = -b->bh.bb.bsize;
796  }
797 
798  /* insert buffer back onto a new freelist */
799  __kmp_bget_insert_into_freelist(thr, b);
800 
801  /* Now we look at the next buffer in memory, located by advancing from
802  the start of this buffer by its size, to see if that buffer is
803  free. If it is, we combine this buffer with the next one in
804  memory, dechaining the second buffer from the free list. */
805  bn = BFH(((char *)b) + b->bh.bb.bsize);
806  if (bn->bh.bb.bsize > 0) {
807 
808  /* The buffer is free. Remove it from the free list and add
809  its size to that of our buffer. */
810  KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
811  bn->bh.bb.bsize);
812 
813  __kmp_bget_remove_from_freelist(bn);
814 
815  b->bh.bb.bsize += bn->bh.bb.bsize;
816 
817  /* unlink the buffer from the old freelist, and reinsert it into the new
818  * freelist */
819  __kmp_bget_remove_from_freelist(b);
820  __kmp_bget_insert_into_freelist(thr, b);
821 
822  /* Finally, advance to the buffer that follows the newly
823  consolidated free block. We must set its backpointer to the
824  head of the consolidated free block. We know the next block
825  must be an allocated block because the process of recombination
826  guarantees that two free blocks will never be contiguous in
827  memory. */
828  bn = BFH(((char *)b) + b->bh.bb.bsize);
829  }
830 #ifdef FreeWipe
831  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
832  (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
833 #endif
834  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
835 
 836  /* The next buffer is allocated. Set the backpointer in it to point
 837  to this buffer, i.e. the previous free buffer in memory. */
838 
839  bn->bh.bb.prevfree = b->bh.bb.bsize;
840 
841  /* If a block-release function is defined, and this free buffer
842  constitutes the entire block, release it. Note that pool_len
843  is defined in such a way that the test will fail unless all
844  pool blocks are the same size. */
845  if (thr->relfcn != 0 &&
846  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
847 #if BufStats
848  if (thr->numpblk !=
849  1) { /* Do not release the last buffer until finalization time */
850 #endif
851 
852  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
853  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
854  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
855  b->bh.bb.bsize);
856 
857  /* Unlink the buffer from the free list */
858  __kmp_bget_remove_from_freelist(b);
859 
860  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
861 
862  (*thr->relfcn)(b);
863 #if BufStats
864  thr->numprel++; /* Nr of expansion block releases */
865  thr->numpblk--; /* Total number of blocks */
866  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
867 
 868  // avoid leaving a stale last_pool pointer around if it is being deallocated
869  if (thr->last_pool == b)
870  thr->last_pool = 0;
871  } else {
872  thr->last_pool = b;
873  }
874 #endif /* BufStats */
875  }
876 }
877 
878 /* BECTL -- Establish automatic pool expansion control */
879 static void bectl(kmp_info_t *th, bget_compact_t compact,
880  bget_acquire_t acquire, bget_release_t release,
881  bufsize pool_incr) {
882  thr_data_t *thr = get_thr_data(th);
883 
884  thr->compfcn = compact;
885  thr->acqfcn = acquire;
886  thr->relfcn = release;
887  thr->exp_incr = pool_incr;
888 }
889 
890 /* BPOOL -- Add a region of memory to the buffer pool. */
891 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
892  /* int bin = 0; */
893  thr_data_t *thr = get_thr_data(th);
894  bfhead_t *b = BFH(buf);
895  bhead_t *bn;
896 
897  __kmp_bget_dequeue(th); /* Release any queued buffers */
898 
899 #ifdef SizeQuant
900  len &= ~((bufsize)(SizeQuant - 1));
901 #endif
902  if (thr->pool_len == 0) {
903  thr->pool_len = len;
904  } else if (len != thr->pool_len) {
905  thr->pool_len = -1;
906  }
907 #if BufStats
908  thr->numpget++; /* Number of block acquisitions */
909  thr->numpblk++; /* Number of blocks total */
910  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
911 #endif /* BufStats */
912 
913  /* Since the block is initially occupied by a single free buffer,
914  it had better not be (much) larger than the largest buffer
915  whose size we can store in bhead.bb.bsize. */
916  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
917 
918  /* Clear the backpointer at the start of the block to indicate that
919  there is no free block prior to this one. That blocks
920  recombination when the first block in memory is released. */
921  b->bh.bb.prevfree = 0;
922 
923  /* Create a dummy allocated buffer at the end of the pool. This dummy
924  buffer is seen when a buffer at the end of the pool is released and
925  blocks recombination of the last buffer with the dummy buffer at
926  the end. The length in the dummy buffer is set to the largest
927  negative number to denote the end of the pool for diagnostic
928  routines (this specific value is not counted on by the actual
929  allocation and release functions). */
930  len -= sizeof(bhead_t);
931  b->bh.bb.bsize = (bufsize)len;
932  /* Set the owner of this buffer */
933  TCW_PTR(b->bh.bb.bthr,
934  (kmp_info_t *)((kmp_uintptr_t)th |
935  1)); // mark the buffer as allocated address
936 
937  /* Chain the new block to the free list. */
938  __kmp_bget_insert_into_freelist(thr, b);
939 
940 #ifdef FreeWipe
941  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
942  (size_t)(len - sizeof(bfhead_t)));
943 #endif
944  bn = BH(((char *)b) + len);
945  bn->bb.prevfree = (bufsize)len;
946  /* Definition of ESent assumes two's complement! */
947  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
948 
949  bn->bb.bsize = ESent;
950 }
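/* After bpool() the block holds a single free buffer covering everything but
   the last sizeof(bhead_t) bytes, followed by a dummy header whose bsize is
   ESent and whose prevfree records the free buffer's size.  That sentinel is
   what keeps brel() from coalescing past the end of the block. */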
951 
952 /* BFREED -- Dump the free lists for this thread. */
953 static void bfreed(kmp_info_t *th) {
954  int bin = 0, count = 0;
955  int gtid = __kmp_gtid_from_thread(th);
956  thr_data_t *thr = get_thr_data(th);
957 
958 #if BufStats
959  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
960  " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
961  " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
962  " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
963  " drel=%" KMP_INT64_SPEC "\n",
964  gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
965  (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
966  (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
967  (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
968 #endif
969 
970  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
971  bfhead_t *b;
972 
973  for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
974  b = b->ql.flink) {
975  bufsize bs = b->bh.bb.bsize;
976 
977  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
978  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
979  KMP_DEBUG_ASSERT(bs > 0);
980 
981  count += 1;
982 
983  __kmp_printf_no_lock(
984  "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
985  (long)bs);
986 #ifdef FreeWipe
987  {
988  char *lerr = ((char *)b) + sizeof(bfhead_t);
989  if ((bs > sizeof(bfhead_t)) &&
990  ((*lerr != 0x55) ||
991  (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
992  0))) {
993  __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
994  "free block have been overstored.)\n",
995  gtid);
996  }
997  }
998 #endif
999  }
1000  }
1001 
1002  if (count == 0)
1003  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
1004 }
1005 
1006 void __kmp_initialize_bget(kmp_info_t *th) {
1007  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
1008 
1009  set_thr_data(th);
1010 
1011  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1012  (bufsize)__kmp_malloc_pool_incr);
1013 }
1014 
1015 void __kmp_finalize_bget(kmp_info_t *th) {
1016  thr_data_t *thr;
1017  bfhead_t *b;
1018 
1019  KMP_DEBUG_ASSERT(th != 0);
1020 
1021 #if BufStats
1022  thr = (thr_data_t *)th->th.th_local.bget_data;
1023  KMP_DEBUG_ASSERT(thr != NULL);
1024  b = thr->last_pool;
1025 
1026  /* If a block-release function is defined, and this free buffer constitutes
1027  the entire block, release it. Note that pool_len is defined in such a way
1028  that the test will fail unless all pool blocks are the same size. */
1029 
1030  // Deallocate the last pool if one exists because we no longer do it in brel()
1031  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1032  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1033  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1034  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1035  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1036  b->bh.bb.bsize);
1037 
1038  /* Unlink the buffer from the free list */
1039  __kmp_bget_remove_from_freelist(b);
1040 
1041  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1042 
1043  (*thr->relfcn)(b);
1044  thr->numprel++; /* Nr of expansion block releases */
1045  thr->numpblk--; /* Total number of blocks */
1046  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1047  }
1048 #endif /* BufStats */
1049 
1050  /* Deallocate bget_data */
1051  if (th->th.th_local.bget_data != NULL) {
1052  __kmp_free(th->th.th_local.bget_data);
1053  th->th.th_local.bget_data = NULL;
1054  }
1055 }
1056 
1057 void kmpc_set_poolsize(size_t size) {
1058  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1059  (bget_release_t)free, (bufsize)size);
1060 }
1061 
1062 size_t kmpc_get_poolsize(void) {
1063  thr_data_t *p;
1064 
1065  p = get_thr_data(__kmp_get_thread());
1066 
1067  return p->exp_incr;
1068 }
1069 
1070 void kmpc_set_poolmode(int mode) {
1071  thr_data_t *p;
1072 
1073  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1074  mode == bget_mode_best) {
1075  p = get_thr_data(__kmp_get_thread());
1076  p->mode = (bget_mode_t)mode;
1077  }
1078 }
1079 
1080 int kmpc_get_poolmode(void) {
1081  thr_data_t *p;
1082 
1083  p = get_thr_data(__kmp_get_thread());
1084 
1085  return p->mode;
1086 }
1087 
1088 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1089  kmp_info_t *th = __kmp_get_thread();
1090  bufsize a, b;
1091 
1092  __kmp_bget_dequeue(th); /* Release any queued buffers */
1093 
1094  bcheck(th, &a, &b);
1095 
1096  *maxmem = a;
1097  *allmem = b;
1098 }
1099 
1100 void kmpc_poolprint(void) {
1101  kmp_info_t *th = __kmp_get_thread();
1102 
1103  __kmp_bget_dequeue(th); /* Release any queued buffers */
1104 
1105  bfreed(th);
1106 }
1107 
1108 #endif // #if KMP_USE_BGET
1109 
1110 void *kmpc_malloc(size_t size) {
1111  void *ptr;
1112  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1113  if (ptr != NULL) {
1114  // save allocated pointer just before one returned to user
1115  *(void **)ptr = ptr;
1116  ptr = (void **)ptr + 1;
1117  }
1118  return ptr;
1119 }
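/* kmpc_malloc(), kmpc_calloc() and kmpc_realloc() over-allocate by one pointer
   and store the address returned by bget()/bgetz() in the slot just below the
   address handed to the user; kmpc_free() recovers it with
   *((void **)ptr - 1).  A usage sketch (user code, not part of this file):

     void *p = kmpc_malloc(100);   // p - sizeof(void *) holds the bget() result
     kmpc_free(p);                 // releases the buffer found through that slot
*/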
1120 
1121 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1122 
1123 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1124  void *ptr;
1125  void *ptr_allocated;
1126  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1127  if (!IS_POWER_OF_TWO(alignment)) {
1128  // AC: do we need to issue a warning here?
1129  errno = EINVAL;
1130  return NULL;
1131  }
1132  size = size + sizeof(void *) + alignment;
1133  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1134  if (ptr_allocated != NULL) {
1135  // save allocated pointer just before one returned to user
1136  ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1137  ~(alignment - 1));
1138  *((void **)ptr - 1) = ptr_allocated;
1139  } else {
1140  ptr = NULL;
1141  }
1142  return ptr;
1143 }
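/* Worked example with made-up numbers: suppose alignment == 64 and bget()
   returns ptr_allocated == 0x1008.  Then 0x1008 + sizeof(void *) + 64 ==
   0x1050, and masking with ~63 yields ptr == 0x1040, which is 64-byte
   aligned, leaves room below it for the saved ptr_allocated, and stays inside
   the (size + sizeof(void *) + 64)-byte allocation. */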
1144 
1145 void *kmpc_calloc(size_t nelem, size_t elsize) {
1146  void *ptr;
1147  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1148  if (ptr != NULL) {
1149  // save allocated pointer just before one returned to user
1150  *(void **)ptr = ptr;
1151  ptr = (void **)ptr + 1;
1152  }
1153  return ptr;
1154 }
1155 
1156 void *kmpc_realloc(void *ptr, size_t size) {
1157  void *result = NULL;
1158  if (ptr == NULL) {
1159  // If pointer is NULL, realloc behaves like malloc.
1160  result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1161  // save allocated pointer just before one returned to user
1162  if (result != NULL) {
1163  *(void **)result = result;
1164  result = (void **)result + 1;
1165  }
1166  } else if (size == 0) {
1167  // If size is 0, realloc behaves like free.
 1168  // The thread must already have been registered by an earlier call to
 1169  // kmpc_malloc() or kmpc_calloc(), so it is safe to call
 1170  // __kmp_get_thread() here rather than
 1171  // __kmp_entry_thread().
1172  KMP_ASSERT(*((void **)ptr - 1));
1173  brel(__kmp_get_thread(), *((void **)ptr - 1));
1174  } else {
1175  result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1176  (bufsize)(size + sizeof(ptr)));
1177  if (result != NULL) {
1178  *(void **)result = result;
1179  result = (void **)result + 1;
1180  }
1181  }
1182  return result;
1183 }
1184 
1185 // NOTE: the library must have already been initialized by a previous allocate
1186 void kmpc_free(void *ptr) {
1187  if (!__kmp_init_serial) {
1188  return;
1189  }
1190  if (ptr != NULL) {
1191  kmp_info_t *th = __kmp_get_thread();
1192  __kmp_bget_dequeue(th); /* Release any queued buffers */
1193  // extract allocated pointer and free it
1194  KMP_ASSERT(*((void **)ptr - 1));
1195  brel(th, *((void **)ptr - 1));
1196  }
1197 }
1198 
1199 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1200  void *ptr;
1201  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1202  (int)size KMP_SRC_LOC_PARM));
1203  ptr = bget(th, (bufsize)size);
1204  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1205  return ptr;
1206 }
1207 
1208 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1209  size_t elsize KMP_SRC_LOC_DECL) {
1210  void *ptr;
1211  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1212  (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1213  ptr = bgetz(th, (bufsize)(nelem * elsize));
1214  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1215  return ptr;
1216 }
1217 
1218 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1219  size_t size KMP_SRC_LOC_DECL) {
1220  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1221  ptr, (int)size KMP_SRC_LOC_PARM));
1222  ptr = bgetr(th, ptr, (bufsize)size);
1223  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1224  return ptr;
1225 }
1226 
1227 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1228  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1229  ptr KMP_SRC_LOC_PARM));
1230  if (ptr != NULL) {
1231  __kmp_bget_dequeue(th); /* Release any queued buffers */
1232  brel(th, ptr);
1233  }
1234  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1235 }
1236 
1237 /* OMP 5.0 Memory Management support */
1238 static const char *kmp_mk_lib_name;
1239 static void *h_memkind;
1240 /* memkind experimental API: */
1241 // memkind_alloc
1242 static void *(*kmp_mk_alloc)(void *k, size_t sz);
1243 // memkind_free
1244 static void (*kmp_mk_free)(void *kind, void *ptr);
1245 // memkind_check_available
1246 static int (*kmp_mk_check)(void *kind);
1247 // kinds we are going to use
1248 static void **mk_default;
1249 static void **mk_interleave;
1250 static void **mk_hbw;
1251 static void **mk_hbw_interleave;
1252 static void **mk_hbw_preferred;
1253 static void **mk_hugetlb;
1254 static void **mk_hbw_hugetlb;
1255 static void **mk_hbw_preferred_hugetlb;
1256 static void **mk_dax_kmem;
1257 static void **mk_dax_kmem_all;
1258 static void **mk_dax_kmem_preferred;
1259 static void *(*kmp_target_alloc_host)(size_t size, int device);
1260 static void *(*kmp_target_alloc_shared)(size_t size, int device);
1261 static void *(*kmp_target_alloc_device)(size_t size, int device);
1262 static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
1263 static void *(*kmp_target_unlock_mem)(void *ptr, int device);
1264 static void *(*kmp_target_free_host)(void *ptr, int device);
1265 static void *(*kmp_target_free_shared)(void *ptr, int device);
1266 static void *(*kmp_target_free_device)(void *ptr, int device);
1267 static bool __kmp_target_mem_available;
1268 
1269 #define KMP_IS_TARGET_MEM_SPACE(MS) \
1270  (MS == llvm_omp_target_host_mem_space || \
1271  MS == llvm_omp_target_shared_mem_space || \
1272  MS == llvm_omp_target_device_mem_space)
1273 
1274 #define KMP_IS_TARGET_MEM_ALLOC(MA) \
1275  (MA == llvm_omp_target_host_mem_alloc || \
1276  MA == llvm_omp_target_shared_mem_alloc || \
1277  MA == llvm_omp_target_device_mem_alloc)
1278 
1279 #define KMP_IS_PREDEF_MEM_SPACE(MS) \
1280  (MS == omp_null_mem_space || MS == omp_default_mem_space || \
1281  MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \
1282  MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \
1283  KMP_IS_TARGET_MEM_SPACE(MS))
1284 
1302  bool supported = false;
1303  using get_mem_resources_t = int (*)(int, const int *, int,
1304  omp_memspace_handle_t, int *);
1305  using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t);
1306  using omp_free_t = void (*)(void *, omp_allocator_handle_t);
1307  get_mem_resources_t tgt_get_mem_resources = nullptr;
1308  omp_alloc_t tgt_omp_alloc = nullptr;
1309  omp_free_t tgt_omp_free = nullptr;
1310 
1311 public:
1313  void init() {
1314  tgt_get_mem_resources =
1315  (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
1316  tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
1317  tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
1318  supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1319  }
1322  int get_mem_resources(int ndevs, const int *devs, int host,
1323  omp_memspace_handle_t memspace, int *resources) {
1324  if (supported)
1325  return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1326  return 0;
1327  }
1329  void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
1330  if (supported)
1331  return tgt_omp_alloc(size, allocator);
1332  return nullptr;
1333  }
1335  void omp_free(void *ptr, omp_allocator_handle_t allocator) {
1336  if (supported)
1337  tgt_omp_free(ptr, allocator);
1338  }
1339 } __kmp_tgt_allocator;
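/* The __tgt_* entry points are resolved lazily with KMP_DLSYM in init(); if
   any of __tgt_get_mem_resources, __tgt_omp_alloc or __tgt_omp_free cannot be
   found (for example, no offload runtime is loaded), 'supported' stays false
   and the wrappers simply return 0 or nullptr, so host-only execution is
   unaffected. */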
1340 
1341 extern "C" int omp_get_num_devices(void);
1342 
1347  kmp_memspace_t *memspace_list = nullptr;
1348  KMP_LOCK_INIT(mtx);
1350  kmp_memspace_t *find(int num_resources, const int *resources,
1351  omp_memspace_handle_t memspace) {
1352  kmp_memspace_t *ms = memspace_list;
1353  while (ms) {
1354  if (ms->num_resources == num_resources && ms->memspace == memspace &&
1355  !memcmp(ms->resources, resources, sizeof(int) * num_resources))
1356  break;
1357  ms = ms->next;
1358  }
1359  return ms;
1360  }
1364  omp_memspace_handle_t get(int num_resources, const int *resources,
1365  omp_memspace_handle_t memspace) {
1366  int gtid = __kmp_entry_gtid();
1367  __kmp_acquire_lock(&mtx, gtid);
1368  // Sort absolute IDs in the resource list
1369  int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
1370  KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
1371  qsort(sorted_resources, (size_t)num_resources, sizeof(int),
1372  [](const void *a, const void *b) {
1373  const int val_a = *(const int *)a;
1374  const int val_b = *(const int *)b;
1375  return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
1376  });
1377  kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
1378  if (ms) {
1379  __kmp_free(sorted_resources);
1380  __kmp_release_lock(&mtx, gtid);
1381  return ms;
1382  }
1383  ms = (kmp_memspace_t *)__kmp_allocate(sizeof(kmp_memspace_t));
1384  ms->memspace = memspace;
1385  ms->num_resources = num_resources;
1386  ms->resources = sorted_resources;
1387  ms->next = memspace_list;
1388  memspace_list = ms;
1389  __kmp_release_lock(&mtx, gtid);
1390  return ms;
1391  }
1392 
1393 public:
1395  void init() { __kmp_init_lock(&mtx); }
1397  void fini() {
1398  kmp_memspace_t *ms = memspace_list;
1399  while (ms) {
1400  if (ms->resources)
1401  __kmp_free(ms->resources);
1402  kmp_memspace_t *tmp = ms;
1403  ms = ms->next;
1404  __kmp_free(tmp);
1405  }
1406  __kmp_destroy_lock(&mtx);
1407  }
1409  omp_memspace_handle_t get_memspace(int num_devices, const int *devices,
1410  int host_access,
1411  omp_memspace_handle_t memspace) {
1412  int actual_num_devices = num_devices;
1413  int *actual_devices = const_cast<int *>(devices);
1414  if (actual_num_devices == 0) {
1415  actual_num_devices = omp_get_num_devices();
1416  if (actual_num_devices <= 0)
1417  return omp_null_mem_space;
1418  }
1419  if (actual_devices == NULL) {
1420  // Prepare list of all devices in this case.
1421  actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
1422  for (int i = 0; i < actual_num_devices; i++)
1423  actual_devices[i] = i;
1424  }
1425  // Get the number of available resources first
1426  int num_resources = __kmp_tgt_allocator.get_mem_resources(
1427  actual_num_devices, actual_devices, host_access, memspace, NULL);
1428  if (num_resources <= 0)
1429  return omp_null_mem_space; // No available resources
1430 
1431  omp_memspace_handle_t ms = omp_null_mem_space;
1432  if (num_resources > 0) {
1433  int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
1434  // Let offload runtime write the resource IDs
1435  num_resources = __kmp_tgt_allocator.get_mem_resources(
1436  actual_num_devices, actual_devices, host_access, memspace, resources);
1437  ms = get(num_resources, resources, memspace);
1438  __kmp_free(resources);
1439  }
1440  if (!devices && actual_devices)
1441  __kmp_free(actual_devices);
1442  return ms;
1443  }
1445  omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
1446  omp_memspace_handle_t parent) {
1447  kmp_memspace_t *ms = (kmp_memspace_t *)parent;
1448  return get(num_resources, resources, ms->memspace);
1449  }
1450 } __kmp_tgt_memspace_list;
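/* Memory-space handles are de-duplicated: get() sorts the resource IDs and
   searches the existing list under the lock, so two requests naming the same
   resources (in any order) for the same underlying memspace receive the same
   omp_memspace_handle_t.  Cached entries are only released by fini(), called
   from __kmp_fini_target_mem(). */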
1451 
1452 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1453 static inline void chk_kind(void ***pkind) {
1454  KMP_DEBUG_ASSERT(pkind);
1455  if (*pkind) // symbol found
1456  if (kmp_mk_check(**pkind)) // kind not available or error
1457  *pkind = NULL;
1458 }
1459 #endif
1460 
1461 void __kmp_init_memkind() {
1462 // as of 2018-07-31 memkind does not support Windows*, exclude it for now
1463 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1464  // use of statically linked memkind is problematic, as it depends on libnuma
1465  kmp_mk_lib_name = "libmemkind.so";
1466  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1467  if (h_memkind) {
1468  kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
1469  kmp_mk_alloc =
1470  (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
1471  kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
1472  mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
1473  if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1474  !kmp_mk_check(*mk_default)) {
1475  __kmp_memkind_available = 1;
1476  mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
1477  chk_kind(&mk_interleave);
1478  mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
1479  chk_kind(&mk_hbw);
1480  mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
1481  chk_kind(&mk_hbw_interleave);
1482  mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
1483  chk_kind(&mk_hbw_preferred);
1484  mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
1485  chk_kind(&mk_hugetlb);
1486  mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
1487  chk_kind(&mk_hbw_hugetlb);
1488  mk_hbw_preferred_hugetlb =
1489  (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
1490  chk_kind(&mk_hbw_preferred_hugetlb);
1491  mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
1492  chk_kind(&mk_dax_kmem);
1493  mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
1494  chk_kind(&mk_dax_kmem_all);
1495  mk_dax_kmem_preferred =
1496  (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
1497  chk_kind(&mk_dax_kmem_preferred);
1498  KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
1499  return; // success
1500  }
1501  dlclose(h_memkind); // failure
1502  }
1503 #else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
1504  kmp_mk_lib_name = "";
1505 #endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
1506  h_memkind = NULL;
1507  kmp_mk_check = NULL;
1508  kmp_mk_alloc = NULL;
1509  kmp_mk_free = NULL;
1510  mk_default = NULL;
1511  mk_interleave = NULL;
1512  mk_hbw = NULL;
1513  mk_hbw_interleave = NULL;
1514  mk_hbw_preferred = NULL;
1515  mk_hugetlb = NULL;
1516  mk_hbw_hugetlb = NULL;
1517  mk_hbw_preferred_hugetlb = NULL;
1518  mk_dax_kmem = NULL;
1519  mk_dax_kmem_all = NULL;
1520  mk_dax_kmem_preferred = NULL;
1521 }
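/* If libmemkind.so cannot be dlopen'ed, or MEMKIND_DEFAULT is unusable, the
   code above leaves every kmp_mk_* / mk_* pointer NULL and never sets
   __kmp_memkind_available; allocator setup in __kmpc_init_allocator() below
   then relies on hwloc availability, or returns omp_null_allocator for memory
   spaces it cannot serve. */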
1522 
1523 void __kmp_fini_memkind() {
1524 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1525  if (__kmp_memkind_available)
1526  KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
1527  if (h_memkind) {
1528  dlclose(h_memkind);
1529  h_memkind = NULL;
1530  }
1531  kmp_mk_check = NULL;
1532  kmp_mk_alloc = NULL;
1533  kmp_mk_free = NULL;
1534  mk_default = NULL;
1535  mk_interleave = NULL;
1536  mk_hbw = NULL;
1537  mk_hbw_interleave = NULL;
1538  mk_hbw_preferred = NULL;
1539  mk_hugetlb = NULL;
1540  mk_hbw_hugetlb = NULL;
1541  mk_hbw_preferred_hugetlb = NULL;
1542  mk_dax_kmem = NULL;
1543  mk_dax_kmem_all = NULL;
1544  mk_dax_kmem_preferred = NULL;
1545 #endif
1546 }
1547 
1548 #if KMP_USE_HWLOC
1549 static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
1550 #if HWLOC_API_VERSION >= 0x00020300
1551  const hwloc_topology_support *support;
1552  support = hwloc_topology_get_support(__kmp_hwloc_topology);
1553  if (support) {
1554  if (policy == HWLOC_MEMBIND_BIND)
1555  return (support->membind->alloc_membind &&
1556  support->membind->bind_membind);
1557  if (policy == HWLOC_MEMBIND_INTERLEAVE)
1558  return (support->membind->alloc_membind &&
1559  support->membind->interleave_membind);
1560  }
1561  return false;
1562 #else
1563  return false;
1564 #endif
1565 }
1566 
1567 void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
1568  hwloc_membind_policy_t policy) {
1569 #if HWLOC_API_VERSION >= 0x00020300
1570  void *ptr = NULL;
1571  hwloc_obj_t node;
1572  struct hwloc_location initiator;
1573  int ret;
1574  // TODO: We should make this more efficient by getting rid of the OS syscall
1575  // 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity and instead
1576  // use th_affin_mask field when it's capable of getting the underlying
1577  // mask implementation.
1578  hwloc_cpuset_t mask = hwloc_bitmap_alloc();
1579  ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
1580  if (ret < 0) {
1581  hwloc_bitmap_free(mask);
1582  return ptr;
1583  }
1584  initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
1585  initiator.location.cpuset = mask;
1586  ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
1587  &node, NULL);
1588  if (ret < 0) {
1589  return ptr;
1590  }
1591  return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
1592  HWLOC_MEMBIND_BYNODESET);
1593 #else
1594  return NULL;
1595 #endif
1596 }
1597 
1598 void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
1599  hwloc_membind_policy_t policy) {
1600 #if HWLOC_API_VERSION >= 0x00020300
1601  void *ptr = NULL;
1602  if (ms == omp_high_bw_mem_space) {
1603  ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
1604  } else if (ms == omp_large_cap_mem_space) {
1605  ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
1606  } else {
1607  ptr = hwloc_alloc(__kmp_hwloc_topology, size);
1608  }
1609  return ptr;
1610 #else
1611  return NULL;
1612 #endif
1613 }
1614 #endif // KMP_USE_HWLOC
1615 
1616 void __kmp_init_target_mem() {
1617  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1618  *(void **)(&kmp_target_alloc_shared) =
1619  KMP_DLSYM("llvm_omp_target_alloc_shared");
1620  *(void **)(&kmp_target_alloc_device) =
1621  KMP_DLSYM("llvm_omp_target_alloc_device");
1622  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
1623  *(void **)(&kmp_target_free_shared) =
1624  KMP_DLSYM("llvm_omp_target_free_shared");
1625  *(void **)(&kmp_target_free_device) =
1626  KMP_DLSYM("llvm_omp_target_free_device");
1627  __kmp_target_mem_available =
1628  kmp_target_alloc_host && kmp_target_alloc_shared &&
1629  kmp_target_alloc_device && kmp_target_free_host &&
1630  kmp_target_free_shared && kmp_target_free_device;
1631  // lock/pin and unlock/unpin target calls
1632  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
1633  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
1634  __kmp_tgt_allocator.init();
1635  __kmp_tgt_memspace_list.init();
1636 }
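/* __kmp_target_mem_available requires all six llvm_omp_target_alloc_* /
   llvm_omp_target_free_* entry points; the lock/unlock (pin/unpin) symbols are
   looked up as well but are not part of that check. */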
1637 
1639 void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.fini(); }
1640 
1641 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
1642  int ntraits,
1643  omp_alloctrait_t traits[]) {
1644  kmp_allocator_t *al;
1645  int i;
1646  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
1647  al->memspace = ms; // not used currently
1648 
1649  // Assign default values if applicable
1650  al->alignment = 1;
1651  al->pinned = false;
1652  al->partition = omp_atv_environment;
1653  al->pin_device = -1;
1654  al->preferred_device = -1;
1655  al->target_access = omp_atv_single;
1656  al->atomic_scope = omp_atv_device;
1657 
1658  for (i = 0; i < ntraits; ++i) {
1659  switch (traits[i].key) {
1660  case omp_atk_sync_hint:
1661  case omp_atk_access:
1662  break;
1663  case omp_atk_pinned:
1664  al->pinned = true;
1665  break;
1666  case omp_atk_alignment:
1667  __kmp_type_convert(traits[i].value, &(al->alignment));
1668  KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1669  break;
1670  case omp_atk_pool_size:
1671  al->pool_size = traits[i].value;
1672  break;
1673  case omp_atk_fallback:
1674  al->fb = (omp_alloctrait_value_t)traits[i].value;
1675  KMP_DEBUG_ASSERT(
1676  al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1677  al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1678  break;
1679  case omp_atk_fb_data:
1680  al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
1681  break;
1682  case omp_atk_partition:
1683 #if KMP_USE_HWLOC
1684  al->membind = (omp_alloctrait_value_t)traits[i].value;
1685  KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
1686  al->membind == omp_atv_nearest ||
1687  al->membind == omp_atv_blocked ||
1688  al->membind == omp_atv_interleaved);
1689 #endif
1690  al->memkind = RCAST(void **, traits[i].value);
1691  break;
1692  case omp_atk_pin_device:
1693  __kmp_type_convert(traits[i].value, &(al->pin_device));
1694  break;
1695  case omp_atk_preferred_device:
1696  __kmp_type_convert(traits[i].value, &(al->preferred_device));
1697  break;
1698  case omp_atk_target_access:
1699  al->target_access = (omp_alloctrait_value_t)traits[i].value;
1700  break;
1701  case omp_atk_atomic_scope:
1702  al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
1703  break;
1704  case omp_atk_part_size:
1705  __kmp_type_convert(traits[i].value, &(al->part_size));
1706  break;
1707  default:
1708  KMP_ASSERT2(0, "Unexpected allocator trait");
1709  }
1710  }
1711 
1712  if (al->memspace > kmp_max_mem_space) {
1713  // Memory space has been allocated for targets.
1714  return (omp_allocator_handle_t)al;
1715  }
1716 
1717  KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
1718 
1719  if (al->fb == 0) {
1720  // set default allocator
1721  al->fb = omp_atv_default_mem_fb;
1722  al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1723  } else if (al->fb == omp_atv_allocator_fb) {
1724  KMP_ASSERT(al->fb_data != NULL);
1725  } else if (al->fb == omp_atv_default_mem_fb) {
1726  al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1727  }
1728  if (__kmp_memkind_available) {
1729  // Let's use memkind library if available
1730  if (ms == omp_high_bw_mem_space) {
1731  if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
1732  al->memkind = mk_hbw_interleave;
1733  } else if (mk_hbw_preferred) {
1734  // AC: do not try to use MEMKIND_HBW for now, because memkind library
1735  // cannot reliably detect exhaustion of HBW memory.
1736  // It could be possible using hbw_verify_memory_region() but memkind
1737  // manual says: "Using this function in production code may result in
1738  // serious performance penalty".
1739  al->memkind = mk_hbw_preferred;
1740  } else {
1741  // HBW is requested but not available --> return NULL allocator
1742  __kmp_free(al);
1743  return omp_null_allocator;
1744  }
1745  } else if (ms == omp_large_cap_mem_space) {
1746  if (mk_dax_kmem_all) {
1747  // All pmem nodes are visited
1748  al->memkind = mk_dax_kmem_all;
1749  } else if (mk_dax_kmem) {
1750  // Only closest pmem node is visited
1751  al->memkind = mk_dax_kmem;
1752  } else {
1753  __kmp_free(al);
1754  return omp_null_allocator;
1755  }
1756  } else {
1757  if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
1758  al->memkind = mk_interleave;
1759  } else {
1760  al->memkind = mk_default;
1761  }
1762  }
1763  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1764  __kmp_free(al);
1765  return omp_null_allocator;
1766  } else {
1767  if (!__kmp_hwloc_available &&
1768  (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
1769  // cannot detect HBW memory presence without memkind library
1770  __kmp_free(al);
1771  return omp_null_allocator;
1772  }
1773  }
1774  return (omp_allocator_handle_t)al;
1775 }
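// Illustrative only: a user-level view of the traits parsed above, expressed
// through the standard OpenMP allocator API (omp_init_allocator is implemented
// on top of __kmpc_init_allocator). A sketch, not code from this file.
#if 0
#include <omp.h>

static omp_allocator_handle_t example_make_pool_allocator(void) {
  omp_alloctrait_t traits[] = {
      {omp_atk_alignment, 64}, // must be a power of two (asserted above)
      {omp_atk_pool_size, 4 * 1024 * 1024}, // enables the pool_used accounting
      {omp_atk_fallback, omp_atv_default_mem_fb}};
  return omp_init_allocator(omp_default_mem_space, 3, traits);
}
#endif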
1776 
1777 void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
1778  if (allocator > kmp_max_mem_alloc)
1779  __kmp_free(allocator);
1780 }
1781 
1782 void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1783  if (allocator == omp_null_allocator)
1784  allocator = omp_default_mem_alloc;
1785  __kmp_threads[gtid]->th.th_def_allocator = allocator;
1786 }
1787 
1788 omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
1789  return __kmp_threads[gtid]->th.th_def_allocator;
1790 }
1791 
1792 omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs,
1793  omp_memspace_handle_t memspace,
1794  int host) {
1795  if (!__kmp_init_serial)
1796  __kmp_serial_initialize();
1797  // Only accept valid device description and predefined memory space
1798  if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1799  return omp_null_mem_space;
1800 
1801  return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
1802 }
1803 
1804 omp_allocator_handle_t
1805 __kmp_get_devices_allocator(int ndevs, const int *devs,
1806  omp_memspace_handle_t memspace, int host) {
1807  if (!__kmp_init_serial)
1808  __kmp_serial_initialize();
1809  // Only accept valid device description and predefined memory space
1810  if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1811  return omp_null_allocator;
1812 
1813  omp_memspace_handle_t mspace =
1814  __kmp_get_devices_memspace(ndevs, devs, memspace, host);
1815  if (mspace == omp_null_mem_space)
1816  return omp_null_allocator;
1817 
1818  return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
1819 }
1820 
1821 int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
1822  if (!__kmp_init_serial)
1823  __kmp_serial_initialize();
1824  if (memspace == omp_null_mem_space)
1825  return 0;
1826  if (memspace < kmp_max_mem_space)
1827  return 1; // return 1 for predefined memory space
1828  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
1829  return ms->num_resources;
1830 }
1831 
1832 omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
1833  int num_resources, int *resources) {
1834  if (!__kmp_init_serial)
1835  __kmp_serial_initialize();
1836  if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
1837  return memspace; // return input memory space for predefined memory space
1838  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
1839  if (num_resources == 0 || ms->num_resources < num_resources || !resources)
1840  return omp_null_mem_space; // input memory space cannot satisfy the request
1841 
1842  // The stored resource IDs are absolute IDs known only to the offload
1843  // backend; the returned memory space keeps that property.
1844  int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
1845 
1846  // Collect absolute resource ID from the relative ID
1847  for (int i = 0; i < num_resources; i++)
1848  resources_abs[i] = ms->resources[resources[i]];
1849 
1850  omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
1851  num_resources, resources_abs, memspace);
1852  __kmp_free(resources_abs);
1853 
1854  return submemspace;
1855 }
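// Illustrative example with hypothetical numbers: if a memory space stores the
// absolute resource IDs {7, 9, 12}, then asking __kmp_get_submemspace() for the
// relative resources {2, 0} selects the absolute IDs {12, 7} before querying
// the memory space list for a matching sub memory space.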
1856 
1857 typedef struct kmp_mem_desc { // Memory block descriptor
1858  void *ptr_alloc; // Pointer returned by allocator
1859  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1860  size_t size_orig; // Original size requested
1861  void *ptr_align; // Pointer to aligned memory, returned
1862  kmp_allocator_t *allocator; // allocator
1863 } kmp_mem_desc_t;
1864 static int alignment = sizeof(void *); // align to pointer size by default
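// Minimal standalone sketch (an illustrative assumption, not the runtime's
// code path) of the descriptor-below-the-pointer scheme that __kmp_alloc() and
// ___kmpc_free() implement below, using plain malloc()/free():
#if 0
#include <stdint.h>
#include <stdlib.h>

typedef struct {
  void *ptr_alloc; // pointer to hand back to free()
  size_t size_a;   // total size actually allocated
} example_desc_t;

static void *example_aligned_alloc(size_t size, size_t align) {
  size_t total = size + sizeof(example_desc_t) + align;
  void *raw = malloc(total);
  if (raw == NULL)
    return NULL;
  uintptr_t aligned =
      ((uintptr_t)raw + sizeof(example_desc_t) + align - 1) & ~(align - 1);
  example_desc_t *desc = (example_desc_t *)(aligned - sizeof(example_desc_t));
  desc->ptr_alloc = raw;
  desc->size_a = total;
  return (void *)aligned;
}

static void example_aligned_free(void *ptr) {
  example_desc_t *desc =
      (example_desc_t *)((uintptr_t)ptr - sizeof(example_desc_t));
  free(desc->ptr_alloc); // free the original pointer, not the aligned one
}
#endif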
1865 
1866 // external interfaces are wrappers over internal implementation
1867 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1868  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1869  void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1870  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1871  return ptr;
1872 }
1873 
1874 void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1875  omp_allocator_handle_t allocator) {
1876  KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1877  (int)size, allocator));
1878  void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1879  KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1880  return ptr;
1881 }
1882 
1883 void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1884  omp_allocator_handle_t allocator) {
1885  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1886  (int)size, allocator));
1887  void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1888  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1889  return ptr;
1890 }
1891 
1892 void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1893  omp_allocator_handle_t allocator,
1894  omp_allocator_handle_t free_allocator) {
1895  KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1896  allocator, free_allocator));
1897  void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1898  KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1899  return nptr;
1900 }
1901 
1902 void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1903  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1904  ___kmpc_free(gtid, ptr, allocator);
1905  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1906  return;
1907 }
1908 
1909 // internal implementation, called from inside the library
1910 void *__kmp_alloc(int gtid, size_t algn, size_t size,
1911  omp_allocator_handle_t allocator) {
1912  void *ptr = NULL;
1913  kmp_allocator_t *al;
1914  KMP_DEBUG_ASSERT(__kmp_init_serial);
1915  if (size == 0)
1916  return NULL;
1917  if (allocator == omp_null_allocator)
1918  allocator = __kmp_threads[gtid]->th.th_def_allocator;
1919  kmp_int32 default_device =
1920  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1921 
1922  al = RCAST(kmp_allocator_t *, allocator);
1923 
1924  int sz_desc = sizeof(kmp_mem_desc_t);
1925  kmp_mem_desc_t desc;
1926  kmp_uintptr_t addr; // address returned by allocator
1927  kmp_uintptr_t addr_align; // address to return to caller
1928  kmp_uintptr_t addr_descr; // address of memory block descriptor
1929  size_t align = alignment; // default alignment
1930  if (allocator > kmp_max_mem_alloc && al->alignment > align)
1931  align = al->alignment; // alignment required by allocator trait
1932  if (align < algn)
1933  align = algn; // max of allocator trait, parameter and sizeof(void*)
1934  desc.size_orig = size;
1935  desc.size_a = size + sz_desc + align;
1936  bool is_pinned = false;
1937  if (allocator > kmp_max_mem_alloc)
1938  is_pinned = al->pinned;
1939 
1940  // Use default allocator if hwloc and libmemkind are not available
1941  int use_default_allocator =
1942  (!__kmp_hwloc_available && !__kmp_memkind_available);
1943 
1944  if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
1945  // Memspace has been allocated for targets.
1946  return __kmp_tgt_allocator.omp_alloc(size, allocator);
1947  }
1948 
1949  if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1950  // Use size input directly as the memory may not be accessible on host.
1951  // Use default device for now.
1952  if (__kmp_target_mem_available) {
1953  kmp_int32 device =
1954  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1955  if (allocator == llvm_omp_target_host_mem_alloc)
1956  ptr = kmp_target_alloc_host(size, device);
1957  else if (allocator == llvm_omp_target_shared_mem_alloc)
1958  ptr = kmp_target_alloc_shared(size, device);
1959  else // allocator == llvm_omp_target_device_mem_alloc
1960  ptr = kmp_target_alloc_device(size, device);
1961  return ptr;
1962  } else {
1963  KMP_INFORM(TargetMemNotAvailable);
1964  }
1965  }
1966 
1967  if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1968  if (__kmp_target_mem_available) {
1969  kmp_int32 device =
1970  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1971  if (al->memspace == llvm_omp_target_host_mem_space)
1972  ptr = kmp_target_alloc_host(size, device);
1973  else if (al->memspace == llvm_omp_target_shared_mem_space)
1974  ptr = kmp_target_alloc_shared(size, device);
1975  else // al->memspace == llvm_omp_target_device_mem_space
1976  ptr = kmp_target_alloc_device(size, device);
1977  return ptr;
1978  } else {
1979  KMP_INFORM(TargetMemNotAvailable);
1980  }
1981  }
1982 
1983 #if KMP_USE_HWLOC
1984  if (__kmp_hwloc_available) {
1985  if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
1986  if (allocator < kmp_max_mem_alloc) {
1987  // pre-defined allocator
1988  if (allocator == omp_high_bw_mem_alloc) {
1989  ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1990  desc.size_a, HWLOC_MEMBIND_BIND);
1991  if (ptr == NULL)
1992  use_default_allocator = true;
1993  } else if (allocator == omp_large_cap_mem_alloc) {
1994  ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1995  desc.size_a, HWLOC_MEMBIND_BIND);
1996  if (ptr == NULL)
1997  use_default_allocator = true;
1998  } else {
1999  use_default_allocator = true;
2000  }
2001  if (use_default_allocator) {
2002  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2003  }
2004  } else if (al->pool_size > 0) {
2005  // custom allocator with pool size requested
2006  kmp_uint64 used =
2007  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2008  if (used + desc.size_a > al->pool_size) {
2009  // not enough space, need to go fallback path
2010  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2011  if (al->fb == omp_atv_default_mem_fb) {
2012  al = (kmp_allocator_t *)omp_default_mem_alloc;
2013  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2014  } else if (al->fb == omp_atv_abort_fb) {
2015  KMP_ASSERT(0); // abort fallback requested
2016  } else if (al->fb == omp_atv_allocator_fb) {
2017  KMP_ASSERT(al != al->fb_data);
2018  al = al->fb_data;
2019  return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2020  } // else ptr == NULL;
2021  } else {
2022  // pool has enough space
2023  if (al->membind == omp_atv_interleaved) {
2024  if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2025  ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2026  HWLOC_MEMBIND_INTERLEAVE);
2027  }
2028  } else if (al->membind == omp_atv_environment) {
2029  ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2030  HWLOC_MEMBIND_DEFAULT);
2031  } else {
2032  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2033  }
2034  if (ptr == NULL) {
2035  if (al->fb == omp_atv_default_mem_fb) {
2036  al = (kmp_allocator_t *)omp_default_mem_alloc;
2037  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2038  } else if (al->fb == omp_atv_abort_fb) {
2039  KMP_ASSERT(0); // abort fallback requested
2040  } else if (al->fb == omp_atv_allocator_fb) {
2041  KMP_ASSERT(al != al->fb_data);
2042  al = al->fb_data;
2043  return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2044  }
2045  }
2046  }
2047  } else {
2048  // custom allocator, pool size not requested
2049  if (al->membind == omp_atv_interleaved) {
2050  if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2051  ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2052  HWLOC_MEMBIND_INTERLEAVE);
2053  }
2054  } else if (al->membind == omp_atv_environment) {
2055  ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2056  HWLOC_MEMBIND_DEFAULT);
2057  } else {
2058  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2059  }
2060  if (ptr == NULL) {
2061  if (al->fb == omp_atv_default_mem_fb) {
2062  al = (kmp_allocator_t *)omp_default_mem_alloc;
2063  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2064  } else if (al->fb == omp_atv_abort_fb) {
2065  KMP_ASSERT(0); // abort fallback requested
2066  } else if (al->fb == omp_atv_allocator_fb) {
2067  KMP_ASSERT(al != al->fb_data);
2068  al = al->fb_data;
2069  return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2070  }
2071  }
2072  }
2073  } else { // alloc membind not supported, use hwloc_alloc
2074  ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2075  }
2076  } else {
2077 #endif
2078  if (__kmp_memkind_available) {
2079  if (allocator < kmp_max_mem_alloc) {
2080  // pre-defined allocator
2081  if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2082  ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
2083  } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2084  ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
2085  } else {
2086  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2087  }
2088  } else if (al->pool_size > 0) {
2089  // custom allocator with pool size requested
2090  kmp_uint64 used =
2091  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2092  if (used + desc.size_a > al->pool_size) {
2093  // not enough space, need to go fallback path
2094  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2095  if (al->fb == omp_atv_default_mem_fb) {
2096  al = (kmp_allocator_t *)omp_default_mem_alloc;
2097  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2098  } else if (al->fb == omp_atv_abort_fb) {
2099  KMP_ASSERT(0); // abort fallback requested
2100  } else if (al->fb == omp_atv_allocator_fb) {
2101  KMP_ASSERT(al != al->fb_data);
2102  al = al->fb_data;
2103  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2104  if (is_pinned && kmp_target_lock_mem)
2105  kmp_target_lock_mem(ptr, size, default_device);
2106  return ptr;
2107  } // else ptr == NULL;
2108  } else {
2109  // pool has enough space
2110  ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2111  if (ptr == NULL) {
2112  if (al->fb == omp_atv_default_mem_fb) {
2113  al = (kmp_allocator_t *)omp_default_mem_alloc;
2114  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2115  } else if (al->fb == omp_atv_abort_fb) {
2116  KMP_ASSERT(0); // abort fallback requested
2117  } else if (al->fb == omp_atv_allocator_fb) {
2118  KMP_ASSERT(al != al->fb_data);
2119  al = al->fb_data;
2120  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2121  if (is_pinned && kmp_target_lock_mem)
2122  kmp_target_lock_mem(ptr, size, default_device);
2123  return ptr;
2124  }
2125  }
2126  }
2127  } else {
2128  // custom allocator, pool size not requested
2129  ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2130  if (ptr == NULL) {
2131  if (al->fb == omp_atv_default_mem_fb) {
2132  al = (kmp_allocator_t *)omp_default_mem_alloc;
2133  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2134  } else if (al->fb == omp_atv_abort_fb) {
2135  KMP_ASSERT(0); // abort fallback requested
2136  } else if (al->fb == omp_atv_allocator_fb) {
2137  KMP_ASSERT(al != al->fb_data);
2138  al = al->fb_data;
2139  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2140  if (is_pinned && kmp_target_lock_mem)
2141  kmp_target_lock_mem(ptr, size, default_device);
2142  return ptr;
2143  }
2144  }
2145  }
2146  } else if (allocator < kmp_max_mem_alloc) {
2147  // pre-defined allocator
2148  if (allocator == omp_high_bw_mem_alloc) {
2149  KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
2150  } else if (allocator == omp_large_cap_mem_alloc) {
2151  KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
2152  } else if (allocator == omp_const_mem_alloc) {
2153  KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
2154  } else if (allocator == omp_low_lat_mem_alloc) {
2155  KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
2156  } else if (allocator == omp_cgroup_mem_alloc) {
2157  KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
2158  } else if (allocator == omp_pteam_mem_alloc) {
2159  KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
2160  } else if (allocator == omp_thread_mem_alloc) {
2161  KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
2162  } else { // default allocator requested
2163  use_default_allocator = true;
2164  }
2165  if (use_default_allocator) {
2166  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2167  use_default_allocator = false;
2168  }
2169  } else if (al->pool_size > 0) {
2170  // custom allocator with pool size requested
2171  kmp_uint64 used =
2172  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2173  if (used + desc.size_a > al->pool_size) {
2174  // not enough space, need to go fallback path
2175  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2176  if (al->fb == omp_atv_default_mem_fb) {
2177  al = (kmp_allocator_t *)omp_default_mem_alloc;
2178  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2179  } else if (al->fb == omp_atv_abort_fb) {
2180  KMP_ASSERT(0); // abort fallback requested
2181  } else if (al->fb == omp_atv_allocator_fb) {
2182  KMP_ASSERT(al != al->fb_data);
2183  al = al->fb_data;
2184  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2185  if (is_pinned && kmp_target_lock_mem)
2186  kmp_target_lock_mem(ptr, size, default_device);
2187  return ptr;
2188  } // else ptr == NULL
2189  } else {
2190  // pool has enough space
2191  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2192  if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2193  KMP_ASSERT(0); // abort fallback requested
2194  } // no point in looking for another fallback because the same internal
2195  // allocation is used
2196  }
2197  } else {
2198  // custom allocator, pool size not requested
2199  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2200  if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2201  KMP_ASSERT(0); // abort fallback requested
2202  } // no point in looking for another fallback; the same internal alloc is used
2203  }
2204 #if KMP_USE_HWLOC
2205  }
2206 #endif
2207  KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
2208  if (ptr == NULL)
2209  return NULL;
2210 
2211  if (is_pinned && kmp_target_lock_mem)
2212  kmp_target_lock_mem(ptr, desc.size_a, default_device);
2213 
2214  addr = (kmp_uintptr_t)ptr;
2215  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
2216  addr_descr = addr_align - sz_desc;
2217 
2218  desc.ptr_alloc = ptr;
2219  desc.ptr_align = (void *)addr_align;
2220  desc.allocator = al;
2221  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
2222  KMP_MB();
2223 
2224  return desc.ptr_align;
2225 }
2226 
2227 void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
2228  omp_allocator_handle_t allocator) {
2229  void *ptr = NULL;
2230  kmp_allocator_t *al;
2231  KMP_DEBUG_ASSERT(__kmp_init_serial);
2232 
2233  if (allocator == omp_null_allocator)
2234  allocator = __kmp_threads[gtid]->th.th_def_allocator;
2235 
2236  al = RCAST(kmp_allocator_t *, allocator);
2237 
2238  if (nmemb == 0 || size == 0)
2239  return ptr;
2240 
2241  if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
2242  if (al->fb == omp_atv_abort_fb) {
2243  KMP_ASSERT(0);
2244  }
2245  return ptr;
2246  }
2247 
2248  ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
2249 
2250  if (ptr) {
2251  memset(ptr, 0x00, nmemb * size);
2252  }
2253  return ptr;
2254 }
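// Illustrative note (not from the original source): on a 64-bit target a
// hypothetical call such as
//   __kmp_calloc(gtid, 0, (size_t)1 << 32, (size_t)1 << 32, allocator)
// would wrap nmemb * size around to 0; the guard
//   (SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb
// above rejects such requests (and leaves room for the descriptor) before
// __kmp_alloc() is ever called.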
2255 
2256 void *__kmp_realloc(int gtid, void *ptr, size_t size,
2257  omp_allocator_handle_t allocator,
2258  omp_allocator_handle_t free_allocator) {
2259  void *nptr = NULL;
2260  KMP_DEBUG_ASSERT(__kmp_init_serial);
2261 
2262  if (size == 0) {
2263  if (ptr != NULL)
2264  ___kmpc_free(gtid, ptr, free_allocator);
2265  return nptr;
2266  }
2267 
2268  nptr = __kmp_alloc(gtid, 0, size, allocator);
2269 
2270  if (nptr != NULL && ptr != NULL) {
2271  kmp_mem_desc_t desc;
2272  kmp_uintptr_t addr_align; // address to return to caller
2273  kmp_uintptr_t addr_descr; // address of memory block descriptor
2274 
2275  addr_align = (kmp_uintptr_t)ptr;
2276  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2277  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
2278 
2279  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2280  KMP_DEBUG_ASSERT(desc.size_orig > 0);
2281  KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
2282  KMP_MEMCPY((char *)nptr, (char *)ptr,
2283  (size_t)((size < desc.size_orig) ? size : desc.size_orig));
2284  }
2285 
2286  if (nptr != NULL) {
2287  ___kmpc_free(gtid, ptr, free_allocator);
2288  }
2289 
2290  return nptr;
2291 }
2292 
2293 void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
2294  if (ptr == NULL)
2295  return;
2296 
2297  kmp_allocator_t *al;
2298  omp_allocator_handle_t oal;
2299  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
2300  kmp_mem_desc_t desc;
2301  kmp_uintptr_t addr_align; // address to return to caller
2302  kmp_uintptr_t addr_descr; // address of memory block descriptor
2303 
2304  if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
2305  __kmp_tgt_allocator.omp_free(ptr, allocator);
2306  return;
2307  }
2308 
2309  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
2310  (allocator > kmp_max_mem_alloc &&
2311  KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
2312  kmp_int32 device =
2313  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2314  if (allocator == llvm_omp_target_host_mem_alloc) {
2315  kmp_target_free_host(ptr, device);
2316  } else if (allocator == llvm_omp_target_shared_mem_alloc) {
2317  kmp_target_free_shared(ptr, device);
2318  } else if (allocator == llvm_omp_target_device_mem_alloc) {
2319  kmp_target_free_device(ptr, device);
2320  }
2321  return;
2322  }
2323 
2324  addr_align = (kmp_uintptr_t)ptr;
2325  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2326  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
2327 
2328  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2329  if (allocator) {
2330  KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
2331  }
2332  al = desc.allocator;
2333  oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
2334  KMP_DEBUG_ASSERT(al);
2335 
2336  if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
2337  kmp_int32 device =
2338  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2339  kmp_target_unlock_mem(desc.ptr_alloc, device);
2340  }
2341 
2342 #if KMP_USE_HWLOC
2343  if (__kmp_hwloc_available) {
2344  if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2345  kmp_uint64 used =
2346  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2347  (void)used; // to suppress compiler warning
2348  KMP_DEBUG_ASSERT(used >= desc.size_a);
2349  }
2350  hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
2351  } else {
2352 #endif
2353  if (__kmp_memkind_available) {
2354  if (oal < kmp_max_mem_alloc) {
2355  // pre-defined allocator
2356  if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2357  kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
2358  } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2359  kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
2360  } else {
2361  kmp_mk_free(*mk_default, desc.ptr_alloc);
2362  }
2363  } else {
2364  if (al->pool_size > 0) { // custom allocator with pool size requested
2365  kmp_uint64 used =
2366  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2367  (void)used; // to suppress compiler warning
2368  KMP_DEBUG_ASSERT(used >= desc.size_a);
2369  }
2370  kmp_mk_free(*al->memkind, desc.ptr_alloc);
2371  }
2372  } else {
2373  if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2374  kmp_uint64 used =
2375  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2376  (void)used; // to suppress compiler warning
2377  KMP_DEBUG_ASSERT(used >= desc.size_a);
2378  }
2379  __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
2380  }
2381 #if KMP_USE_HWLOC
2382  }
2383 #endif
2384 }
2385 
2386 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
2387  memory leaks, but it may be useful for debugging memory corruption, use of
2388  freed pointers, etc. */
2389 /* #define LEAK_MEMORY */
2390 struct kmp_mem_descr { // Memory block descriptor.
2391  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
2392  size_t size_allocated; // Size of allocated memory block.
2393  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
2394  size_t size_aligned; // Size of aligned memory block.
2395 };
2396 typedef struct kmp_mem_descr kmp_mem_descr_t;
2397 
2398 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
2399  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2400  error. Must use __kmp_free when freeing memory allocated by this routine! */
2401 static void *___kmp_allocate_align(size_t size,
2402  size_t alignment KMP_SRC_LOC_DECL) {
2403  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
2404  requested to return properly aligned pointer. Original pointer returned
2405  by malloc() and size of allocated block is saved in descriptor just
2406  before the aligned pointer. This information used by __kmp_free() -- it
2407  has to pass to free() original pointer, not aligned one.
2408 
2409  +---------+------------+-----------------------------------+---------+
2410  | padding | descriptor | aligned block | padding |
2411  +---------+------------+-----------------------------------+---------+
2412  ^ ^
2413  | |
2414  | +- Aligned pointer returned to caller
2415  +- Pointer returned by malloc()
2416 
2417  Aligned block is filled with zeros, paddings are filled with 0xEF. */
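  /* Illustrative worked example (not from the original source), assuming
     sizeof(kmp_mem_descr_t) == 32 and alignment == 64:
       malloc() returns addr_allocated = 0x1001 (arbitrary)
       addr_aligned = (0x1001 + 32 + 64) & ~(64 - 1) = 0x1040
       addr_descr   = 0x1040 - 32                    = 0x1020
     so the descriptor sits immediately below the aligned pointer handed to the
     caller, and __kmp_free() later passes 0x1001 back to free(). */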
2418 
2419  kmp_mem_descr_t descr;
2420  kmp_uintptr_t addr_allocated; // Address returned by malloc().
2421  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
2422  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
2423 
2424  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2425  (int)size, (int)alignment KMP_SRC_LOC_PARM));
2426 
2427  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
2428  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
2429  // Make sure kmp_uintptr_t is enough to store addresses.
2430 
2431  descr.size_aligned = size;
2432  descr.size_allocated =
2433  descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
2434 
2435 #if KMP_DEBUG
2436  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
2437 #else
2438  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
2439 #endif
2440  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
2441  descr.ptr_allocated));
2442  if (descr.ptr_allocated == NULL) {
2443  KMP_FATAL(OutOfHeapMemory);
2444  }
2445 
2446  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2447  addr_aligned =
2448  (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
2449  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
2450 
2451  descr.ptr_aligned = (void *)addr_aligned;
2452 
2453  KE_TRACE(26, (" ___kmp_allocate_align: "
2454  "ptr_allocated=%p, size_allocated=%d, "
2455  "ptr_aligned=%p, size_aligned=%d\n",
2456  descr.ptr_allocated, (int)descr.size_allocated,
2457  descr.ptr_aligned, (int)descr.size_aligned));
2458 
2459  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
2460  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
2461  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2462  addr_allocated + descr.size_allocated);
2463  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
2464 #ifdef KMP_DEBUG
2465  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2466 // Fill allocated memory block with 0xEF.
2467 #endif
2468  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
2469  // Fill the aligned memory block (which is intended for use by the caller)
2470  // with 0x00.
2471  // Do not put this filling under a KMP_DEBUG condition! Many callers expect
2472  // zeroed memory.
2473  // (Padding bytes remain filled with 0xEF in the debugging library.)
2474  *((kmp_mem_descr_t *)addr_descr) = descr;
2475 
2476  KMP_MB();
2477 
2478  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
2479  return descr.ptr_aligned;
2480 } // func ___kmp_allocate_align
2481 
2482 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
2483  Do not call this func directly! Use __kmp_allocate macro instead.
2484  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2485  error. Must use __kmp_free when freeing memory allocated by this routine! */
2486 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
2487  void *ptr;
2488  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
2489  (int)size KMP_SRC_LOC_PARM));
2490  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2491  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
2492  return ptr;
2493 } // func ___kmp_allocate
2494 
2495 /* Allocate memory on page boundary, fill allocated memory with 0x00.
2496  Do not call this func directly! Use __kmp_page_allocate macro instead.
2497  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2498  error. Must use __kmp_free when freeing memory allocated by this routine! */
2499 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
2500  int page_size = 8 * 1024;
2501  void *ptr;
2502 
2503  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
2504  (int)size KMP_SRC_LOC_PARM));
2505  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2506  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
2507  return ptr;
2508 } // ___kmp_page_allocate
2509 
2510 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
2511  In debug mode, fill the memory block with 0xEF before call to free(). */
2512 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
2513  kmp_mem_descr_t descr;
2514 #if KMP_DEBUG
2515  kmp_uintptr_t addr_allocated; // Address returned by malloc().
2516  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
2517 #endif
2518  KE_TRACE(25,
2519  ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2520  KMP_ASSERT(ptr != NULL);
2521 
2522  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
2523 
2524  KE_TRACE(26, (" __kmp_free: "
2525  "ptr_allocated=%p, size_allocated=%d, "
2526  "ptr_aligned=%p, size_aligned=%d\n",
2527  descr.ptr_allocated, (int)descr.size_allocated,
2528  descr.ptr_aligned, (int)descr.size_aligned));
2529 #if KMP_DEBUG
2530  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2531  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2532  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2533  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2534  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
2535  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
2536  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2537  addr_allocated + descr.size_allocated);
2538  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2539 // Fill memory block with 0xEF, it helps catch using freed memory.
2540 #endif
2541 
2542 #ifndef LEAK_MEMORY
2543  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
2544 #ifdef KMP_DEBUG
2545  _free_src_loc(descr.ptr_allocated, _file_, _line_);
2546 #else
2547  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
2548 #endif
2549 #endif
2550  KMP_MB();
2551  KE_TRACE(25, ("<- __kmp_free() returns\n"));
2552 } // func ___kmp_free
2553 
2554 #if USE_FAST_MEMORY == 3
2555 // Allocate fast memory by first scanning the thread's free lists
2556 // If a chunk the right size exists, grab it off the free list.
2557 // Otherwise allocate normally using kmp_thread_malloc.
2558 
2559 // AC: How to choose the limit? Just get 16 for now...
2560 #define KMP_FREE_LIST_LIMIT 16
2561 
2562 // Always use 128 bytes for determining buckets for caching memory blocks
2563 #define DCACHE_LINE 128
2564 
2565 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
2566  void *ptr;
2567  size_t num_lines, idx;
2568  int index;
2569  void *alloc_ptr;
2570  size_t alloc_size;
2571  kmp_mem_descr_t *descr;
2572 
2573  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2574  __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
2575 
2576  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2577  idx = num_lines - 1;
2578  KMP_DEBUG_ASSERT(idx >= 0);
2579  if (idx < 2) {
2580  index = 0; // idx is [ 0, 1 ], use first free list
2581  num_lines = 2; // 1, 2 cache lines or less than cache line
2582  } else if ((idx >>= 2) == 0) {
2583  index = 1; // idx is [ 2, 3 ], use second free list
2584  num_lines = 4; // 3, 4 cache lines
2585  } else if ((idx >>= 2) == 0) {
2586  index = 2; // idx is [ 4, 15 ], use third free list
2587  num_lines = 16; // 5, 6, ..., 16 cache lines
2588  } else if ((idx >>= 2) == 0) {
2589  index = 3; // idx is [ 16, 63 ], use fourth free list
2590  num_lines = 64; // 17, 18, ..., 64 cache lines
2591  } else {
2592  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
2593  }
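  // Summary of the bucket selection above (derived from the code; byte ranges
  // assume DCACHE_LINE == 128):
  //   request size         free list index   block size
  //   1 .. 256 bytes       0                 2 cache lines
  //   257 .. 512 bytes     1                 4 cache lines
  //   513 .. 2048 bytes    2                 16 cache lines
  //   2049 .. 8192 bytes   3                 64 cache lines
  //   > 8192 bytes         none              allocated directly via bget()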
2594 
2595  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2596  if (ptr != NULL) {
2597  // pop the head of no-sync free list
2598  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2599  KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2600  sizeof(kmp_mem_descr_t)))
2601  ->ptr_aligned);
2602  goto end;
2603  }
2604  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2605  if (ptr != NULL) {
2606  // no-sync free list is empty, use sync free list (filled in by other
2607  // threads only)
2608  // pop the head of the sync free list, push NULL instead
2609  while (!KMP_COMPARE_AND_STORE_PTR(
2610  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
2611  KMP_CPU_PAUSE();
2612  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2613  }
2614  // push the rest of chain into no-sync free list (can be NULL if there was
2615  // the only block)
2616  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2617  KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2618  sizeof(kmp_mem_descr_t)))
2619  ->ptr_aligned);
2620  goto end;
2621  }
2622 
2623 alloc_call:
2624  // haven't found block in the free lists, thus allocate it
2625  size = num_lines * DCACHE_LINE;
2626 
2627  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2628  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling bget with "
2629  "alloc_size %d\n",
2630  __kmp_gtid_from_thread(this_thr), alloc_size));
2631  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2632 
2633  // align ptr to DCACHE_LINE
2634  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
2635  DCACHE_LINE) &
2636  ~(DCACHE_LINE - 1));
2637  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2638 
2639  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
2640  // we don't need size_allocated
2641  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
2642  // (it is already saved in bget buffer,
2643  // but we may want to use another allocator in future)
2644  descr->size_aligned = size;
2645 
2646 end:
2647  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
2648  __kmp_gtid_from_thread(this_thr), ptr));
2649  return ptr;
2650 } // func __kmp_fast_allocate
2651 
2652 // Free fast memory and place it on the thread's free list if it is of
2653 // the correct size.
2654 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
2655  kmp_mem_descr_t *descr;
2656  kmp_info_t *alloc_thr;
2657  size_t size;
2658  size_t idx;
2659  int index;
2660 
2661  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2662  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2663  KMP_ASSERT(ptr != NULL);
2664 
2665  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2666 
2667  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
2668  (int)descr->size_aligned));
2669 
2670  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
2671 
2672  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
2673  if (idx == size) {
2674  index = 0; // 2 cache lines
2675  } else if ((idx <<= 1) == size) {
2676  index = 1; // 4 cache lines
2677  } else if ((idx <<= 2) == size) {
2678  index = 2; // 16 cache lines
2679  } else if ((idx <<= 2) == size) {
2680  index = 3; // 64 cache lines
2681  } else {
2682  KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
2683  goto free_call; // 65 or more cache lines ( > 8KB )
2684  }
2685 
2686  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
2687  if (alloc_thr == this_thr) {
2688  // push block to self no-sync free list, linking previous head (LIFO)
2689  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2690  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2691  } else {
2692  void *head = this_thr->th.th_free_lists[index].th_free_list_other;
2693  if (head == NULL) {
2694  // Create new free list
2695  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2696  *((void **)ptr) = NULL; // mark the tail of the list
2697  descr->size_allocated = (size_t)1; // head of the list keeps its length
2698  } else {
2699  // need to check existed "other" list's owner thread and size of queue
2700  kmp_mem_descr_t *dsc =
2701  (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
2702  // allocating thread, same for all queue nodes
2703  kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2704  size_t q_sz =
2705  dsc->size_allocated + 1; // new size in case we add current task
2706  if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2707  // we can add current task to "other" list, no sync needed
2708  *((void **)ptr) = head;
2709  descr->size_allocated = q_sz;
2710  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2711  } else {
2712  // either queue blocks owner is changing or size limit exceeded
2713  // return old queue to allocating thread (q_th) synchronously,
2714  // and start new list for alloc_thr's tasks
2715  void *old_ptr;
2716  void *tail = head;
2717  void *next = *((void **)head);
2718  while (next != NULL) {
2719  KMP_DEBUG_ASSERT(
2720  // queue size should decrease by 1 each step through the list
2721  ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
2722  ->size_allocated +
2723  1 ==
2724  ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
2725  ->size_allocated);
2726  tail = next; // remember tail node
2727  next = *((void **)next);
2728  }
2729  KMP_DEBUG_ASSERT(q_th != NULL);
2730  // push block to owner's sync free list
2731  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2732  /* the next pointer must be set before setting free_list to ptr to avoid
2733  exposing a broken list to other threads, even for an instant. */
2734  *((void **)tail) = old_ptr;
2735 
2736  while (!KMP_COMPARE_AND_STORE_PTR(
2737  &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2738  KMP_CPU_PAUSE();
2739  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2740  *((void **)tail) = old_ptr;
2741  }
2742 
2743  // start a new list of not-self tasks
2744  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2745  *((void **)ptr) = NULL;
2746  descr->size_allocated = (size_t)1; // head of queue keeps its length
2747  }
2748  }
2749  }
2750  goto end;
2751 
2752 free_call:
2753  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling brel for size %d\n",
2754  __kmp_gtid_from_thread(this_thr), size));
2755  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
2756  brel(this_thr, descr->ptr_allocated);
2757 
2758 end:
2759  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
2760 
2761 } // func __kmp_fast_free
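// Illustrative sketch (not the runtime's code): the lock-free LIFO push that
// the KMP_COMPARE_AND_STORE_PTR loops above perform, written with C++ atomics.
// The type and function names are hypothetical.
#if 0
#include <atomic>

struct example_node {
  example_node *next;
};

// Push a pre-linked chain [head .. tail] onto a shared LIFO list. The tail's
// next pointer is (re)set before every CAS attempt so other threads never see
// a broken list, mirroring the comment in ___kmp_fast_free() above.
static void example_push_chain(std::atomic<example_node *> &list,
                               example_node *head, example_node *tail) {
  example_node *old_head = list.load(std::memory_order_relaxed);
  do {
    tail->next = old_head;
  } while (!list.compare_exchange_weak(old_head, head));
}
#endif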
2762 
2763 // Initialize the thread free lists related to fast memory
2764 // Only do this when a thread is initially created.
2765 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2766  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2767 
2768  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
2769 }
2770 
2771 // Free the memory in the thread free lists related to fast memory
2772 // Only do this when a thread is being reaped (destroyed).
2773 void __kmp_free_fast_memory(kmp_info_t *th) {
2774  // Assuming BGET is the underlying allocator, walk through its structures...
2775  int bin;
2776  thr_data_t *thr = get_thr_data(th);
2777  void **lst = NULL;
2778 
2779  KE_TRACE(
2780  5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2781 
2782  __kmp_bget_dequeue(th); // Release any queued buffers
2783 
2784  // Dig through free lists and extract all allocated blocks
2785  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2786  bfhead_t *b = thr->freelist[bin].ql.flink;
2787  while (b != &thr->freelist[bin]) {
2788  if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
2789  *((void **)b) =
2790  lst; // link the list (override bthr, but keep flink yet)
2791  lst = (void **)b; // push b into lst
2792  }
2793  b = b->ql.flink; // get next buffer
2794  }
2795  }
2796  while (lst != NULL) {
2797  void *next = *lst;
2798  KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2799  lst, next, th, __kmp_gtid_from_thread(th)));
2800  (*thr->relfcn)(lst);
2801 #if BufStats
2802  // count blocks to prevent problems in __kmp_finalize_bget()
2803  thr->numprel++; /* Nr of expansion block releases */
2804  thr->numpblk--; /* Total number of blocks */
2805 #endif
2806  lst = (void **)next;
2807  }
2808 
2809  KE_TRACE(
2810  5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
2811 }
2812 
2813 #endif // USE_FAST_MEMORY