LLVM OpenMP* Runtime Library
kmp_alloc.cpp
1 /*
2  * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_io.h"
15 #include "kmp_wrapper_malloc.h"
16 
17 // Disable bget when it is not used
18 #if KMP_USE_BGET
19 
20 /* Thread private buffer management code */
21 
22 typedef int (*bget_compact_t)(size_t, int);
23 typedef void *(*bget_acquire_t)(size_t);
24 typedef void (*bget_release_t)(void *);
25 
26 /* NOTE: bufsize must be a signed datatype */
27 
28 #if KMP_OS_WINDOWS
29 #if KMP_ARCH_X86 || KMP_ARCH_ARM
30 typedef kmp_int32 bufsize;
31 #else
32 typedef kmp_int64 bufsize;
33 #endif
34 #else
35 typedef ssize_t bufsize;
36 #endif // KMP_OS_WINDOWS
37 
38 /* The three modes of operation are, fifo search, lifo search, and best-fit */
39 
40 typedef enum bget_mode {
41  bget_mode_fifo = 0,
42  bget_mode_lifo = 1,
43  bget_mode_best = 2
44 } bget_mode_t;
45 
46 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
47 static void *bget(kmp_info_t *th, bufsize size);
48 static void *bgetz(kmp_info_t *th, bufsize size);
49 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
50 static void brel(kmp_info_t *th, void *buf);
51 static void bectl(kmp_info_t *th, bget_compact_t compact,
52  bget_acquire_t acquire, bget_release_t release,
53  bufsize pool_incr);
54 
55 /* BGET CONFIGURATION */
56 /* Buffer allocation size quantum: all buffers allocated are a
57  multiple of this size. This MUST be a power of two. */
58 
59 /* On IA-32 architecture with Linux* OS, malloc() does not
60  ensure 16 byte alignment */
61 
62 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
63 
64 #define SizeQuant 8
65 #define AlignType double
66 
67 #else
68 
69 #define SizeQuant 16
70 #define AlignType _Quad
71 
72 #endif
73 
74 // Define this symbol to enable the bstats() function which calculates the
75 // total free space in the buffer pool, the largest available buffer, and the
76 // total space currently allocated.
77 #define BufStats 1
78 
79 #ifdef KMP_DEBUG
80 
81 // Define this symbol to enable the bpoold() function which dumps the buffers
82 // in a buffer pool.
83 #define BufDump 1
84 
85 // Define this symbol to enable the bpoolv() function for validating a buffer
86 // pool.
87 #define BufValid 1
88 
89 // Define this symbol to enable the bufdump() function which allows dumping the
90 // contents of an allocated or free buffer.
91 #define DumpData 1
92 
93 #ifdef NOT_USED_NOW
94 
95 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
96 // who attempt to use pointers into released buffers.
97 #define FreeWipe 1
98 
99 // Use a best fit algorithm when searching for space for an allocation request.
100 // This uses memory more efficiently, but allocation will be much slower.
101 #define BestFit 1
102 
103 #endif /* NOT_USED_NOW */
104 #endif /* KMP_DEBUG */
105 
106 static bufsize bget_bin_size[] = {
107  0,
108  // 1 << 6, /* .5 Cache line */
109  1 << 7, /* 1 Cache line, new */
110  1 << 8, /* 2 Cache lines */
111  1 << 9, /* 4 Cache lines, new */
112  1 << 10, /* 8 Cache lines */
113  1 << 11, /* 16 Cache lines, new */
114  1 << 12, 1 << 13, /* new */
115  1 << 14, 1 << 15, /* new */
116  1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
117  1 << 21, /* 2MB */
118  1 << 22, /* 4MB */
119  1 << 23, /* 8MB */
120  1 << 24, /* 16MB */
121  1 << 25, /* 32MB */
122 };
123 
124 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
125 
126 struct bfhead;
127 
128 // Declare the interface, including the requested buffer size type, bufsize.
129 
130 /* Queue links */
131 typedef struct qlinks {
132  struct bfhead *flink; /* Forward link */
133  struct bfhead *blink; /* Backward link */
134 } qlinks_t;
135 
136 /* Header in allocated and free buffers */
137 typedef struct bhead2 {
138  kmp_info_t *bthr; /* The thread which owns the buffer pool */
139  bufsize prevfree; /* Relative link back to previous free buffer in memory or
140  0 if previous buffer is allocated. */
141  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
142 } bhead2_t;
143 
144 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
145 typedef union bhead {
146  KMP_ALIGN(SizeQuant)
147  AlignType b_align;
148  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
149  bhead2_t bb;
150 } bhead_t;
151 #define BH(p) ((bhead_t *)(p))
152 
153 /* Header in directly allocated buffers (by acqfcn) */
154 typedef struct bdhead {
155  bufsize tsize; /* Total size, including overhead */
156  bhead_t bh; /* Common header */
157 } bdhead_t;
158 #define BDH(p) ((bdhead_t *)(p))
159 
160 /* Header in free buffers */
161 typedef struct bfhead {
162  bhead_t bh; /* Common allocated/free header */
163  qlinks_t ql; /* Links on free list */
164 } bfhead_t;
165 #define BFH(p) ((bfhead_t *)(p))
166 
167 typedef struct thr_data {
168  bfhead_t freelist[MAX_BGET_BINS];
169 #if BufStats
170  size_t totalloc; /* Total space currently allocated */
171  long numget, numrel; /* Number of bget() and brel() calls */
172  long numpblk; /* Number of pool blocks */
173  long numpget, numprel; /* Number of block gets and rels */
174  long numdget, numdrel; /* Number of direct gets and rels */
175 #endif /* BufStats */
176 
177  /* Automatic expansion block management functions */
178  bget_compact_t compfcn;
179  bget_acquire_t acqfcn;
180  bget_release_t relfcn;
181 
182  bget_mode_t mode; /* what allocation mode to use? */
183 
184  bufsize exp_incr; /* Expansion block size */
185  bufsize pool_len; /* 0: no bpool calls have been made
186  -1: not all pool blocks are the same size
187  >0: (common) block size for all bpool calls made so far
188  */
189  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
190 } thr_data_t;
191 
192 /* Minimum allocation quantum: */
193 #define QLSize (sizeof(qlinks_t))
194 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
195 #define MaxSize \
196  (bufsize)( \
197  ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
198 // Maximum for the requested size.
199 
200 /* End sentinel: value placed in bsize field of dummy block delimiting
201  end of pool block. The most negative number which will fit in a
202  bufsize, defined in a way that the compiler will accept. */
203 
204 #define ESent \
205  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
206 
207 /* Thread Data management routines */
208 static int bget_get_bin(bufsize size) {
209  // binary chop bins
210  int lo = 0, hi = MAX_BGET_BINS - 1;
211 
212  KMP_DEBUG_ASSERT(size > 0);
213 
214  while ((hi - lo) > 1) {
215  int mid = (lo + hi) >> 1;
216  if (size < bget_bin_size[mid])
217  hi = mid - 1;
218  else
219  lo = mid;
220  }
221 
222  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
223 
224  return lo;
225 }
226 
227 static void set_thr_data(kmp_info_t *th) {
228  int i;
229  thr_data_t *data;
230 
231  data = (thr_data_t *)((!th->th.th_local.bget_data)
232  ? __kmp_allocate(sizeof(*data))
233  : th->th.th_local.bget_data);
234 
235  memset(data, '\0', sizeof(*data));
236 
237  for (i = 0; i < MAX_BGET_BINS; ++i) {
238  data->freelist[i].ql.flink = &data->freelist[i];
239  data->freelist[i].ql.blink = &data->freelist[i];
240  }
241 
242  th->th.th_local.bget_data = data;
243  th->th.th_local.bget_list = 0;
244 #if !USE_CMP_XCHG_FOR_BGET
245 #ifdef USE_QUEUING_LOCK_FOR_BGET
246  __kmp_init_lock(&th->th.th_local.bget_lock);
247 #else
248  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
249 #endif /* USE_LOCK_FOR_BGET */
250 #endif /* ! USE_CMP_XCHG_FOR_BGET */
251 }
252 
253 static thr_data_t *get_thr_data(kmp_info_t *th) {
254  thr_data_t *data;
255 
256  data = (thr_data_t *)th->th.th_local.bget_data;
257 
258  KMP_DEBUG_ASSERT(data != 0);
259 
260  return data;
261 }
262 
263 /* Walk the free list and release the enqueued buffers */
264 static void __kmp_bget_dequeue(kmp_info_t *th) {
265  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
266 
267  if (p != 0) {
268 #if USE_CMP_XCHG_FOR_BGET
269  {
270  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
271  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
272  CCAST(void *, old_value), nullptr)) {
273  KMP_CPU_PAUSE();
274  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
275  }
276  p = CCAST(void *, old_value);
277  }
278 #else /* ! USE_CMP_XCHG_FOR_BGET */
279 #ifdef USE_QUEUING_LOCK_FOR_BGET
280  __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
281 #else
282  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
283 #endif /* USE_QUEUING_LOCK_FOR_BGET */
284 
285  p = (void *)th->th.th_local.bget_list;
286  th->th.th_local.bget_list = 0;
287 
288 #ifdef USE_QUEUING_LOCK_FOR_BGET
289  __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
290 #else
291  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
292 #endif
293 #endif /* USE_CMP_XCHG_FOR_BGET */
294 
295  /* Check again to make sure the list is not empty */
296  while (p != 0) {
297  void *buf = p;
298  bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
299 
300  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
301  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
302  (kmp_uintptr_t)th); // clear possible mark
303  KMP_DEBUG_ASSERT(b->ql.blink == 0);
304 
305  p = (void *)b->ql.flink;
306 
307  brel(th, buf);
308  }
309  }
310 }
311 
312 /* Chain together the free buffers by using the thread owner field */
313 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
314 #ifdef USE_QUEUING_LOCK_FOR_BGET
315  ,
316  kmp_int32 rel_gtid
317 #endif
318 ) {
319  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
320 
321  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
322  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
323  (kmp_uintptr_t)th); // clear possible mark
324 
325  b->ql.blink = 0;
326 
327  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
328  __kmp_gtid_from_thread(th)));
329 
330 #if USE_CMP_XCHG_FOR_BGET
331  {
332  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
333  /* the next pointer must be set before setting bget_list to buf to avoid
334  exposing a broken list to other threads, even for an instant. */
335  b->ql.flink = BFH(CCAST(void *, old_value));
336 
337  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
338  CCAST(void *, old_value), buf)) {
339  KMP_CPU_PAUSE();
340  old_value = TCR_PTR(th->th.th_local.bget_list);
341  /* the next pointer must be set before setting bget_list to buf to avoid
342  exposing a broken list to other threads, even for an instant. */
343  b->ql.flink = BFH(CCAST(void *, old_value));
344  }
345  }
346 #else /* ! USE_CMP_XCHG_FOR_BGET */
347 #ifdef USE_QUEUING_LOCK_FOR_BGET
348  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
349 #else
350  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
351 #endif
352 
353  b->ql.flink = BFH(th->th.th_local.bget_list);
354  th->th.th_local.bget_list = (void *)buf;
355 
356 #ifdef USE_QUEUING_LOCK_FOR_BGET
357  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
358 #else
359  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
360 #endif
361 #endif /* USE_CMP_XCHG_FOR_BGET */
362 }
363 
364 /* insert buffer back onto a new freelist */
365 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
366  int bin;
367 
368  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
369  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
370 
371  bin = bget_get_bin(b->bh.bb.bsize);
372 
373  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
374  &thr->freelist[bin]);
375  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
376  &thr->freelist[bin]);
377 
378  b->ql.flink = &thr->freelist[bin];
379  b->ql.blink = thr->freelist[bin].ql.blink;
380 
381  thr->freelist[bin].ql.blink = b;
382  b->ql.blink->ql.flink = b;
383 }
384 
385 /* unlink the buffer from the old freelist */
386 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
387  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
388  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
389 
390  b->ql.blink->ql.flink = b->ql.flink;
391  b->ql.flink->ql.blink = b->ql.blink;
392 }
393 
394 /* GET STATS -- check info on free list */
395 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
396  thr_data_t *thr = get_thr_data(th);
397  int bin;
398 
399  *total_free = *max_free = 0;
400 
401  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
402  bfhead_t *b, *best;
403 
404  best = &thr->freelist[bin];
405  b = best->ql.flink;
406 
407  while (b != &thr->freelist[bin]) {
408  *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
409  if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
410  best = b;
411 
412  /* Link to next buffer */
413  b = b->ql.flink;
414  }
415 
416  if (*max_free < best->bh.bb.bsize)
417  *max_free = best->bh.bb.bsize;
418  }
419 
420  if (*max_free > (bufsize)sizeof(bhead_t))
421  *max_free -= sizeof(bhead_t);
422 }
423 
424 /* BGET -- Allocate a buffer. */
425 static void *bget(kmp_info_t *th, bufsize requested_size) {
426  thr_data_t *thr = get_thr_data(th);
427  bufsize size = requested_size;
428  bfhead_t *b;
429  void *buf;
430  int compactseq = 0;
431  int use_blink = 0;
432  /* For BestFit */
433  bfhead_t *best;
434 
435  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
436  return NULL;
437  }
438 
439  __kmp_bget_dequeue(th); /* Release any queued buffers */
440 
441  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
442  size = SizeQ;
443  }
444 #if defined(SizeQuant) && (SizeQuant > 1)
445  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
446 #endif
447 
448  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
449  KMP_DEBUG_ASSERT(size >= 0);
450  KMP_DEBUG_ASSERT(size % SizeQuant == 0);
451 
452  use_blink = (thr->mode == bget_mode_lifo);
453 
454  /* If a compact function was provided in the call to bectl(), wrap
455  a loop around the allocation process to allow compaction to
456  intervene in case we don't find a suitable buffer in the chain. */
457 
458  for (;;) {
459  int bin;
460 
461  for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
462  /* Link to next buffer */
463  b = (use_blink ? thr->freelist[bin].ql.blink
464  : thr->freelist[bin].ql.flink);
465 
466  if (thr->mode == bget_mode_best) {
467  best = &thr->freelist[bin];
468 
469  /* Scan the free list searching for the first buffer big enough
470  to hold the requested size buffer. */
471  while (b != &thr->freelist[bin]) {
472  if (b->bh.bb.bsize >= (bufsize)size) {
473  if ((best == &thr->freelist[bin]) ||
474  (b->bh.bb.bsize < best->bh.bb.bsize)) {
475  best = b;
476  }
477  }
478 
479  /* Link to next buffer */
480  b = (use_blink ? b->ql.blink : b->ql.flink);
481  }
482  b = best;
483  }
484 
485  while (b != &thr->freelist[bin]) {
486  if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
487 
488  // Buffer is big enough to satisfy the request. Allocate it to the
489  // caller. We must decide whether the buffer is large enough to split
490  // into the part given to the caller and a free buffer that remains
491  // on the free list, or whether the entire buffer should be removed
492  // from the free list and given to the caller in its entirety. We
493  // only split the buffer if enough room remains for a header plus the
494  // minimum quantum of allocation.
495  if ((b->bh.bb.bsize - (bufsize)size) >
496  (bufsize)(SizeQ + (sizeof(bhead_t)))) {
497  bhead_t *ba, *bn;
498 
499  ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
500  bn = BH(((char *)ba) + size);
501 
502  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
503 
504  /* Subtract size from length of free block. */
505  b->bh.bb.bsize -= (bufsize)size;
506 
507  /* Link allocated buffer to the previous free buffer. */
508  ba->bb.prevfree = b->bh.bb.bsize;
509 
510  /* Plug negative size into user buffer. */
511  ba->bb.bsize = -size;
512 
513  /* Mark this buffer as owned by this thread. */
514  TCW_PTR(ba->bb.bthr,
515  th); // not an allocated address (do not mark it)
516  /* Mark buffer after this one not preceded by free block. */
517  bn->bb.prevfree = 0;
518 
519  // unlink buffer from old freelist, and reinsert into new freelist
520  __kmp_bget_remove_from_freelist(b);
521  __kmp_bget_insert_into_freelist(thr, b);
522 #if BufStats
523  thr->totalloc += (size_t)size;
524  thr->numget++; /* Increment number of bget() calls */
525 #endif
526  buf = (void *)((((char *)ba) + sizeof(bhead_t)));
527  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
528  return buf;
529  } else {
530  bhead_t *ba;
531 
532  ba = BH(((char *)b) + b->bh.bb.bsize);
533 
534  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
535 
536  /* The buffer isn't big enough to split. Give the whole
537  shebang to the caller and remove it from the free list. */
538 
539  __kmp_bget_remove_from_freelist(b);
540 #if BufStats
541  thr->totalloc += (size_t)b->bh.bb.bsize;
542  thr->numget++; /* Increment number of bget() calls */
543 #endif
544  /* Negate size to mark buffer allocated. */
545  b->bh.bb.bsize = -(b->bh.bb.bsize);
546 
547  /* Mark this buffer as owned by this thread. */
548  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
549  /* Zero the back pointer in the next buffer in memory
550  to indicate that this buffer is allocated. */
551  ba->bb.prevfree = 0;
552 
553  /* Give user buffer starting at queue links. */
554  buf = (void *)&(b->ql);
555  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
556  return buf;
557  }
558  }
559 
560  /* Link to next buffer */
561  b = (use_blink ? b->ql.blink : b->ql.flink);
562  }
563  }
564 
565  /* We failed to find a buffer. If there's a compact function defined,
566  notify it of the size requested. If it returns TRUE, try the allocation
567  again. */
568 
569  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
570  break;
571  }
572  }
573 
574  /* No buffer available with requested size free. */
575 
576  /* Don't give up yet -- look in the reserve supply. */
577  if (thr->acqfcn != 0) {
578  if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
579  /* Request is too large to fit in a single expansion block.
580  Try to satisfy it by a direct buffer acquisition. */
581  bdhead_t *bdh;
582 
583  size += sizeof(bdhead_t) - sizeof(bhead_t);
584 
585  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
586 
587  /* richryan */
588  bdh = BDH((*thr->acqfcn)((bufsize)size));
589  if (bdh != NULL) {
590 
591  // Mark the buffer special by setting size field of its header to zero.
592  bdh->bh.bb.bsize = 0;
593 
594  /* Mark this buffer as owned by this thread. */
595  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
596  // because direct buffer never goes to free list
597  bdh->bh.bb.prevfree = 0;
598  bdh->tsize = size;
599 #if BufStats
600  thr->totalloc += (size_t)size;
601  thr->numget++; /* Increment number of bget() calls */
602  thr->numdget++; /* Direct bget() call count */
603 #endif
604  buf = (void *)(bdh + 1);
605  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
606  return buf;
607  }
608 
609  } else {
610 
611  /* Try to obtain a new expansion block */
612  void *newpool;
613 
614  KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
615 
616  /* richryan */
617  newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
618  KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
619  if (newpool != NULL) {
620  bpool(th, newpool, thr->exp_incr);
621  buf = bget(
622  th, requested_size); /* This can't, I say, can't get into a loop. */
623  return buf;
624  }
625  }
626  }
627 
628  /* Still no buffer available */
629 
630  return NULL;
631 }
632 
633 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
634  the entire contents of the buffer to zero, not just the
635  region requested by the caller. */
636 
637 static void *bgetz(kmp_info_t *th, bufsize size) {
638  char *buf = (char *)bget(th, size);
639 
640  if (buf != NULL) {
641  bhead_t *b;
642  bufsize rsize;
643 
644  b = BH(buf - sizeof(bhead_t));
645  rsize = -(b->bb.bsize);
646  if (rsize == 0) {
647  bdhead_t *bd;
648 
649  bd = BDH(buf - sizeof(bdhead_t));
650  rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
651  } else {
652  rsize -= sizeof(bhead_t);
653  }
654 
655  KMP_DEBUG_ASSERT(rsize >= size);
656 
657  (void)memset(buf, 0, (bufsize)rsize);
658  }
659  return ((void *)buf);
660 }
661 
662 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
663  simply in terms of brel() and bget(). It could be
664  enhanced to allow the buffer to grow into adjacent free
665  blocks and to avoid moving data unnecessarily. */
666 
667 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
668  void *nbuf;
669  bufsize osize; /* Old size of buffer */
670  bhead_t *b;
671 
672  nbuf = bget(th, size);
673  if (nbuf == NULL) { /* Acquire new buffer */
674  return NULL;
675  }
676  if (buf == NULL) {
677  return nbuf;
678  }
679  b = BH(((char *)buf) - sizeof(bhead_t));
680  osize = -b->bb.bsize;
681  if (osize == 0) {
682  /* Buffer acquired directly through acqfcn. */
683  bdhead_t *bd;
684 
685  bd = BDH(((char *)buf) - sizeof(bdhead_t));
686  osize = bd->tsize - (bufsize)sizeof(bdhead_t);
687  } else {
688  osize -= sizeof(bhead_t);
689  }
690 
691  KMP_DEBUG_ASSERT(osize > 0);
692 
693  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
694  (size_t)((size < osize) ? size : osize));
695  brel(th, buf);
696 
697  return nbuf;
698 }
699 
700 /* BREL -- Release a buffer. */
701 static void brel(kmp_info_t *th, void *buf) {
702  thr_data_t *thr = get_thr_data(th);
703  bfhead_t *b, *bn;
704  kmp_info_t *bth;
705 
706  KMP_DEBUG_ASSERT(buf != NULL);
707  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
708 
709  b = BFH(((char *)buf) - sizeof(bhead_t));
710 
711  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
712  bdhead_t *bdh;
713 
714  bdh = BDH(((char *)buf) - sizeof(bdhead_t));
715  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
716 #if BufStats
717  thr->totalloc -= (size_t)bdh->tsize;
718  thr->numdrel++; /* Number of direct releases */
719  thr->numrel++; /* Increment number of brel() calls */
720 #endif /* BufStats */
721 #ifdef FreeWipe
722  (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
723 #endif /* FreeWipe */
724 
725  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
726 
727  KMP_DEBUG_ASSERT(thr->relfcn != 0);
728  (*thr->relfcn)((void *)bdh); /* Release it directly. */
729  return;
730  }
731 
732  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
733  ~1); // clear possible mark before comparison
734  if (bth != th) {
735  /* Add this buffer to be released by the owning thread later */
736  __kmp_bget_enqueue(bth, buf
737 #ifdef USE_QUEUING_LOCK_FOR_BGET
738  ,
739  __kmp_gtid_from_thread(th)
740 #endif
741  );
742  return;
743  }
744 
745  /* Buffer size must be negative, indicating that the buffer is allocated. */
746  if (b->bh.bb.bsize >= 0) {
747  bn = NULL;
748  }
749  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
750 
751  /* Back pointer in next buffer must be zero, indicating the same thing: */
752 
753  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
754 
755 #if BufStats
756  thr->numrel++; /* Increment number of brel() calls */
757  thr->totalloc += (size_t)b->bh.bb.bsize;
758 #endif
759 
760  /* If the back link is nonzero, the previous buffer is free. */
761 
762  if (b->bh.bb.prevfree != 0) {
763  /* The previous buffer is free. Consolidate this buffer with it by adding
764  the length of this buffer to the previous free buffer. Note that we
765  subtract the size in the buffer being released, since it's negative to
766  indicate that the buffer is allocated. */
767  bufsize size = b->bh.bb.bsize;
768 
769  /* Make the previous buffer the one we're working on. */
770  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
771  b->bh.bb.prevfree);
772  b = BFH(((char *)b) - b->bh.bb.prevfree);
773  b->bh.bb.bsize -= size;
774 
775  /* unlink the buffer from the old freelist */
776  __kmp_bget_remove_from_freelist(b);
777  } else {
778  /* The previous buffer isn't allocated. Mark this buffer size as positive
779  (i.e. free) and fall through to place the buffer on the free list as an
780  isolated free block. */
781  b->bh.bb.bsize = -b->bh.bb.bsize;
782  }
783 
784  /* insert buffer back onto a new freelist */
785  __kmp_bget_insert_into_freelist(thr, b);
786 
787  /* Now we look at the next buffer in memory, located by advancing from
788  the start of this buffer by its size, to see if that buffer is
789  free. If it is, we combine this buffer with the next one in
790  memory, dechaining the second buffer from the free list. */
791  bn = BFH(((char *)b) + b->bh.bb.bsize);
792  if (bn->bh.bb.bsize > 0) {
793 
794  /* The buffer is free. Remove it from the free list and add
795  its size to that of our buffer. */
796  KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
797  bn->bh.bb.bsize);
798 
799  __kmp_bget_remove_from_freelist(bn);
800 
801  b->bh.bb.bsize += bn->bh.bb.bsize;
802 
803  /* unlink the buffer from the old freelist, and reinsert it into the new
804  * freelist */
805  __kmp_bget_remove_from_freelist(b);
806  __kmp_bget_insert_into_freelist(thr, b);
807 
808  /* Finally, advance to the buffer that follows the newly
809  consolidated free block. We must set its backpointer to the
810  head of the consolidated free block. We know the next block
811  must be an allocated block because the process of recombination
812  guarantees that two free blocks will never be contiguous in
813  memory. */
814  bn = BFH(((char *)b) + b->bh.bb.bsize);
815  }
816 #ifdef FreeWipe
817  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
818  (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
819 #endif
820  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
821 
822  /* The next buffer is allocated. Set the backpointer in it to point
823  to this buffer; the previous free buffer in memory. */
824 
825  bn->bh.bb.prevfree = b->bh.bb.bsize;
826 
827  /* If a block-release function is defined, and this free buffer
828  constitutes the entire block, release it. Note that pool_len
829  is defined in such a way that the test will fail unless all
830  pool blocks are the same size. */
831  if (thr->relfcn != 0 &&
832  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
833 #if BufStats
834  if (thr->numpblk !=
835  1) { /* Do not release the last buffer until finalization time */
836 #endif
837 
838  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
839  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
840  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
841  b->bh.bb.bsize);
842 
843  /* Unlink the buffer from the free list */
844  __kmp_bget_remove_from_freelist(b);
845 
846  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
847 
848  (*thr->relfcn)(b);
849 #if BufStats
850  thr->numprel++; /* Nr of expansion block releases */
851  thr->numpblk--; /* Total number of blocks */
852  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
853 
854  // avoid leaving stale last_pool pointer around if it is being dealloced
855  if (thr->last_pool == b)
856  thr->last_pool = 0;
857  } else {
858  thr->last_pool = b;
859  }
860 #endif /* BufStats */
861  }
862 }
863 
864 /* BECTL -- Establish automatic pool expansion control */
865 static void bectl(kmp_info_t *th, bget_compact_t compact,
866  bget_acquire_t acquire, bget_release_t release,
867  bufsize pool_incr) {
868  thr_data_t *thr = get_thr_data(th);
869 
870  thr->compfcn = compact;
871  thr->acqfcn = acquire;
872  thr->relfcn = release;
873  thr->exp_incr = pool_incr;
874 }
875 
876 /* BPOOL -- Add a region of memory to the buffer pool. */
877 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
878  /* int bin = 0; */
879  thr_data_t *thr = get_thr_data(th);
880  bfhead_t *b = BFH(buf);
881  bhead_t *bn;
882 
883  __kmp_bget_dequeue(th); /* Release any queued buffers */
884 
885 #ifdef SizeQuant
886  len &= ~((bufsize)(SizeQuant - 1));
887 #endif
888  if (thr->pool_len == 0) {
889  thr->pool_len = len;
890  } else if (len != thr->pool_len) {
891  thr->pool_len = -1;
892  }
893 #if BufStats
894  thr->numpget++; /* Number of block acquisitions */
895  thr->numpblk++; /* Number of blocks total */
896  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
897 #endif /* BufStats */
898 
899  /* Since the block is initially occupied by a single free buffer,
900  it had better not be (much) larger than the largest buffer
901  whose size we can store in bhead.bb.bsize. */
902  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
903 
904  /* Clear the backpointer at the start of the block to indicate that
905  there is no free block prior to this one. That blocks
906  recombination when the first block in memory is released. */
907  b->bh.bb.prevfree = 0;
908 
909  /* Create a dummy allocated buffer at the end of the pool. This dummy
910  buffer is seen when a buffer at the end of the pool is released and
911  blocks recombination of the last buffer with the dummy buffer at
912  the end. The length in the dummy buffer is set to the largest
913  negative number to denote the end of the pool for diagnostic
914  routines (this specific value is not counted on by the actual
915  allocation and release functions). */
916  len -= sizeof(bhead_t);
917  b->bh.bb.bsize = (bufsize)len;
918  /* Set the owner of this buffer */
919  TCW_PTR(b->bh.bb.bthr,
920  (kmp_info_t *)((kmp_uintptr_t)th |
921  1)); // mark the buffer as allocated address
922 
923  /* Chain the new block to the free list. */
924  __kmp_bget_insert_into_freelist(thr, b);
925 
926 #ifdef FreeWipe
927  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
928  (size_t)(len - sizeof(bfhead_t)));
929 #endif
930  bn = BH(((char *)b) + len);
931  bn->bb.prevfree = (bufsize)len;
932  /* Definition of ESent assumes two's complement! */
933  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
934 
935  bn->bb.bsize = ESent;
936 }
937 
938 /* BFREED -- Dump the free lists for this thread. */
939 static void bfreed(kmp_info_t *th) {
940  int bin = 0, count = 0;
941  int gtid = __kmp_gtid_from_thread(th);
942  thr_data_t *thr = get_thr_data(th);
943 
944 #if BufStats
945  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
946  " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
947  " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
948  " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
949  " drel=%" KMP_INT64_SPEC "\n",
950  gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
951  (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
952  (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
953  (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
954 #endif
955 
956  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
957  bfhead_t *b;
958 
959  for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
960  b = b->ql.flink) {
961  bufsize bs = b->bh.bb.bsize;
962 
963  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
964  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
965  KMP_DEBUG_ASSERT(bs > 0);
966 
967  count += 1;
968 
969  __kmp_printf_no_lock(
970  "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
971  (long)bs);
972 #ifdef FreeWipe
973  {
974  char *lerr = ((char *)b) + sizeof(bfhead_t);
975  if ((bs > sizeof(bfhead_t)) &&
976  ((*lerr != 0x55) ||
977  (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
978  0))) {
979  __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
980  "free block have been overstored.)\n",
981  gtid);
982  }
983  }
984 #endif
985  }
986  }
987 
988  if (count == 0)
989  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
990 }
991 
992 void __kmp_initialize_bget(kmp_info_t *th) {
993  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
994 
995  set_thr_data(th);
996 
997  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
998  (bufsize)__kmp_malloc_pool_incr);
999 }
1000 
1001 void __kmp_finalize_bget(kmp_info_t *th) {
1002  thr_data_t *thr;
1003  bfhead_t *b;
1004 
1005  KMP_DEBUG_ASSERT(th != 0);
1006 
1007 #if BufStats
1008  thr = (thr_data_t *)th->th.th_local.bget_data;
1009  KMP_DEBUG_ASSERT(thr != NULL);
1010  b = thr->last_pool;
1011 
1012  /* If a block-release function is defined, and this free buffer constitutes
1013  the entire block, release it. Note that pool_len is defined in such a way
1014  that the test will fail unless all pool blocks are the same size. */
1015 
1016  // Deallocate the last pool if one exists because we no longer do it in brel()
1017  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1018  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1019  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1020  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1021  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1022  b->bh.bb.bsize);
1023 
1024  /* Unlink the buffer from the free list */
1025  __kmp_bget_remove_from_freelist(b);
1026 
1027  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1028 
1029  (*thr->relfcn)(b);
1030  thr->numprel++; /* Nr of expansion block releases */
1031  thr->numpblk--; /* Total number of blocks */
1032  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1033  }
1034 #endif /* BufStats */
1035 
1036  /* Deallocate bget_data */
1037  if (th->th.th_local.bget_data != NULL) {
1038  __kmp_free(th->th.th_local.bget_data);
1039  th->th.th_local.bget_data = NULL;
1040  }
1041 }
1042 
1043 void kmpc_set_poolsize(size_t size) {
1044  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1045  (bget_release_t)free, (bufsize)size);
1046 }
1047 
1048 size_t kmpc_get_poolsize(void) {
1049  thr_data_t *p;
1050 
1051  p = get_thr_data(__kmp_get_thread());
1052 
1053  return p->exp_incr;
1054 }
1055 
1056 void kmpc_set_poolmode(int mode) {
1057  thr_data_t *p;
1058 
1059  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1060  mode == bget_mode_best) {
1061  p = get_thr_data(__kmp_get_thread());
1062  p->mode = (bget_mode_t)mode;
1063  }
1064 }
1065 
1066 int kmpc_get_poolmode(void) {
1067  thr_data_t *p;
1068 
1069  p = get_thr_data(__kmp_get_thread());
1070 
1071  return p->mode;
1072 }
1073 
1074 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1075  kmp_info_t *th = __kmp_get_thread();
1076  bufsize a, b;
1077 
1078  __kmp_bget_dequeue(th); /* Release any queued buffers */
1079 
1080  bcheck(th, &a, &b);
1081 
1082  *maxmem = a;
1083  *allmem = b;
1084 }
1085 
1086 void kmpc_poolprint(void) {
1087  kmp_info_t *th = __kmp_get_thread();
1088 
1089  __kmp_bget_dequeue(th); /* Release any queued buffers */
1090 
1091  bfreed(th);
1092 }
1093 
1094 #endif // #if KMP_USE_BGET
1095 
1096 void *kmpc_malloc(size_t size) {
1097  void *ptr;
1098  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1099  if (ptr != NULL) {
1100  // save allocated pointer just before one returned to user
1101  *(void **)ptr = ptr;
1102  ptr = (void **)ptr + 1;
1103  }
1104  return ptr;
1105 }
1106 
1107 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1108 
1109 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1110  void *ptr;
1111  void *ptr_allocated;
1112  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1113  if (!IS_POWER_OF_TWO(alignment)) {
1114  // AC: do we need to issue a warning here?
1115  errno = EINVAL;
1116  return NULL;
1117  }
1118  size = size + sizeof(void *) + alignment;
1119  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1120  if (ptr_allocated != NULL) {
1121  // save allocated pointer just before one returned to user
1122  ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1123  ~(alignment - 1));
1124  *((void **)ptr - 1) = ptr_allocated;
1125  } else {
1126  ptr = NULL;
1127  }
1128  return ptr;
1129 }
1130 
1131 void *kmpc_calloc(size_t nelem, size_t elsize) {
1132  void *ptr;
1133  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1134  if (ptr != NULL) {
1135  // save allocated pointer just before one returned to user
1136  *(void **)ptr = ptr;
1137  ptr = (void **)ptr + 1;
1138  }
1139  return ptr;
1140 }
1141 
1142 void *kmpc_realloc(void *ptr, size_t size) {
1143  void *result = NULL;
1144  if (ptr == NULL) {
1145  // If pointer is NULL, realloc behaves like malloc.
1146  result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1147  // save allocated pointer just before one returned to user
1148  if (result != NULL) {
1149  *(void **)result = result;
1150  result = (void **)result + 1;
1151  }
1152  } else if (size == 0) {
1153  // If size is 0, realloc behaves like free.
1154  // The thread must be registered by the call to kmpc_malloc() or
1155  // kmpc_calloc() before.
1156  // So it should be safe to call __kmp_get_thread(), not
1157  // __kmp_entry_thread().
1158  KMP_ASSERT(*((void **)ptr - 1));
1159  brel(__kmp_get_thread(), *((void **)ptr - 1));
1160  } else {
1161  result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1162  (bufsize)(size + sizeof(ptr)));
1163  if (result != NULL) {
1164  *(void **)result = result;
1165  result = (void **)result + 1;
1166  }
1167  }
1168  return result;
1169 }
1170 
1171 // NOTE: the library must have already been initialized by a previous allocate
1172 void kmpc_free(void *ptr) {
1173  if (!__kmp_init_serial) {
1174  return;
1175  }
1176  if (ptr != NULL) {
1177  kmp_info_t *th = __kmp_get_thread();
1178  __kmp_bget_dequeue(th); /* Release any queued buffers */
1179  // extract allocated pointer and free it
1180  KMP_ASSERT(*((void **)ptr - 1));
1181  brel(th, *((void **)ptr - 1));
1182  }
1183 }
1184 
1185 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1186  void *ptr;
1187  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1188  (int)size KMP_SRC_LOC_PARM));
1189  ptr = bget(th, (bufsize)size);
1190  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1191  return ptr;
1192 }
1193 
1194 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1195  size_t elsize KMP_SRC_LOC_DECL) {
1196  void *ptr;
1197  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1198  (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1199  ptr = bgetz(th, (bufsize)(nelem * elsize));
1200  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1201  return ptr;
1202 }
1203 
1204 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1205  size_t size KMP_SRC_LOC_DECL) {
1206  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1207  ptr, (int)size KMP_SRC_LOC_PARM));
1208  ptr = bgetr(th, ptr, (bufsize)size);
1209  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1210  return ptr;
1211 }
1212 
1213 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1214  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1215  ptr KMP_SRC_LOC_PARM));
1216  if (ptr != NULL) {
1217  __kmp_bget_dequeue(th); /* Release any queued buffers */
1218  brel(th, ptr);
1219  }
1220  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1221 }
1222 
1223 /* OMP 5.0 Memory Management support */
1224 static const char *kmp_mk_lib_name;
1225 static void *h_memkind;
1226 /* memkind experimental API: */
1227 // memkind_alloc
1228 static void *(*kmp_mk_alloc)(void *k, size_t sz);
1229 // memkind_free
1230 static void (*kmp_mk_free)(void *kind, void *ptr);
1231 // memkind_check_available
1232 static int (*kmp_mk_check)(void *kind);
1233 // kinds we are going to use
1234 static void **mk_default;
1235 static void **mk_interleave;
1236 static void **mk_hbw;
1237 static void **mk_hbw_interleave;
1238 static void **mk_hbw_preferred;
1239 static void **mk_hugetlb;
1240 static void **mk_hbw_hugetlb;
1241 static void **mk_hbw_preferred_hugetlb;
1242 static void **mk_dax_kmem;
1243 static void **mk_dax_kmem_all;
1244 static void **mk_dax_kmem_preferred;
1245 static void *(*kmp_target_alloc_host)(size_t size, int device);
1246 static void *(*kmp_target_alloc_shared)(size_t size, int device);
1247 static void *(*kmp_target_alloc_device)(size_t size, int device);
1248 static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
1249 static void *(*kmp_target_unlock_mem)(void *ptr, int device);
1250 static void *(*kmp_target_free_host)(void *ptr, int device);
1251 static void *(*kmp_target_free_shared)(void *ptr, int device);
1252 static void *(*kmp_target_free_device)(void *ptr, int device);
1253 static bool __kmp_target_mem_available;
1254 #define KMP_IS_TARGET_MEM_SPACE(MS) \
1255  (MS == llvm_omp_target_host_mem_space || \
1256  MS == llvm_omp_target_shared_mem_space || \
1257  MS == llvm_omp_target_device_mem_space)
1258 #define KMP_IS_TARGET_MEM_ALLOC(MA) \
1259  (MA == llvm_omp_target_host_mem_alloc || \
1260  MA == llvm_omp_target_shared_mem_alloc || \
1261  MA == llvm_omp_target_device_mem_alloc)
1262 
1263 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1264 static inline void chk_kind(void ***pkind) {
1265  KMP_DEBUG_ASSERT(pkind);
1266  if (*pkind) // symbol found
1267  if (kmp_mk_check(**pkind)) // kind not available or error
1268  *pkind = NULL;
1269 }
1270 #endif
1271 
1272 void __kmp_init_memkind() {
1273 // as of 2018-07-31 memkind does not support Windows*, exclude it for now
1274 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1275  // use of statically linked memkind is problematic, as it depends on libnuma
1276  kmp_mk_lib_name = "libmemkind.so";
1277  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1278  if (h_memkind) {
1279  kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
1280  kmp_mk_alloc =
1281  (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
1282  kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
1283  mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
1284  if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1285  !kmp_mk_check(*mk_default)) {
1286  __kmp_memkind_available = 1;
1287  mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
1288  chk_kind(&mk_interleave);
1289  mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
1290  chk_kind(&mk_hbw);
1291  mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
1292  chk_kind(&mk_hbw_interleave);
1293  mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
1294  chk_kind(&mk_hbw_preferred);
1295  mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
1296  chk_kind(&mk_hugetlb);
1297  mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
1298  chk_kind(&mk_hbw_hugetlb);
1299  mk_hbw_preferred_hugetlb =
1300  (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
1301  chk_kind(&mk_hbw_preferred_hugetlb);
1302  mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
1303  chk_kind(&mk_dax_kmem);
1304  mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
1305  chk_kind(&mk_dax_kmem_all);
1306  mk_dax_kmem_preferred =
1307  (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
1308  chk_kind(&mk_dax_kmem_preferred);
1309  KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
1310  return; // success
1311  }
1312  dlclose(h_memkind); // failure
1313  }
1314 #else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
1315  kmp_mk_lib_name = "";
1316 #endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
1317  h_memkind = NULL;
1318  kmp_mk_check = NULL;
1319  kmp_mk_alloc = NULL;
1320  kmp_mk_free = NULL;
1321  mk_default = NULL;
1322  mk_interleave = NULL;
1323  mk_hbw = NULL;
1324  mk_hbw_interleave = NULL;
1325  mk_hbw_preferred = NULL;
1326  mk_hugetlb = NULL;
1327  mk_hbw_hugetlb = NULL;
1328  mk_hbw_preferred_hugetlb = NULL;
1329  mk_dax_kmem = NULL;
1330  mk_dax_kmem_all = NULL;
1331  mk_dax_kmem_preferred = NULL;
1332 }
1333 
1334 void __kmp_fini_memkind() {
1335 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1336  if (__kmp_memkind_available)
1337  KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
1338  if (h_memkind) {
1339  dlclose(h_memkind);
1340  h_memkind = NULL;
1341  }
1342  kmp_mk_check = NULL;
1343  kmp_mk_alloc = NULL;
1344  kmp_mk_free = NULL;
1345  mk_default = NULL;
1346  mk_interleave = NULL;
1347  mk_hbw = NULL;
1348  mk_hbw_interleave = NULL;
1349  mk_hbw_preferred = NULL;
1350  mk_hugetlb = NULL;
1351  mk_hbw_hugetlb = NULL;
1352  mk_hbw_preferred_hugetlb = NULL;
1353  mk_dax_kmem = NULL;
1354  mk_dax_kmem_all = NULL;
1355  mk_dax_kmem_preferred = NULL;
1356 #endif
1357 }
1358 
1359 void __kmp_init_target_mem() {
1360  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1361  *(void **)(&kmp_target_alloc_shared) =
1362  KMP_DLSYM("llvm_omp_target_alloc_shared");
1363  *(void **)(&kmp_target_alloc_device) =
1364  KMP_DLSYM("llvm_omp_target_alloc_device");
1365  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
1366  *(void **)(&kmp_target_free_shared) =
1367  KMP_DLSYM("llvm_omp_target_free_shared");
1368  *(void **)(&kmp_target_free_device) =
1369  KMP_DLSYM("llvm_omp_target_free_device");
1370  __kmp_target_mem_available =
1371  kmp_target_alloc_host && kmp_target_alloc_shared &&
1372  kmp_target_alloc_device && kmp_target_free_host &&
1373  kmp_target_free_shared && kmp_target_free_device;
1374  // lock/pin and unlock/unpin target calls
1375  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
1376  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
1377 }
1378 
1379 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
1380  int ntraits,
1381  omp_alloctrait_t traits[]) {
1382  // OpenMP 5.0 only allows predefined memspaces
1383  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
1384  ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
1385  ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
1386  kmp_allocator_t *al;
1387  int i;
1388  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
1389  al->memspace = ms; // not used currently
1390  for (i = 0; i < ntraits; ++i) {
1391  switch (traits[i].key) {
1392  case omp_atk_sync_hint:
1393  case omp_atk_access:
1394  break;
1395  case omp_atk_pinned:
1396  al->pinned = true;
1397  break;
1398  case omp_atk_alignment:
1399  __kmp_type_convert(traits[i].value, &(al->alignment));
1400  KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1401  break;
1402  case omp_atk_pool_size:
1403  al->pool_size = traits[i].value;
1404  break;
1405  case omp_atk_fallback:
1406  al->fb = (omp_alloctrait_value_t)traits[i].value;
1407  KMP_DEBUG_ASSERT(
1408  al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1409  al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1410  break;
1411  case omp_atk_fb_data:
1412  al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
1413  break;
1414  case omp_atk_partition:
1415  al->memkind = RCAST(void **, traits[i].value);
1416  break;
1417  default:
1418  KMP_ASSERT2(0, "Unexpected allocator trait");
1419  }
1420  }
1421  if (al->fb == 0) {
1422  // set default allocator
1423  al->fb = omp_atv_default_mem_fb;
1424  al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1425  } else if (al->fb == omp_atv_allocator_fb) {
1426  KMP_ASSERT(al->fb_data != NULL);
1427  } else if (al->fb == omp_atv_default_mem_fb) {
1428  al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1429  }
1430  if (__kmp_memkind_available) {
1431  // Let's use memkind library if available
1432  if (ms == omp_high_bw_mem_space) {
1433  if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
1434  al->memkind = mk_hbw_interleave;
1435  } else if (mk_hbw_preferred) {
1436  // AC: do not try to use MEMKIND_HBW for now, because memkind library
1437  // cannot reliably detect exhaustion of HBW memory.
1438  // It could be possible using hbw_verify_memory_region() but memkind
1439  // manual says: "Using this function in production code may result in
1440  // serious performance penalty".
1441  al->memkind = mk_hbw_preferred;
1442  } else {
1443  // HBW is requested but not available --> return NULL allocator
1444  __kmp_free(al);
1445  return omp_null_allocator;
1446  }
1447  } else if (ms == omp_large_cap_mem_space) {
1448  if (mk_dax_kmem_all) {
1449  // All pmem nodes are visited
1450  al->memkind = mk_dax_kmem_all;
1451  } else if (mk_dax_kmem) {
1452  // Only closest pmem node is visited
1453  al->memkind = mk_dax_kmem;
1454  } else {
1455  __kmp_free(al);
1456  return omp_null_allocator;
1457  }
1458  } else {
1459  if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
1460  al->memkind = mk_interleave;
1461  } else {
1462  al->memkind = mk_default;
1463  }
1464  }
1465  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1466  __kmp_free(al);
1467  return omp_null_allocator;
1468  } else {
1469  if (ms == omp_high_bw_mem_space) {
1470  // cannot detect HBW memory presence without memkind library
1471  __kmp_free(al);
1472  return omp_null_allocator;
1473  }
1474  }
1475  return (omp_allocator_handle_t)al;
1476 }
1477 
1478 void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
1479  if (allocator > kmp_max_mem_alloc)
1480  __kmp_free(allocator);
1481 }
1482 
1483 void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1484  if (allocator == omp_null_allocator)
1485  allocator = omp_default_mem_alloc;
1486  __kmp_threads[gtid]->th.th_def_allocator = allocator;
1487 }
1488 
1489 omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
1490  return __kmp_threads[gtid]->th.th_def_allocator;
1491 }
1492 
1493 typedef struct kmp_mem_desc { // Memory block descriptor
1494  void *ptr_alloc; // Pointer returned by allocator
1495  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1496  size_t size_orig; // Original size requested
1497  void *ptr_align; // Pointer to aligned memory, returned
1498  kmp_allocator_t *allocator; // allocator
1499 } kmp_mem_desc_t;
1500 static int alignment = sizeof(void *); // align to pointer size by default
1501 
1502 // external interfaces are wrappers over internal implementation
1503 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1504  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1505  void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1506  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1507  return ptr;
1508 }
1509 
1510 void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1511  omp_allocator_handle_t allocator) {
1512  KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1513  (int)size, allocator));
1514  void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1515  KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1516  return ptr;
1517 }
1518 
1519 void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1520  omp_allocator_handle_t allocator) {
1521  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1522  (int)size, allocator));
1523  void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1524  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1525  return ptr;
1526 }
1527 
1528 void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1529  omp_allocator_handle_t allocator,
1530  omp_allocator_handle_t free_allocator) {
1531  KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1532  allocator, free_allocator));
1533  void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1534  KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1535  return nptr;
1536 }
1537 
1538 void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1539  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1540  ___kmpc_free(gtid, ptr, allocator);
1541  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1542  return;
1543 }
1544 
1545 // internal implementation, called from inside the library
1546 void *__kmp_alloc(int gtid, size_t algn, size_t size,
1547  omp_allocator_handle_t allocator) {
1548  void *ptr = NULL;
1549  kmp_allocator_t *al;
1550  KMP_DEBUG_ASSERT(__kmp_init_serial);
1551  if (size == 0)
1552  return NULL;
1553  if (allocator == omp_null_allocator)
1554  allocator = __kmp_threads[gtid]->th.th_def_allocator;
1555  kmp_int32 default_device =
1556  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1557 
1558  al = RCAST(kmp_allocator_t *, allocator);
1559 
1560  int sz_desc = sizeof(kmp_mem_desc_t);
1561  kmp_mem_desc_t desc;
1562  kmp_uintptr_t addr; // address returned by allocator
1563  kmp_uintptr_t addr_align; // address to return to caller
1564  kmp_uintptr_t addr_descr; // address of memory block descriptor
1565  size_t align = alignment; // default alignment
1566  if (allocator > kmp_max_mem_alloc && al->alignment > align)
1567  align = al->alignment; // alignment required by allocator trait
1568  if (align < algn)
1569  align = algn; // max of allocator trait, parameter and sizeof(void*)
1570  desc.size_orig = size;
1571  desc.size_a = size + sz_desc + align;
1572  bool is_pinned = false;
1573  if (allocator > kmp_max_mem_alloc)
1574  is_pinned = al->pinned;
1575 
1576  // Use default allocator if libmemkind is not available
1577  int use_default_allocator = (__kmp_memkind_available) ? false : true;
1578 
1579  if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1580  // Use size input directly as the memory may not be accessible on host.
1581  // Use default device for now.
1582  if (__kmp_target_mem_available) {
1583  kmp_int32 device =
1584  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1585  if (allocator == llvm_omp_target_host_mem_alloc)
1586  ptr = kmp_target_alloc_host(size, device);
1587  else if (allocator == llvm_omp_target_shared_mem_alloc)
1588  ptr = kmp_target_alloc_shared(size, device);
1589  else // allocator == llvm_omp_target_device_mem_alloc
1590  ptr = kmp_target_alloc_device(size, device);
1591  return ptr;
1592  } else {
1593  KMP_INFORM(TargetMemNotAvailable);
1594  }
1595  }
1596 
1597  if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1598  if (__kmp_target_mem_available) {
1599  kmp_int32 device =
1600  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1601  if (al->memspace == llvm_omp_target_host_mem_space)
1602  ptr = kmp_target_alloc_host(size, device);
1603  else if (al->memspace == llvm_omp_target_shared_mem_space)
1604  ptr = kmp_target_alloc_shared(size, device);
1605  else // al->memspace == llvm_omp_target_device_mem_space
1606  ptr = kmp_target_alloc_device(size, device);
1607  return ptr;
1608  } else {
1609  KMP_INFORM(TargetMemNotAvailable);
1610  }
1611  }
1612 
1613  if (__kmp_memkind_available) {
1614  if (allocator < kmp_max_mem_alloc) {
1615  // pre-defined allocator
1616  if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
1617  ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
1618  } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
1619  ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
1620  } else {
1621  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1622  }
1623  } else if (al->pool_size > 0) {
1624  // custom allocator with pool size requested
1625  kmp_uint64 used =
1626  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
1627  if (used + desc.size_a > al->pool_size) {
1628  // not enough space, need to go fallback path
1629  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1630  if (al->fb == omp_atv_default_mem_fb) {
1631  al = (kmp_allocator_t *)omp_default_mem_alloc;
1632  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1633  } else if (al->fb == omp_atv_abort_fb) {
1634  KMP_ASSERT(0); // abort fallback requested
1635  } else if (al->fb == omp_atv_allocator_fb) {
1636  KMP_ASSERT(al != al->fb_data);
1637  al = al->fb_data;
1638  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1639  if (is_pinned && kmp_target_lock_mem)
1640  kmp_target_lock_mem(ptr, size, default_device);
1641  return ptr;
1642  } // else ptr == NULL;
1643  } else {
1644  // pool has enough space
1645  ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
1646  if (ptr == NULL) {
1647  if (al->fb == omp_atv_default_mem_fb) {
1648  al = (kmp_allocator_t *)omp_default_mem_alloc;
1649  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1650  } else if (al->fb == omp_atv_abort_fb) {
1651  KMP_ASSERT(0); // abort fallback requested
1652  } else if (al->fb == omp_atv_allocator_fb) {
1653  KMP_ASSERT(al != al->fb_data);
1654  al = al->fb_data;
1655  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1656  if (is_pinned && kmp_target_lock_mem)
1657  kmp_target_lock_mem(ptr, size, default_device);
1658  return ptr;
1659  }
1660  }
1661  }
1662  } else {
1663  // custom allocator, pool size not requested
1664  ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
1665  if (ptr == NULL) {
1666  if (al->fb == omp_atv_default_mem_fb) {
1667  al = (kmp_allocator_t *)omp_default_mem_alloc;
1668  ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1669  } else if (al->fb == omp_atv_abort_fb) {
1670  KMP_ASSERT(0); // abort fallback requested
1671  } else if (al->fb == omp_atv_allocator_fb) {
1672  KMP_ASSERT(al != al->fb_data);
1673  al = al->fb_data;
1674  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1675  if (is_pinned && kmp_target_lock_mem)
1676  kmp_target_lock_mem(ptr, size, default_device);
1677  return ptr;
1678  }
1679  }
1680  }
1681  } else if (allocator < kmp_max_mem_alloc) {
1682  // pre-defined allocator
1683  if (allocator == omp_high_bw_mem_alloc) {
1684  KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
1685  } else if (allocator == omp_large_cap_mem_alloc) {
1686  KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
1687  } else if (allocator == omp_const_mem_alloc) {
1688  KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
1689  } else if (allocator == omp_low_lat_mem_alloc) {
1690  KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
1691  } else if (allocator == omp_cgroup_mem_alloc) {
1692  KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
1693  } else if (allocator == omp_pteam_mem_alloc) {
1694  KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
1695  } else if (allocator == omp_thread_mem_alloc) {
1696  KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
1697  } else { // default allocator requested
1698  use_default_allocator = true;
1699  }
1700  if (use_default_allocator) {
1701  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1702  use_default_allocator = false;
1703  }
1704  } else if (al->pool_size > 0) {
1705  // custom allocator with pool size requested
1706  kmp_uint64 used =
1707  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
1708  if (used + desc.size_a > al->pool_size) {
1709  // not enough space, need to go fallback path
1710  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1711  if (al->fb == omp_atv_default_mem_fb) {
1712  al = (kmp_allocator_t *)omp_default_mem_alloc;
1713  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1714  } else if (al->fb == omp_atv_abort_fb) {
1715  KMP_ASSERT(0); // abort fallback requested
1716  } else if (al->fb == omp_atv_allocator_fb) {
1717  KMP_ASSERT(al != al->fb_data);
1718  al = al->fb_data;
1719  ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
1720  if (is_pinned && kmp_target_lock_mem)
1721  kmp_target_lock_mem(ptr, size, default_device);
1722  return ptr;
1723  } // else ptr == NULL;
1724  } else {
1725  // pool has enough space
1726  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1727  if (ptr == NULL && al->fb == omp_atv_abort_fb) {
1728  KMP_ASSERT(0); // abort fallback requested
1729  } // no sense to look for another fallback because of same internal alloc
1730  }
1731  } else {
1732  // custom allocator, pool size not requested
1733  ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1734  if (ptr == NULL && al->fb == omp_atv_abort_fb) {
1735  KMP_ASSERT(0); // abort fallback requested
1736  } // no sense to look for another fallback because of same internal alloc
1737  }
1738  KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
1739  if (ptr == NULL)
1740  return NULL;
1741 
1742  if (is_pinned && kmp_target_lock_mem)
1743  kmp_target_lock_mem(ptr, desc.size_a, default_device);
1744 
1745  addr = (kmp_uintptr_t)ptr;
1746  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
1747  addr_descr = addr_align - sz_desc;
1748 
1749  desc.ptr_alloc = ptr;
1750  desc.ptr_align = (void *)addr_align;
1751  desc.allocator = al;
1752  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
1753  KMP_MB();
1754 
1755  return desc.ptr_align;
1756 }
1757 
1758 void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
1759  omp_allocator_handle_t allocator) {
1760  void *ptr = NULL;
1761  kmp_allocator_t *al;
1762  KMP_DEBUG_ASSERT(__kmp_init_serial);
1763 
1764  if (allocator == omp_null_allocator)
1765  allocator = __kmp_threads[gtid]->th.th_def_allocator;
1766 
1767  al = RCAST(kmp_allocator_t *, allocator);
1768 
1769  if (nmemb == 0 || size == 0)
1770  return ptr;
1771 
1772  if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
1773  if (al->fb == omp_atv_abort_fb) {
1774  KMP_ASSERT(0);
1775  }
1776  return ptr;
1777  }
1778 
1779  ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
1780 
1781  if (ptr) {
1782  memset(ptr, 0x00, nmemb * size);
1783  }
1784  return ptr;
1785 }
1786 
1787 void *__kmp_realloc(int gtid, void *ptr, size_t size,
1788  omp_allocator_handle_t allocator,
1789  omp_allocator_handle_t free_allocator) {
1790  void *nptr = NULL;
1791  KMP_DEBUG_ASSERT(__kmp_init_serial);
1792 
1793  if (size == 0) {
1794  if (ptr != NULL)
1795  ___kmpc_free(gtid, ptr, free_allocator);
1796  return nptr;
1797  }
1798 
1799  nptr = __kmp_alloc(gtid, 0, size, allocator);
1800 
1801  if (nptr != NULL && ptr != NULL) {
1802  kmp_mem_desc_t desc;
1803  kmp_uintptr_t addr_align; // address to return to caller
1804  kmp_uintptr_t addr_descr; // address of memory block descriptor
1805 
1806  addr_align = (kmp_uintptr_t)ptr;
1807  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
1808  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
1809 
1810  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1811  KMP_DEBUG_ASSERT(desc.size_orig > 0);
1812  KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
1813  KMP_MEMCPY((char *)nptr, (char *)ptr,
1814  (size_t)((size < desc.size_orig) ? size : desc.size_orig));
1815  }
1816 
1817  if (nptr != NULL) {
1818  ___kmpc_free(gtid, ptr, free_allocator);
1819  }
1820 
1821  return nptr;
1822 }
1823 
1824 void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1825  if (ptr == NULL)
1826  return;
1827 
1828  kmp_allocator_t *al;
1829  omp_allocator_handle_t oal;
1830  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
1831  kmp_mem_desc_t desc;
1832  kmp_uintptr_t addr_align; // address to return to caller
1833  kmp_uintptr_t addr_descr; // address of memory block descriptor
1834  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
1835  (allocator > kmp_max_mem_alloc &&
1836  KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
1837  kmp_int32 device =
1838  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1839  if (allocator == llvm_omp_target_host_mem_alloc) {
1840  kmp_target_free_host(ptr, device);
1841  } else if (allocator == llvm_omp_target_shared_mem_alloc) {
1842  kmp_target_free_shared(ptr, device);
1843  } else if (allocator == llvm_omp_target_device_mem_alloc) {
1844  kmp_target_free_device(ptr, device);
1845  }
1846  return;
1847  }
1848 
1849  addr_align = (kmp_uintptr_t)ptr;
1850  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
1851  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
1852 
1853  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1854  if (allocator) {
1855  KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
1856  }
1857  al = desc.allocator;
1858  oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
1859  KMP_DEBUG_ASSERT(al);
1860 
1861  if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
1862  kmp_int32 device =
1863  __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1864  kmp_target_unlock_mem(desc.ptr_alloc, device);
1865  }
1866 
1867  if (__kmp_memkind_available) {
1868  if (oal < kmp_max_mem_alloc) {
1869  // pre-defined allocator
1870  if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
1871  kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
1872  } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
1873  kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
1874  } else {
1875  kmp_mk_free(*mk_default, desc.ptr_alloc);
1876  }
1877  } else {
1878  if (al->pool_size > 0) { // custom allocator with pool size requested
1879  kmp_uint64 used =
1880  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1881  (void)used; // to suppress compiler warning
1882  KMP_DEBUG_ASSERT(used >= desc.size_a);
1883  }
1884  kmp_mk_free(*al->memkind, desc.ptr_alloc);
1885  }
1886  } else {
1887  if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
1888  kmp_uint64 used =
1889  KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1890  (void)used; // to suppress compiler warning
1891  KMP_DEBUG_ASSERT(used >= desc.size_a);
1892  }
1893  __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
1894  }
1895 }
1896 
1897 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
1898  memory leaks, but it may be useful for debugging memory corruptions, used
1899  freed pointers, etc. */
1900 /* #define LEAK_MEMORY */
1901 struct kmp_mem_descr { // Memory block descriptor.
1902  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
1903  size_t size_allocated; // Size of allocated memory block.
1904  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
1905  size_t size_aligned; // Size of aligned memory block.
1906 };
1907 typedef struct kmp_mem_descr kmp_mem_descr_t;
1908 
1909 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
1910  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1911  error. Must use __kmp_free when freeing memory allocated by this routine! */
1912 static void *___kmp_allocate_align(size_t size,
1913  size_t alignment KMP_SRC_LOC_DECL) {
1914  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
1915  requested to return properly aligned pointer. Original pointer returned
1916  by malloc() and size of allocated block is saved in descriptor just
1917  before the aligned pointer. This information used by __kmp_free() -- it
1918  has to pass to free() original pointer, not aligned one.
1919 
1920  +---------+------------+-----------------------------------+---------+
1921  | padding | descriptor | aligned block | padding |
1922  +---------+------------+-----------------------------------+---------+
1923  ^ ^
1924  | |
1925  | +- Aligned pointer returned to caller
1926  +- Pointer returned by malloc()
1927 
1928  Aligned block is filled with zeros, paddings are filled with 0xEF. */
1929 
1930  kmp_mem_descr_t descr;
1931  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1932  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1933  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1934 
1935  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1936  (int)size, (int)alignment KMP_SRC_LOC_PARM));
1937 
1938  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too
1939  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1940  // Make sure kmp_uintptr_t is enough to store addresses.
1941 
1942  descr.size_aligned = size;
1943  descr.size_allocated =
1944  descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
1945 
1946 #if KMP_DEBUG
1947  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1948 #else
1949  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1950 #endif
1951  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
1952  descr.ptr_allocated));
1953  if (descr.ptr_allocated == NULL) {
1954  KMP_FATAL(OutOfHeapMemory);
1955  }
1956 
1957  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1958  addr_aligned =
1959  (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1960  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
1961 
1962  descr.ptr_aligned = (void *)addr_aligned;
1963 
1964  KE_TRACE(26, (" ___kmp_allocate_align: "
1965  "ptr_allocated=%p, size_allocated=%d, "
1966  "ptr_aligned=%p, size_aligned=%d\n",
1967  descr.ptr_allocated, (int)descr.size_allocated,
1968  descr.ptr_aligned, (int)descr.size_aligned));
1969 
1970  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1971  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1972  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1973  addr_allocated + descr.size_allocated);
1974  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1975 #ifdef KMP_DEBUG
1976  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1977 // Fill allocated memory block with 0xEF.
1978 #endif
1979  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1980  // Fill the aligned memory block (which is intended for using by caller) with
1981  // 0x00. Do not
1982  // put this filling under KMP_DEBUG condition! Many callers expect zeroed
1983  // memory. (Padding
1984  // bytes remain filled with 0xEF in debugging library.)
1985  *((kmp_mem_descr_t *)addr_descr) = descr;
1986 
1987  KMP_MB();
1988 
1989  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1990  return descr.ptr_aligned;
1991 } // func ___kmp_allocate_align
1992 
1993 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1994  Do not call this func directly! Use __kmp_allocate macro instead.
1995  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1996  error. Must use __kmp_free when freeing memory allocated by this routine! */
1997 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1998  void *ptr;
1999  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
2000  (int)size KMP_SRC_LOC_PARM));
2001  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2002  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
2003  return ptr;
2004 } // func ___kmp_allocate
2005 
2006 /* Allocate memory on page boundary, fill allocated memory with 0x00.
2007  Does not call this func directly! Use __kmp_page_allocate macro instead.
2008  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2009  error. Must use __kmp_free when freeing memory allocated by this routine! */
2010 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
2011  int page_size = 8 * 1024;
2012  void *ptr;
2013 
2014  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
2015  (int)size KMP_SRC_LOC_PARM));
2016  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2017  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
2018  return ptr;
2019 } // ___kmp_page_allocate
2020 
2021 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
2022  In debug mode, fill the memory block with 0xEF before call to free(). */
2023 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
2024  kmp_mem_descr_t descr;
2025 #if KMP_DEBUG
2026  kmp_uintptr_t addr_allocated; // Address returned by malloc().
2027  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
2028 #endif
2029  KE_TRACE(25,
2030  ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2031  KMP_ASSERT(ptr != NULL);
2032 
2033  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
2034 
2035  KE_TRACE(26, (" __kmp_free: "
2036  "ptr_allocated=%p, size_allocated=%d, "
2037  "ptr_aligned=%p, size_aligned=%d\n",
2038  descr.ptr_allocated, (int)descr.size_allocated,
2039  descr.ptr_aligned, (int)descr.size_aligned));
2040 #if KMP_DEBUG
2041  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2042  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2043  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2044  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2045  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
2046  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
2047  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2048  addr_allocated + descr.size_allocated);
2049  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2050 // Fill memory block with 0xEF, it helps catch using freed memory.
2051 #endif
2052 
2053 #ifndef LEAK_MEMORY
2054  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
2055 #ifdef KMP_DEBUG
2056  _free_src_loc(descr.ptr_allocated, _file_, _line_);
2057 #else
2058  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
2059 #endif
2060 #endif
2061  KMP_MB();
2062  KE_TRACE(25, ("<- __kmp_free() returns\n"));
2063 } // func ___kmp_free
2064 
2065 #if USE_FAST_MEMORY == 3
2066 // Allocate fast memory by first scanning the thread's free lists
2067 // If a chunk the right size exists, grab it off the free list.
2068 // Otherwise allocate normally using kmp_thread_malloc.
2069 
2070 // AC: How to choose the limit? Just get 16 for now...
2071 #define KMP_FREE_LIST_LIMIT 16
2072 
2073 // Always use 128 bytes for determining buckets for caching memory blocks
2074 #define DCACHE_LINE 128
2075 
2076 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
2077  void *ptr;
2078  size_t num_lines, idx;
2079  int index;
2080  void *alloc_ptr;
2081  size_t alloc_size;
2082  kmp_mem_descr_t *descr;
2083 
2084  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2085  __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
2086 
2087  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2088  idx = num_lines - 1;
2089  KMP_DEBUG_ASSERT(idx >= 0);
2090  if (idx < 2) {
2091  index = 0; // idx is [ 0, 1 ], use first free list
2092  num_lines = 2; // 1, 2 cache lines or less than cache line
2093  } else if ((idx >>= 2) == 0) {
2094  index = 1; // idx is [ 2, 3 ], use second free list
2095  num_lines = 4; // 3, 4 cache lines
2096  } else if ((idx >>= 2) == 0) {
2097  index = 2; // idx is [ 4, 15 ], use third free list
2098  num_lines = 16; // 5, 6, ..., 16 cache lines
2099  } else if ((idx >>= 2) == 0) {
2100  index = 3; // idx is [ 16, 63 ], use fourth free list
2101  num_lines = 64; // 17, 18, ..., 64 cache lines
2102  } else {
2103  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
2104  }
2105 
2106  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2107  if (ptr != NULL) {
2108  // pop the head of no-sync free list
2109  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2110  KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2111  sizeof(kmp_mem_descr_t)))
2112  ->ptr_aligned);
2113  goto end;
2114  }
2115  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2116  if (ptr != NULL) {
2117  // no-sync free list is empty, use sync free list (filled in by other
2118  // threads only)
2119  // pop the head of the sync free list, push NULL instead
2120  while (!KMP_COMPARE_AND_STORE_PTR(
2121  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
2122  KMP_CPU_PAUSE();
2123  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2124  }
2125  // push the rest of chain into no-sync free list (can be NULL if there was
2126  // the only block)
2127  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2128  KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2129  sizeof(kmp_mem_descr_t)))
2130  ->ptr_aligned);
2131  goto end;
2132  }
2133 
2134 alloc_call:
2135  // haven't found block in the free lists, thus allocate it
2136  size = num_lines * DCACHE_LINE;
2137 
2138  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2139  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2140  "alloc_size %d\n",
2141  __kmp_gtid_from_thread(this_thr), alloc_size));
2142  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2143 
2144  // align ptr to DCACHE_LINE
2145  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
2146  DCACHE_LINE) &
2147  ~(DCACHE_LINE - 1));
2148  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2149 
2150  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
2151  // we don't need size_allocated
2152  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
2153  // (it is already saved in bget buffer,
2154  // but we may want to use another allocator in future)
2155  descr->size_aligned = size;
2156 
2157 end:
2158  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
2159  __kmp_gtid_from_thread(this_thr), ptr));
2160  return ptr;
2161 } // func __kmp_fast_allocate
2162 
2163 // Free fast memory and place it on the thread's free list if it is of
2164 // the correct size.
2165 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
2166  kmp_mem_descr_t *descr;
2167  kmp_info_t *alloc_thr;
2168  size_t size;
2169  size_t idx;
2170  int index;
2171 
2172  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2173  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2174  KMP_ASSERT(ptr != NULL);
2175 
2176  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2177 
2178  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
2179  (int)descr->size_aligned));
2180 
2181  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
2182 
2183  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
2184  if (idx == size) {
2185  index = 0; // 2 cache lines
2186  } else if ((idx <<= 1) == size) {
2187  index = 1; // 4 cache lines
2188  } else if ((idx <<= 2) == size) {
2189  index = 2; // 16 cache lines
2190  } else if ((idx <<= 2) == size) {
2191  index = 3; // 64 cache lines
2192  } else {
2193  KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
2194  goto free_call; // 65 or more cache lines ( > 8KB )
2195  }
2196 
2197  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
2198  if (alloc_thr == this_thr) {
2199  // push block to self no-sync free list, linking previous head (LIFO)
2200  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2201  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2202  } else {
2203  void *head = this_thr->th.th_free_lists[index].th_free_list_other;
2204  if (head == NULL) {
2205  // Create new free list
2206  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2207  *((void **)ptr) = NULL; // mark the tail of the list
2208  descr->size_allocated = (size_t)1; // head of the list keeps its length
2209  } else {
2210  // need to check existed "other" list's owner thread and size of queue
2211  kmp_mem_descr_t *dsc =
2212  (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
2213  // allocating thread, same for all queue nodes
2214  kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2215  size_t q_sz =
2216  dsc->size_allocated + 1; // new size in case we add current task
2217  if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2218  // we can add current task to "other" list, no sync needed
2219  *((void **)ptr) = head;
2220  descr->size_allocated = q_sz;
2221  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2222  } else {
2223  // either queue blocks owner is changing or size limit exceeded
2224  // return old queue to allocating thread (q_th) synchronously,
2225  // and start new list for alloc_thr's tasks
2226  void *old_ptr;
2227  void *tail = head;
2228  void *next = *((void **)head);
2229  while (next != NULL) {
2230  KMP_DEBUG_ASSERT(
2231  // queue size should decrease by 1 each step through the list
2232  ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
2233  ->size_allocated +
2234  1 ==
2235  ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
2236  ->size_allocated);
2237  tail = next; // remember tail node
2238  next = *((void **)next);
2239  }
2240  KMP_DEBUG_ASSERT(q_th != NULL);
2241  // push block to owner's sync free list
2242  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2243  /* the next pointer must be set before setting free_list to ptr to avoid
2244  exposing a broken list to other threads, even for an instant. */
2245  *((void **)tail) = old_ptr;
2246 
2247  while (!KMP_COMPARE_AND_STORE_PTR(
2248  &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2249  KMP_CPU_PAUSE();
2250  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2251  *((void **)tail) = old_ptr;
2252  }
2253 
2254  // start new list of not-selt tasks
2255  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2256  *((void **)ptr) = NULL;
2257  descr->size_allocated = (size_t)1; // head of queue keeps its length
2258  }
2259  }
2260  }
2261  goto end;
2262 
2263 free_call:
2264  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2265  __kmp_gtid_from_thread(this_thr), size));
2266  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
2267  brel(this_thr, descr->ptr_allocated);
2268 
2269 end:
2270  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
2271 
2272 } // func __kmp_fast_free
2273 
2274 // Initialize the thread free lists related to fast memory
2275 // Only do this when a thread is initially created.
2276 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2277  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2278 
2279  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
2280 }
2281 
2282 // Free the memory in the thread free lists related to fast memory
2283 // Only do this when a thread is being reaped (destroyed).
2284 void __kmp_free_fast_memory(kmp_info_t *th) {
2285  // Suppose we use BGET underlying allocator, walk through its structures...
2286  int bin;
2287  thr_data_t *thr = get_thr_data(th);
2288  void **lst = NULL;
2289 
2290  KE_TRACE(
2291  5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2292 
2293  __kmp_bget_dequeue(th); // Release any queued buffers
2294 
2295  // Dig through free lists and extract all allocated blocks
2296  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2297  bfhead_t *b = thr->freelist[bin].ql.flink;
2298  while (b != &thr->freelist[bin]) {
2299  if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
2300  *((void **)b) =
2301  lst; // link the list (override bthr, but keep flink yet)
2302  lst = (void **)b; // push b into lst
2303  }
2304  b = b->ql.flink; // get next buffer
2305  }
2306  }
2307  while (lst != NULL) {
2308  void *next = *lst;
2309  KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2310  lst, next, th, __kmp_gtid_from_thread(th)));
2311  (*thr->relfcn)(lst);
2312 #if BufStats
2313  // count blocks to prevent problems in __kmp_finalize_bget()
2314  thr->numprel++; /* Nr of expansion block releases */
2315  thr->numpblk--; /* Total number of blocks */
2316 #endif
2317  lst = (void **)next;
2318  }
2319 
2320  KE_TRACE(
2321  5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
2322 }
2323 
2324 #endif // USE_FAST_MEMORY