15 #include "kmp_wrapper_malloc.h"
18 #if HWLOC_API_VERSION > 0x00020300
19 #define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20 #elif HWLOC_API_VERSION == 0x00020300
21 #define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22 hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
24 enum hwloc_memattr_id_e {
25 HWLOC_MEMATTR_ID_BANDWIDTH,
26 HWLOC_MEMATTR_ID_CAPACITY
36 typedef int (*bget_compact_t)(size_t, int);
37 typedef void *(*bget_acquire_t)(size_t);
38 typedef void (*bget_release_t)(void *);
43 #if KMP_ARCH_X86 || KMP_ARCH_ARM
44 typedef kmp_int32 bufsize;
46 typedef kmp_int64 bufsize;
49 typedef ssize_t bufsize;
54 typedef enum bget_mode {
60 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
61 static void *bget(kmp_info_t *th, bufsize size);
62 static void *bgetz(kmp_info_t *th, bufsize size);
63 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
64 static void brel(kmp_info_t *th, void *buf);
65 static void bectl(kmp_info_t *th, bget_compact_t compact,
66 bget_acquire_t acquire, bget_release_t release,
76 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
79 #define AlignType double
84 #define AlignType _Quad
120 static bufsize bget_bin_size[] = {
130 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
138 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
145 typedef struct qlinks {
146 struct bfhead *flink;
147 struct bfhead *blink;
151 typedef struct bhead2 {
159 typedef union bhead {
162 char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
165 #define BH(p) ((bhead_t *)(p))
168 typedef struct bdhead {
172 #define BDH(p) ((bdhead_t *)(p))
175 typedef struct bfhead {
179 #define BFH(p) ((bfhead_t *)(p))
181 typedef struct thr_data {
182 bfhead_t freelist[MAX_BGET_BINS];
187 long numpget, numprel;
188 long numdget, numdrel;
192 bget_compact_t compfcn;
193 bget_acquire_t acqfcn;
194 bget_release_t relfcn;
207 #define QLSize (sizeof(qlinks_t))
208 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
211 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
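// Map a request size to the smallest free-list bin that can hold it
// (binary search over bget_bin_size).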
222 static int bget_get_bin(bufsize size) {
224 int lo = 0, hi = MAX_BGET_BINS - 1;
226 KMP_DEBUG_ASSERT(size > 0);
228 while ((hi - lo) > 1) {
229 int mid = (lo + hi) >> 1;
230 if (size < bget_bin_size[mid])
236 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
241 static void set_thr_data(kmp_info_t *th) {
245 data = (thr_data_t *)((!th->th.th_local.bget_data)
246 ? __kmp_allocate(sizeof(*data))
247 : th->th.th_local.bget_data);
249 memset(data, '\0', sizeof(*data));
251 for (i = 0; i < MAX_BGET_BINS; ++i) {
252 data->freelist[i].ql.flink = &data->freelist[i];
253 data->freelist[i].ql.blink = &data->freelist[i];
256 th->th.th_local.bget_data = data;
257 th->th.th_local.bget_list = 0;
258 #if !USE_CMP_XCHG_FOR_BGET
259 #ifdef USE_QUEUING_LOCK_FOR_BGET
260 __kmp_init_lock(&th->th.th_local.bget_lock);
262 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
267 static thr_data_t *get_thr_data(kmp_info_t *th) {
270 data = (thr_data_t *)th->th.th_local.bget_data;
272 KMP_DEBUG_ASSERT(data != 0);
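// Buffers freed by other threads are queued on th_local.bget_list; dequeue
// grabs that list (lock-free CAS or under a lock) so the owning thread can
// release each buffer back into its own pool.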
278 static void __kmp_bget_dequeue(kmp_info_t *th) {
279 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
282 #if USE_CMP_XCHG_FOR_BGET
284 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
285 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
286 CCAST(void *, old_value), nullptr)) {
288 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
290 p = CCAST(void *, old_value);
293 #ifdef USE_QUEUING_LOCK_FOR_BGET
294 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
296 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
299 p = (void *)th->th.th_local.bget_list;
300 th->th.th_local.bget_list = 0;
302 #ifdef USE_QUEUING_LOCK_FOR_BGET
303 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
305 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
312 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
314 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
315 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
317 KMP_DEBUG_ASSERT(b->ql.blink == 0);
319 p = (void *)b->ql.flink;
327 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
328 #ifdef USE_QUEUING_LOCK_FOR_BGET
333 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
335 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
336 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
341 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
342 __kmp_gtid_from_thread(th)));
344 #if USE_CMP_XCHG_FOR_BGET
346 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
349 b->ql.flink = BFH(CCAST(void *, old_value));
351 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
352 CCAST(void *, old_value), buf)) {
354 old_value = TCR_PTR(th->th.th_local.bget_list);
357 b->ql.flink = BFH(CCAST(void *, old_value));
361 #ifdef USE_QUEUING_LOCK_FOR_BGET
362 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
364 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
367 b->ql.flink = BFH(th->th.th_local.bget_list);
368 th->th.th_local.bget_list = (void *)buf;
370 #ifdef USE_QUEUING_LOCK_FOR_BGET
371 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
373 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
379 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
382 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
383 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
385 bin = bget_get_bin(b->bh.bb.bsize);
387 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
388 &thr->freelist[bin]);
389 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
390 &thr->freelist[bin]);
392 b->ql.flink = &thr->freelist[bin];
393 b->ql.blink = thr->freelist[bin].ql.blink;
395 thr->freelist[bin].ql.blink = b;
396 b->ql.blink->ql.flink = b;
400 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
401 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
402 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
404 b->ql.blink->ql.flink = b->ql.flink;
405 b->ql.flink->ql.blink = b->ql.blink;
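// bcheck: report the total free space and the largest single free block
// currently available in this thread's pool.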
409 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
410 thr_data_t *thr = get_thr_data(th);
413 *total_free = *max_free = 0;
415 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
418 best = &thr->freelist[bin];
421 while (b != &thr->freelist[bin]) {
422 *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
423 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
430 if (*max_free < best->bh.bb.bsize)
431 *max_free = best->bh.bb.bsize;
434 if (*max_free > (bufsize)sizeof(bhead_t))
435 *max_free -= sizeof(bhead_t);
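// bget: satisfy a request from the per-thread pool. Scan the free-list bins
// (first-fit or best-fit depending on thr->mode), split oversized blocks, and
// on failure fall back to compaction (compfcn), a direct acquisition for very
// large requests, or pool expansion (acqfcn/bpool).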
439 static void *bget(kmp_info_t *th, bufsize requested_size) {
440 thr_data_t *thr = get_thr_data(th);
441 bufsize size = requested_size;
449 if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
453 __kmp_bget_dequeue(th);
455 if (size < (bufsize)SizeQ) {
458 #if defined(SizeQuant) && (SizeQuant > 1)
459 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
462 size += sizeof(bhead_t);
463 KMP_DEBUG_ASSERT(size >= 0);
464 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
466 use_blink = (thr->mode == bget_mode_lifo);
475 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
477 b = (use_blink ? thr->freelist[bin].ql.blink
478 : thr->freelist[bin].ql.flink);
480 if (thr->mode == bget_mode_best) {
481 best = &thr->freelist[bin];
485 while (b != &thr->freelist[bin]) {
486 if (b->bh.bb.bsize >= (bufsize)size) {
487 if ((best == &thr->freelist[bin]) ||
488 (b->bh.bb.bsize < best->bh.bb.bsize)) {
494 b = (use_blink ? b->ql.blink : b->ql.flink);
499 while (b != &thr->freelist[bin]) {
500 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
509 if ((b->bh.bb.bsize - (bufsize)size) >
510 (bufsize)(SizeQ + (sizeof(bhead_t)))) {
513 ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
514 bn = BH(((char *)ba) + size);
516 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
519 b->bh.bb.bsize -= (bufsize)size;
522 ba->bb.prevfree = b->bh.bb.bsize;
525 ba->bb.bsize = -size;
534 __kmp_bget_remove_from_freelist(b);
535 __kmp_bget_insert_into_freelist(thr, b);
537 thr->totalloc += (size_t)size;
540 buf = (void *)((((char *)ba) + sizeof(bhead_t)));
541 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
546 ba = BH(((char *)b) + b->bh.bb.bsize);
548 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
553 __kmp_bget_remove_from_freelist(b);
555 thr->totalloc += (size_t)b->bh.bb.bsize;
559 b->bh.bb.bsize = -(b->bh.bb.bsize);
562 TCW_PTR(ba->bb.bthr, th);
568 buf = (void *)&(b->ql);
569 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
575 b = (use_blink ? b->ql.blink : b->ql.flink);
583 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
591 if (thr->acqfcn != 0) {
592 if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
597 size += sizeof(bdhead_t) - sizeof(bhead_t);
599 KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
602 bdh = BDH((*thr->acqfcn)((bufsize)size));
606 bdh->bh.bb.bsize = 0;
609 TCW_PTR(bdh->bh.bb.bthr, th);
611 bdh->bh.bb.prevfree = 0;
614 thr->totalloc += (size_t)size;
618 buf = (void *)(bdh + 1);
619 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
628 KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
631 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
632 KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
633 if (newpool != NULL) {
634 bpool(th, newpool, thr->exp_incr);
651 static void *bgetz(kmp_info_t *th, bufsize size) {
652 char *buf = (char *)bget(th, size);
658 b = BH(buf - sizeof(bhead_t));
659 rsize = -(b->bb.bsize);
663 bd = BDH(buf - sizeof(bdhead_t));
664 rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
666 rsize -= sizeof(bhead_t);
669 KMP_DEBUG_ASSERT(rsize >= size);
671 (void)memset(buf, 0, (bufsize)rsize);
673 return ((void *)buf);
681 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
686 nbuf = bget(th, size);
693 b = BH(((char *)buf) - sizeof(bhead_t));
694 osize = -b->bb.bsize;
699 bd = BDH(((char *)buf) - sizeof(bdhead_t));
700 osize = bd->tsize - (bufsize)sizeof(bdhead_t);
702 osize -= sizeof(bhead_t);
705 KMP_DEBUG_ASSERT(osize > 0);
707 (void)KMP_MEMCPY((char *)nbuf, (char *)buf,
708 (size_t)((size < osize) ? size : osize));
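// brel: release a buffer. Buffers owned by another thread are enqueued to
// that thread; otherwise the block is coalesced with free neighbors, and a
// completely free pool block is handed back through relfcn.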
715 static void brel(kmp_info_t *th, void *buf) {
716 thr_data_t *thr = get_thr_data(th);
720 KMP_DEBUG_ASSERT(buf != NULL);
721 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
723 b = BFH(((char *)buf) - sizeof(bhead_t));
725 if (b->bh.bb.bsize == 0) {
728 bdh = BDH(((char *)buf) - sizeof(bdhead_t));
729 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
731 thr->totalloc -= (size_t)bdh->tsize;
736 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
739 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
741 KMP_DEBUG_ASSERT(thr->relfcn != 0);
742 (*thr->relfcn)((void *)bdh);
746 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
750 __kmp_bget_enqueue(bth, buf
751 #ifdef USE_QUEUING_LOCK_FOR_BGET
753 __kmp_gtid_from_thread(th)
760 if (b->bh.bb.bsize >= 0) {
763 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
767 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
771 thr->totalloc += (size_t)b->bh.bb.bsize;
776 if (b->bh.bb.prevfree != 0) {
781 bufsize size = b->bh.bb.bsize;
784 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
786 b = BFH(((char *)b) - b->bh.bb.prevfree);
787 b->bh.bb.bsize -= size;
790 __kmp_bget_remove_from_freelist(b);
795 b->bh.bb.bsize = -b->bh.bb.bsize;
799 __kmp_bget_insert_into_freelist(thr, b);
805 bn = BFH(((char *)b) + b->bh.bb.bsize);
806 if (bn->bh.bb.bsize > 0) {
810 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
813 __kmp_bget_remove_from_freelist(bn);
815 b->bh.bb.bsize += bn->bh.bb.bsize;
819 __kmp_bget_remove_from_freelist(b);
820 __kmp_bget_insert_into_freelist(thr, b);
828 bn = BFH(((char *)b) + b->bh.bb.bsize);
831 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
832 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
834 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
839 bn->bh.bb.prevfree = b->bh.bb.bsize;
845 if (thr->relfcn != 0 &&
846 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
852 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
853 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
854 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
858 __kmp_bget_remove_from_freelist(b);
860 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
866 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
869 if (thr->last_pool == b)
879 static void bectl(kmp_info_t *th, bget_compact_t compact,
880 bget_acquire_t acquire, bget_release_t release,
882 thr_data_t *thr = get_thr_data(th);
884 thr->compfcn = compact;
885 thr->acqfcn = acquire;
886 thr->relfcn = release;
887 thr->exp_incr = pool_incr;
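// bpool: add a block acquired from the system as a new pool for this thread.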
891 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
893 thr_data_t *thr = get_thr_data(th);
894 bfhead_t *b = BFH(buf);
897 __kmp_bget_dequeue(th);
900 len &= ~((bufsize)(SizeQuant - 1));
902 if (thr->pool_len == 0) {
904 } else if (len != thr->pool_len) {
910 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
916 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
921 b->bh.bb.prevfree = 0;
930 len -= sizeof(bhead_t);
931 b->bh.bb.bsize = (bufsize)len;
933 TCW_PTR(b->bh.bb.bthr,
934 (kmp_info_t *)((kmp_uintptr_t)th |
938 __kmp_bget_insert_into_freelist(thr, b);
941 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
942 (size_t)(len - sizeof(bfhead_t)));
944 bn = BH(((char *)b) + len);
945 bn->bb.prevfree = (bufsize)len;
947 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
949 bn->bb.bsize = ESent;
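// bfreed: debugging aid that prints per-thread pool statistics, lists the
// free blocks, and checks that freed memory has not been overwritten.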
953 static void bfreed(kmp_info_t *th) {
954 int bin = 0, count = 0;
955 int gtid = __kmp_gtid_from_thread(th);
956 thr_data_t *thr = get_thr_data(th);
959 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
960 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
961 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
962 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
963 " drel=%" KMP_INT64_SPEC "\n",
964 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
965 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
966 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
967 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
973 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
975 bufsize bs = b->bh.bb.bsize;
977 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
978 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
979 KMP_DEBUG_ASSERT(bs > 0);
983 __kmp_printf_no_lock(
984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
988 char *lerr = ((char *)b) + sizeof(bfhead_t);
989 if ((bs > sizeof(bfhead_t)) &&
991 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
993 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
994 "free block have been overstored.)\n",
1003 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
1006 void __kmp_initialize_bget(kmp_info_t *th) {
1007 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1012 (bufsize)__kmp_malloc_pool_incr);
1015 void __kmp_finalize_bget(kmp_info_t *th) {
1019 KMP_DEBUG_ASSERT(th != 0);
1022 thr = (thr_data_t *)th->th.th_local.bget_data;
1023 KMP_DEBUG_ASSERT(thr != NULL);
1031 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1032 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1033 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1034 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1035 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1039 __kmp_bget_remove_from_freelist(b);
1041 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1046 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1051 if (th->th.th_local.bget_data != NULL) {
1052 __kmp_free(th->th.th_local.bget_data);
1053 th->th.th_local.bget_data = NULL;
1057 void kmpc_set_poolsize(size_t size) {
1058 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1059 (bget_release_t)free, (bufsize)size);
1062 size_t kmpc_get_poolsize(void) {
1065 p = get_thr_data(__kmp_get_thread());
1070 void kmpc_set_poolmode(int mode) {
1073 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1074 mode == bget_mode_best) {
1075 p = get_thr_data(__kmp_get_thread());
1076 p->mode = (bget_mode_t)mode;
1080 int kmpc_get_poolmode(void) {
1083 p = get_thr_data(__kmp_get_thread());
1088 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1089 kmp_info_t *th = __kmp_get_thread();
1092 __kmp_bget_dequeue(th);
1100 void kmpc_poolprint(void) {
1101 kmp_info_t *th = __kmp_get_thread();
1103 __kmp_bget_dequeue(th);
1110 void *kmpc_malloc(size_t size) {
1112 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1115 *(void **)ptr = ptr;
1116 ptr = (void **)ptr + 1;
1121 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1123 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1125 void *ptr_allocated;
1126 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1127 if (!IS_POWER_OF_TWO(alignment)) {
1132 size = size + sizeof(void *) + alignment;
1133 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1134 if (ptr_allocated != NULL) {
1136 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1138 *((void **)ptr - 1) = ptr_allocated;
1145 void *kmpc_calloc(size_t nelem, size_t elsize) {
1147 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1150 *(void **)ptr = ptr;
1151 ptr = (void **)ptr + 1;
1156 void *kmpc_realloc(void *ptr, size_t size) {
1157 void *result = NULL;
1160 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1162 if (result != NULL) {
1163 *(void **)result = result;
1164 result = (void **)result + 1;
1166 } else if (size == 0) {
1172 KMP_ASSERT(*((void **)ptr - 1));
1173 brel(__kmp_get_thread(), *((void **)ptr - 1));
1175 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1176 (bufsize)(size + sizeof(ptr)));
1177 if (result != NULL) {
1178 *(void **)result = result;
1179 result = (void **)result + 1;
1186 void kmpc_free(void *ptr) {
1187 if (!__kmp_init_serial) {
1191 kmp_info_t *th = __kmp_get_thread();
1192 __kmp_bget_dequeue(th);
1194 KMP_ASSERT(*((void **)ptr - 1));
1195 brel(th, *((void **)ptr - 1));
1199 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1201 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1202 (int)size KMP_SRC_LOC_PARM));
1203 ptr = bget(th, (bufsize)size);
1204 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1208 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1209 size_t elsize KMP_SRC_LOC_DECL) {
1211 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1212 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1213 ptr = bgetz(th, (bufsize)(nelem * elsize));
1214 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1218 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1219 size_t size KMP_SRC_LOC_DECL) {
1220 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1221 ptr, (int)size KMP_SRC_LOC_PARM));
1222 ptr = bgetr(th, ptr, (bufsize)size);
1223 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1227 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1228 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1229 ptr KMP_SRC_LOC_PARM));
1231 __kmp_bget_dequeue(th);
1234 KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1238 static const char *kmp_mk_lib_name;
1239 static void *h_memkind;
1242 static void *(*kmp_mk_alloc)(void *k, size_t sz);
1244 static void (*kmp_mk_free)(void *kind, void *ptr);
1246 static int (*kmp_mk_check)(void *kind);
1248 static void **mk_default;
1249 static void **mk_interleave;
1250 static void **mk_hbw;
1251 static void **mk_hbw_interleave;
1252 static void **mk_hbw_preferred;
1253 static void **mk_hugetlb;
1254 static void **mk_hbw_hugetlb;
1255 static void **mk_hbw_preferred_hugetlb;
1256 static void **mk_dax_kmem;
1257 static void **mk_dax_kmem_all;
1258 static void **mk_dax_kmem_preferred;
1259 static void *(*kmp_target_alloc_host)(size_t size, int device);
1260 static void *(*kmp_target_alloc_shared)(size_t size, int device);
1261 static void *(*kmp_target_alloc_device)(size_t size, int device);
1262 static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
1263 static void *(*kmp_target_unlock_mem)(void *ptr, int device);
1264 static void *(*kmp_target_free_host)(void *ptr, int device);
1265 static void *(*kmp_target_free_shared)(void *ptr, int device);
1266 static void *(*kmp_target_free_device)(void *ptr, int device);
1267 static bool __kmp_target_mem_available;
1269 #define KMP_IS_TARGET_MEM_SPACE(MS) \
1270 (MS == llvm_omp_target_host_mem_space || \
1271 MS == llvm_omp_target_shared_mem_space || \
1272 MS == llvm_omp_target_device_mem_space)
1274 #define KMP_IS_TARGET_MEM_ALLOC(MA) \
1275 (MA == llvm_omp_target_host_mem_alloc || \
1276 MA == llvm_omp_target_shared_mem_alloc || \
1277 MA == llvm_omp_target_device_mem_alloc)
1279 #define KMP_IS_PREDEF_MEM_SPACE(MS) \
1280 (MS == omp_null_mem_space || MS == omp_default_mem_space || \
1281 MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \
1282 MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \
1283 KMP_IS_TARGET_MEM_SPACE(MS))
1302 bool supported = false;
1303 using get_mem_resources_t = int (*)(int, const int *, int,
1304 omp_memspace_handle_t, int *);
1305 using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t);
1306 using omp_free_t = void (*)(void *, omp_allocator_handle_t);
1307 get_mem_resources_t tgt_get_mem_resources = nullptr;
1308 omp_alloc_t tgt_omp_alloc = nullptr;
1309 omp_free_t tgt_omp_free = nullptr;
1314 tgt_get_mem_resources =
1315 (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
1316 tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
1317 tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
1318 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1323 omp_memspace_handle_t memspace, int *resources) {
1325 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1329 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
1331 return tgt_omp_alloc(size, allocator);
1335 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
1337 tgt_omp_free(ptr, allocator);
1339 } __kmp_tgt_allocator;
1341 extern "C" int omp_get_num_devices(
void);
1351 omp_memspace_handle_t memspace) {
1354 if (ms->num_resources == num_resources && ms->memspace == memspace &&
1355 !memcmp(ms->resources, resources, sizeof(int) * num_resources))
1364 omp_memspace_handle_t get(int num_resources, const int *resources,
1365 omp_memspace_handle_t memspace) {
1366 int gtid = __kmp_entry_gtid();
1367 __kmp_acquire_lock(&mtx, gtid);
1369 int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
1370 KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
1371 qsort(sorted_resources, (size_t)num_resources, sizeof(int),
1372 [](const void *a, const void *b) {
1373 const int val_a = *(const int *)a;
1374 const int val_b = *(const int *)b;
1375 return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
1379 __kmp_free(sorted_resources);
1380 __kmp_release_lock(&mtx, gtid);
1384 ms->memspace = memspace;
1385 ms->num_resources = num_resources;
1386 ms->resources = sorted_resources;
1387 ms->next = memspace_list;
1389 __kmp_release_lock(&mtx, gtid);
1395 void init() { __kmp_init_lock(&mtx); }
1401 __kmp_free(ms->resources);
1406 __kmp_destroy_lock(&mtx);
1411 omp_memspace_handle_t memspace) {
1412 int actual_num_devices = num_devices;
1413 int *actual_devices = const_cast<int *>(devices);
1414 if (actual_num_devices == 0) {
1415 actual_num_devices = omp_get_num_devices();
1416 if (actual_num_devices <= 0)
1417 return omp_null_mem_space;
1419 if (actual_devices == NULL) {
1421 actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
1422 for (int i = 0; i < actual_num_devices; i++)
1423 actual_devices[i] = i;
1427 actual_num_devices, actual_devices, host_access, memspace, NULL);
1428 if (num_resources <= 0)
1429 return omp_null_mem_space;
1431 omp_memspace_handle_t ms = omp_null_mem_space;
1432 if (num_resources > 0) {
1433 int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
1436 actual_num_devices, actual_devices, host_access, memspace, resources);
1437 ms = get(num_resources, resources, memspace);
1438 __kmp_free(resources);
1440 if (!devices && actual_devices)
1441 __kmp_free(actual_devices);
1445 omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
1446 omp_memspace_handle_t parent) {
1448 return get(num_resources, resources, ms->memspace);
1450 } __kmp_tgt_memspace_list;
1452 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1453 static inline void chk_kind(void ***pkind) {
1454 KMP_DEBUG_ASSERT(pkind);
1456 if (kmp_mk_check(**pkind))
1461 void __kmp_init_memkind() {
1463 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1465 kmp_mk_lib_name = "libmemkind.so";
1466 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1468 kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
1470 (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
1471 kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
1472 mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
1473 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1474 !kmp_mk_check(*mk_default)) {
1475 __kmp_memkind_available = 1;
1476 mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
1477 chk_kind(&mk_interleave);
1478 mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
1480 mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
1481 chk_kind(&mk_hbw_interleave);
1482 mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
1483 chk_kind(&mk_hbw_preferred);
1484 mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
1485 chk_kind(&mk_hugetlb);
1486 mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
1487 chk_kind(&mk_hbw_hugetlb);
1488 mk_hbw_preferred_hugetlb =
1489 (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
1490 chk_kind(&mk_hbw_preferred_hugetlb);
1491 mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
1492 chk_kind(&mk_dax_kmem);
1493 mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
1494 chk_kind(&mk_dax_kmem_all);
1495 mk_dax_kmem_preferred =
1496 (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
1497 chk_kind(&mk_dax_kmem_preferred);
1498 KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
1504 kmp_mk_lib_name = "";
1507 kmp_mk_check = NULL;
1508 kmp_mk_alloc = NULL;
1511 mk_interleave = NULL;
1513 mk_hbw_interleave = NULL;
1514 mk_hbw_preferred = NULL;
1516 mk_hbw_hugetlb = NULL;
1517 mk_hbw_preferred_hugetlb = NULL;
1519 mk_dax_kmem_all = NULL;
1520 mk_dax_kmem_preferred = NULL;
1523 void __kmp_fini_memkind() {
1524 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1525 if (__kmp_memkind_available)
1526 KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
1531 kmp_mk_check = NULL;
1532 kmp_mk_alloc = NULL;
1535 mk_interleave = NULL;
1537 mk_hbw_interleave = NULL;
1538 mk_hbw_preferred = NULL;
1540 mk_hbw_hugetlb = NULL;
1541 mk_hbw_preferred_hugetlb = NULL;
1543 mk_dax_kmem_all = NULL;
1544 mk_dax_kmem_preferred = NULL;
1549 static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
1550 #if HWLOC_API_VERSION >= 0x00020300
1551 const hwloc_topology_support *support;
1552 support = hwloc_topology_get_support(__kmp_hwloc_topology);
1554 if (policy == HWLOC_MEMBIND_BIND)
1555 return (support->membind->alloc_membind &&
1556 support->membind->bind_membind);
1557 if (policy == HWLOC_MEMBIND_INTERLEAVE)
1558 return (support->membind->alloc_membind &&
1559 support->membind->interleave_membind);
1567 void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
1568 hwloc_membind_policy_t policy) {
1569 #if HWLOC_API_VERSION >= 0x00020300
1572 struct hwloc_location initiator;
1578 hwloc_cpuset_t mask = hwloc_bitmap_alloc();
1579 ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
1581 hwloc_bitmap_free(mask);
1584 initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
1585 initiator.location.cpuset = mask;
1586 ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
1591 return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
1592 HWLOC_MEMBIND_BYNODESET);
1598 void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
1599 hwloc_membind_policy_t policy) {
1600 #if HWLOC_API_VERSION >= 0x00020300
1602 if (ms == omp_high_bw_mem_space) {
1603 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
1604 } else if (ms == omp_large_cap_mem_space) {
1605 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
1607 ptr = hwloc_alloc(__kmp_hwloc_topology, size);
1616 void __kmp_init_target_mem() {
1617 *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1618 *(void **)(&kmp_target_alloc_shared) =
1619 KMP_DLSYM("llvm_omp_target_alloc_shared");
1620 *(void **)(&kmp_target_alloc_device) =
1621 KMP_DLSYM("llvm_omp_target_alloc_device");
1622 *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
1623 *(void **)(&kmp_target_free_shared) =
1624 KMP_DLSYM("llvm_omp_target_free_shared");
1625 *(void **)(&kmp_target_free_device) =
1626 KMP_DLSYM("llvm_omp_target_free_device");
1627 __kmp_target_mem_available =
1628 kmp_target_alloc_host && kmp_target_alloc_shared &&
1629 kmp_target_alloc_device && kmp_target_free_host &&
1630 kmp_target_free_shared && kmp_target_free_device;
1632 *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
1633 *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
1634 __kmp_tgt_allocator.init();
1635 __kmp_tgt_memspace_list.init();
1639 void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.fini(); }
1641 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
1643 omp_alloctrait_t traits[]) {
1652 al->partition = omp_atv_environment;
1653 al->pin_device = -1;
1654 al->preferred_device = -1;
1655 al->target_access = omp_atv_single;
1656 al->atomic_scope = omp_atv_device;
1658 for (i = 0; i < ntraits; ++i) {
1659 switch (traits[i].key) {
1660 case omp_atk_sync_hint:
1661 case omp_atk_access:
1663 case omp_atk_pinned:
1666 case omp_atk_alignment:
1667 __kmp_type_convert(traits[i].value, &(al->alignment));
1668 KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1670 case omp_atk_pool_size:
1671 al->pool_size = traits[i].value;
1673 case omp_atk_fallback:
1674 al->fb = (omp_alloctrait_value_t)traits[i].value;
1676 al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1677 al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1679 case omp_atk_fb_data:
1682 case omp_atk_partition:
1684 al->membind = (omp_alloctrait_value_t)traits[i].value;
1685 KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
1686 al->membind == omp_atv_nearest ||
1687 al->membind == omp_atv_blocked ||
1688 al->membind == omp_atv_interleaved);
1690 al->memkind = RCAST(void **, traits[i].value);
1692 case omp_atk_pin_device:
1693 __kmp_type_convert(traits[i].value, &(al->pin_device));
1695 case omp_atk_preferred_device:
1696 __kmp_type_convert(traits[i].value, &(al->preferred_device));
1698 case omp_atk_target_access:
1699 al->target_access = (omp_alloctrait_value_t)traits[i].value;
1701 case omp_atk_atomic_scope:
1702 al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
1704 case omp_atk_part_size:
1705 __kmp_type_convert(traits[i].value, &(al->part_size));
1708 KMP_ASSERT2(0, "Unexpected allocator trait");
1712 if (al->memspace > kmp_max_mem_space) {
1714 return (omp_allocator_handle_t)al;
1717 KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
1721 al->fb = omp_atv_default_mem_fb;
1723 } else if (al->fb == omp_atv_allocator_fb) {
1724 KMP_ASSERT(al->fb_data != NULL);
1725 } else if (al->fb == omp_atv_default_mem_fb) {
1728 if (__kmp_memkind_available) {
1730 if (ms == omp_high_bw_mem_space) {
1731 if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
1732 al->memkind = mk_hbw_interleave;
1733 } else if (mk_hbw_preferred) {
1739 al->memkind = mk_hbw_preferred;
1743 return omp_null_allocator;
1745 } else if (ms == omp_large_cap_mem_space) {
1746 if (mk_dax_kmem_all) {
1748 al->memkind = mk_dax_kmem_all;
1749 } else if (mk_dax_kmem) {
1751 al->memkind = mk_dax_kmem;
1754 return omp_null_allocator;
1757 if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
1758 al->memkind = mk_interleave;
1760 al->memkind = mk_default;
1763 } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1765 return omp_null_allocator;
1767 if (!__kmp_hwloc_available &&
1768 (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
1771 return omp_null_allocator;
1774 return (omp_allocator_handle_t)al;
1777 void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
1778 if (allocator > kmp_max_mem_alloc)
1779 __kmp_free(allocator);
1782 void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1783 if (allocator == omp_null_allocator)
1784 allocator = omp_default_mem_alloc;
1785 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1788 omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
1789 return __kmp_threads[gtid]->th.th_def_allocator;
1792 omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs,
1793 omp_memspace_handle_t memspace,
1795 if (!__kmp_init_serial)
1796 __kmp_serial_initialize();
1798 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1799 return omp_null_mem_space;
1801 return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
1804 omp_allocator_handle_t
1805 __kmp_get_devices_allocator(int ndevs, const int *devs,
1806 omp_memspace_handle_t memspace, int host) {
1807 if (!__kmp_init_serial)
1808 __kmp_serial_initialize();
1810 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1811 return omp_null_allocator;
1813 omp_memspace_handle_t mspace =
1814 __kmp_get_devices_memspace(ndevs, devs, memspace, host);
1815 if (mspace == omp_null_mem_space)
1816 return omp_null_allocator;
1818 return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
1821 int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
1822 if (!__kmp_init_serial)
1823 __kmp_serial_initialize();
1824 if (memspace == omp_null_mem_space)
1826 if (memspace < kmp_max_mem_space)
1829 return ms->num_resources;
1832 omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
1833 int num_resources, int *resources) {
1834 if (!__kmp_init_serial)
1835 __kmp_serial_initialize();
1836 if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
1839 if (num_resources == 0 || ms->num_resources < num_resources || !resources)
1840 return omp_null_mem_space;
1844 int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
1847 for (int i = 0; i < num_resources; i++)
1848 resources_abs[i] = ms->resources[resources[i]];
1850 omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
1851 num_resources, resources_abs, memspace);
1852 __kmp_free(resources_abs);
1857 typedef struct kmp_mem_desc {
1864 static int alignment = sizeof(void *);
1867 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1868 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1869 void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1870 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1874 void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1875 omp_allocator_handle_t allocator) {
1876 KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1877 (int)size, allocator));
1878 void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1879 KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1883 void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1884 omp_allocator_handle_t allocator) {
1885 KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1886 (int)size, allocator));
1887 void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1888 KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1892 void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1893 omp_allocator_handle_t allocator,
1894 omp_allocator_handle_t free_allocator) {
1895 KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1896 allocator, free_allocator));
1897 void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1898 KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1902 void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1903 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1904 ___kmpc_free(gtid, ptr, allocator);
1905 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1910 void *__kmp_alloc(int gtid, size_t algn, size_t size,
1911 omp_allocator_handle_t allocator) {
1914 KMP_DEBUG_ASSERT(__kmp_init_serial);
1917 if (allocator == omp_null_allocator)
1918 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1919 kmp_int32 default_device =
1920 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1924 int sz_desc = sizeof(kmp_mem_desc_t);
1925 kmp_mem_desc_t desc;
1927 kmp_uintptr_t addr_align;
1928 kmp_uintptr_t addr_descr;
1929 size_t align = alignment;
1930 if (allocator > kmp_max_mem_alloc && al->alignment > align)
1931 align = al->alignment;
1934 desc.size_orig = size;
1935 desc.size_a = size + sz_desc + align;
1936 bool is_pinned = false;
1937 if (allocator > kmp_max_mem_alloc)
1938 is_pinned = al->pinned;
1941 int use_default_allocator =
1942 (!__kmp_hwloc_available && !__kmp_memkind_available);
1944 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
1946 return __kmp_tgt_allocator.omp_alloc(size, allocator);
1949 if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1952 if (__kmp_target_mem_available) {
1954 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1955 if (allocator == llvm_omp_target_host_mem_alloc)
1956 ptr = kmp_target_alloc_host(size, device);
1957 else if (allocator == llvm_omp_target_shared_mem_alloc)
1958 ptr = kmp_target_alloc_shared(size, device);
1960 ptr = kmp_target_alloc_device(size, device);
1963 KMP_INFORM(TargetMemNotAvailable);
1967 if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1968 if (__kmp_target_mem_available) {
1970 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1971 if (al->memspace == llvm_omp_target_host_mem_space)
1972 ptr = kmp_target_alloc_host(size, device);
1973 else if (al->memspace == llvm_omp_target_shared_mem_space)
1974 ptr = kmp_target_alloc_shared(size, device);
1976 ptr = kmp_target_alloc_device(size, device);
1979 KMP_INFORM(TargetMemNotAvailable);
1984 if (__kmp_hwloc_available) {
1985 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
1986 if (allocator < kmp_max_mem_alloc) {
1988 if (allocator == omp_high_bw_mem_alloc) {
1989 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1990 desc.size_a, HWLOC_MEMBIND_BIND);
1992 use_default_allocator = true;
1993 } else if (allocator == omp_large_cap_mem_alloc) {
1994 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1995 desc.size_a, HWLOC_MEMBIND_BIND);
1997 use_default_allocator = true;
1999 use_default_allocator = true;
2001 if (use_default_allocator) {
2002 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2004 } else if (al->pool_size > 0) {
2007 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2008 if (used + desc.size_a > al->pool_size) {
2010 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2011 if (al->fb == omp_atv_default_mem_fb) {
2013 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2014 } else if (al->fb == omp_atv_abort_fb) {
2016 } else if (al->fb == omp_atv_allocator_fb) {
2017 KMP_ASSERT(al != al->fb_data);
2019 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2023 if (al->membind == omp_atv_interleaved) {
2024 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2025 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2026 HWLOC_MEMBIND_INTERLEAVE);
2028 } else if (al->membind == omp_atv_environment) {
2029 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2030 HWLOC_MEMBIND_DEFAULT);
2032 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2035 if (al->fb == omp_atv_default_mem_fb) {
2037 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2038 } else if (al->fb == omp_atv_abort_fb) {
2040 } else if (al->fb == omp_atv_allocator_fb) {
2041 KMP_ASSERT(al != al->fb_data);
2043 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2049 if (al->membind == omp_atv_interleaved) {
2050 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2051 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2052 HWLOC_MEMBIND_INTERLEAVE);
2054 } else if (al->membind == omp_atv_environment) {
2055 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2056 HWLOC_MEMBIND_DEFAULT);
2058 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2061 if (al->fb == omp_atv_default_mem_fb) {
2063 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2064 } else if (al->fb == omp_atv_abort_fb) {
2066 } else if (al->fb == omp_atv_allocator_fb) {
2067 KMP_ASSERT(al != al->fb_data);
2069 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2074 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2078 if (__kmp_memkind_available) {
2079 if (allocator < kmp_max_mem_alloc) {
2081 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2082 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
2083 } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2084 ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
2086 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2088 } else if (al->pool_size > 0) {
2091 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2092 if (used + desc.size_a > al->pool_size) {
2094 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2095 if (al->fb == omp_atv_default_mem_fb) {
2097 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2098 } else if (al->fb == omp_atv_abort_fb) {
2100 } else if (al->fb == omp_atv_allocator_fb) {
2101 KMP_ASSERT(al != al->fb_data);
2103 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2104 if (is_pinned && kmp_target_lock_mem)
2105 kmp_target_lock_mem(ptr, size, default_device);
2110 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2112 if (al->fb == omp_atv_default_mem_fb) {
2114 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2115 } else if (al->fb == omp_atv_abort_fb) {
2117 } else if (al->fb == omp_atv_allocator_fb) {
2118 KMP_ASSERT(al != al->fb_data);
2120 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2121 if (is_pinned && kmp_target_lock_mem)
2122 kmp_target_lock_mem(ptr, size, default_device);
2129 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2131 if (al->fb == omp_atv_default_mem_fb) {
2133 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2134 } else if (al->fb == omp_atv_abort_fb) {
2136 } else if (al->fb == omp_atv_allocator_fb) {
2137 KMP_ASSERT(al != al->fb_data);
2139 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2140 if (is_pinned && kmp_target_lock_mem)
2141 kmp_target_lock_mem(ptr, size, default_device);
2146 } else if (allocator < kmp_max_mem_alloc) {
2148 if (allocator == omp_high_bw_mem_alloc) {
2149 KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
2150 } else if (allocator == omp_large_cap_mem_alloc) {
2151 KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
2152 } else if (allocator == omp_const_mem_alloc) {
2153 KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
2154 } else if (allocator == omp_low_lat_mem_alloc) {
2155 KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
2156 } else if (allocator == omp_cgroup_mem_alloc) {
2157 KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
2158 } else if (allocator == omp_pteam_mem_alloc) {
2159 KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
2160 } else if (allocator == omp_thread_mem_alloc) {
2161 KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
2163 use_default_allocator = true;
2165 if (use_default_allocator) {
2166 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2167 use_default_allocator = false;
2169 } else if (al->pool_size > 0) {
2172 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2173 if (used + desc.size_a > al->pool_size) {
2175 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2176 if (al->fb == omp_atv_default_mem_fb) {
2178 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2179 } else if (al->fb == omp_atv_abort_fb) {
2181 } else if (al->fb == omp_atv_allocator_fb) {
2182 KMP_ASSERT(al != al->fb_data);
2184 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2185 if (is_pinned && kmp_target_lock_mem)
2186 kmp_target_lock_mem(ptr, size, default_device);
2191 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2192 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2199 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2200 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2207 KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
2211 if (is_pinned && kmp_target_lock_mem)
2212 kmp_target_lock_mem(ptr, desc.size_a, default_device);
2214 addr = (kmp_uintptr_t)ptr;
2215 addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
2216 addr_descr = addr_align - sz_desc;
2218 desc.ptr_alloc = ptr;
2219 desc.ptr_align = (void *)addr_align;
2220 desc.allocator = al;
2221 *((kmp_mem_desc_t *)addr_descr) = desc;
2224 return desc.ptr_align;
2227 void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
2228 omp_allocator_handle_t allocator) {
2231 KMP_DEBUG_ASSERT(__kmp_init_serial);
2233 if (allocator == omp_null_allocator)
2234 allocator = __kmp_threads[gtid]->th.th_def_allocator;
2238 if (nmemb == 0 || size == 0)
2241 if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
2242 if (al->fb == omp_atv_abort_fb) {
2248 ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
2251 memset(ptr, 0x00, nmemb * size);
2256 void *__kmp_realloc(int gtid, void *ptr, size_t size,
2257 omp_allocator_handle_t allocator,
2258 omp_allocator_handle_t free_allocator) {
2260 KMP_DEBUG_ASSERT(__kmp_init_serial);
2264 ___kmpc_free(gtid, ptr, free_allocator);
2268 nptr = __kmp_alloc(gtid, 0, size, allocator);
2270 if (nptr != NULL && ptr != NULL) {
2271 kmp_mem_desc_t desc;
2272 kmp_uintptr_t addr_align;
2273 kmp_uintptr_t addr_descr;
2275 addr_align = (kmp_uintptr_t)ptr;
2276 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2277 desc = *((kmp_mem_desc_t *)addr_descr);
2279 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2280 KMP_DEBUG_ASSERT(desc.size_orig > 0);
2281 KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
2282 KMP_MEMCPY((char *)nptr, (char *)ptr,
2283 (size_t)((size < desc.size_orig) ? size : desc.size_orig));
2287 ___kmpc_free(gtid, ptr, free_allocator);
2293 void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
2298 omp_allocator_handle_t oal;
2299 al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
2300 kmp_mem_desc_t desc;
2301 kmp_uintptr_t addr_align;
2302 kmp_uintptr_t addr_descr;
2304 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
2305 __kmp_tgt_allocator.omp_free(ptr, allocator);
2309 if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
2310 (allocator > kmp_max_mem_alloc &&
2311 KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
2313 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2314 if (allocator == llvm_omp_target_host_mem_alloc) {
2315 kmp_target_free_host(ptr, device);
2316 } else if (allocator == llvm_omp_target_shared_mem_alloc) {
2317 kmp_target_free_shared(ptr, device);
2318 } else if (allocator == llvm_omp_target_device_mem_alloc) {
2319 kmp_target_free_device(ptr, device);
2324 addr_align = (kmp_uintptr_t)ptr;
2325 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2326 desc = *((kmp_mem_desc_t *)addr_descr);
2328 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2330 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
2332 al = desc.allocator;
2333 oal = (omp_allocator_handle_t)al;
2334 KMP_DEBUG_ASSERT(al);
2336 if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
2338 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2339 kmp_target_unlock_mem(desc.ptr_alloc, device);
2343 if (__kmp_hwloc_available) {
2344 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2346 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2348 KMP_DEBUG_ASSERT(used >= desc.size_a);
2350 hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
2353 if (__kmp_memkind_available) {
2354 if (oal < kmp_max_mem_alloc) {
2356 if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2357 kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
2358 } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2359 kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
2361 kmp_mk_free(*mk_default, desc.ptr_alloc);
2364 if (al->pool_size > 0) {
2366 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2368 KMP_DEBUG_ASSERT(used >= desc.size_a);
2370 kmp_mk_free(*al->memkind, desc.ptr_alloc);
2373 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2375 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2377 KMP_DEBUG_ASSERT(used >= desc.size_a);
2379 __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
2390 struct kmp_mem_descr {
2391 void *ptr_allocated;
2392 size_t size_allocated;
2394 size_t size_aligned;
2396 typedef struct kmp_mem_descr kmp_mem_descr_t;
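// ___kmp_allocate_align: malloc a padded block, align the user pointer, and
// stash a kmp_mem_descr_t immediately before it so ___kmp_free can recover
// the original allocation.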
2401 static void *___kmp_allocate_align(size_t size,
2402 size_t alignment KMP_SRC_LOC_DECL) {
2419 kmp_mem_descr_t descr;
2420 kmp_uintptr_t addr_allocated;
2421 kmp_uintptr_t addr_aligned;
2422 kmp_uintptr_t addr_descr;
2424 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2425 (int)size, (int)alignment KMP_SRC_LOC_PARM));
2427 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
2428 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
2431 descr.size_aligned = size;
2432 descr.size_allocated =
2433 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
2436 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
2438 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
2440 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
2441 descr.ptr_allocated));
2442 if (descr.ptr_allocated == NULL) {
2443 KMP_FATAL(OutOfHeapMemory);
2446 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2448 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
2449 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
2451 descr.ptr_aligned = (void *)addr_aligned;
2453 KE_TRACE(26, (" ___kmp_allocate_align: "
2454 "ptr_allocated=%p, size_allocated=%d, "
2455 "ptr_aligned=%p, size_aligned=%d\n",
2456 descr.ptr_allocated, (int)descr.size_allocated,
2457 descr.ptr_aligned, (int)descr.size_aligned));
2459 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
2460 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
2461 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2462 addr_allocated + descr.size_allocated);
2463 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
2465 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2468 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
2474 *((kmp_mem_descr_t *)addr_descr) = descr;
2478 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
2479 return descr.ptr_aligned;
2486 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
2488 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
2489 (int)size KMP_SRC_LOC_PARM));
2490 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2491 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
2499 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
2500 int page_size = 8 * 1024;
2503 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
2504 (int)size KMP_SRC_LOC_PARM));
2505 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2506 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
2512 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
2513 kmp_mem_descr_t descr;
2515 kmp_uintptr_t addr_allocated;
2516 kmp_uintptr_t addr_aligned;
2519 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2520 KMP_ASSERT(ptr != NULL);
2522 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
2524 KE_TRACE(26, (" __kmp_free: "
2525 "ptr_allocated=%p, size_allocated=%d, "
2526 "ptr_aligned=%p, size_aligned=%d\n",
2527 descr.ptr_allocated, (int)descr.size_allocated,
2528 descr.ptr_aligned, (int)descr.size_aligned));
2530 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2531 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2532 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2533 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2534 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
2535 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
2536 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2537 addr_allocated + descr.size_allocated);
2538 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2543 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
2545 _free_src_loc(descr.ptr_allocated, _file_, _line_);
2547 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
2551 KE_TRACE(25, ("<- __kmp_free() returns\n"));
2554 #if USE_FAST_MEMORY == 3
2560 #define KMP_FREE_LIST_LIMIT 16
2563 #define DCACHE_LINE 128
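// Fast per-thread memory: small blocks are served from size-binned free lists
// (th_free_list_self for same-thread frees, th_free_list_sync for frees coming
// from other threads); larger requests go through bget.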
2565 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
2567 size_t num_lines, idx;
2571 kmp_mem_descr_t *descr;
2573 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2574 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
2576 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2577 idx = num_lines - 1;
2578 KMP_DEBUG_ASSERT(idx >= 0);
2582 } else if ((idx >>= 2) == 0) {
2585 } else if ((idx >>= 2) == 0) {
2588 } else if ((idx >>= 2) == 0) {
2595 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2598 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2599 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2600 sizeof(kmp_mem_descr_t)))
2604 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2609 while (!KMP_COMPARE_AND_STORE_PTR(
2610 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
2612 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2616 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2617 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2618 sizeof(kmp_mem_descr_t)))
2625 size = num_lines * DCACHE_LINE;
2627 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2628 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2630 __kmp_gtid_from_thread(this_thr), alloc_size));
2631 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2634 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
2636 ~(DCACHE_LINE - 1));
2637 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2639 descr->ptr_allocated = alloc_ptr;
2641 descr->ptr_aligned = (void *)this_thr;
2644 descr->size_aligned = size;
2647 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
2648 __kmp_gtid_from_thread(this_thr), ptr));
2654 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
2655 kmp_mem_descr_t *descr;
2656 kmp_info_t *alloc_thr;
2661 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2662 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2663 KMP_ASSERT(ptr != NULL);
2665 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2667 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
2668 (int)descr->size_aligned));
2670 size = descr->size_aligned;
2672 idx = DCACHE_LINE * 2;
2675 } else if ((idx <<= 1) == size) {
2677 } else if ((idx <<= 2) == size) {
2679 } else if ((idx <<= 2) == size) {
2682 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
2686 alloc_thr = (kmp_info_t *)descr->ptr_aligned;
2687 if (alloc_thr == this_thr) {
2689 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2690 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2692 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
2695 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2696 *((void **)ptr) = NULL;
2697 descr->size_allocated = (size_t)1;
2700 kmp_mem_descr_t *dsc =
2701 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
2703 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2705 dsc->size_allocated + 1;
2706 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2708 *((void **)ptr) = head;
2709 descr->size_allocated = q_sz;
2710 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2717 void *next = *((void **)head);
2718 while (next != NULL) {
2721 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
2724 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
2727 next = *((void **)next);
2729 KMP_DEBUG_ASSERT(q_th != NULL);
2731 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2734 *((void **)tail) = old_ptr;
2736 while (!KMP_COMPARE_AND_STORE_PTR(
2737 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2739 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2740 *((void **)tail) = old_ptr;
2744 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2745 *((void **)ptr) = NULL;
2746 descr->size_allocated = (size_t)1;
2753 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2754 __kmp_gtid_from_thread(this_thr), size));
2755 __kmp_bget_dequeue(this_thr);
2756 brel(this_thr, descr->ptr_allocated);
2759 KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
2765 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2766 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2768 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
2773 void __kmp_free_fast_memory(kmp_info_t *th) {
2776 thr_data_t *thr = get_thr_data(th);
2780 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2782 __kmp_bget_dequeue(th);
2785 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2786 bfhead_t *b = thr->freelist[bin].ql.flink;
2787 while (b != &thr->freelist[bin]) {
2788 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
2796 while (lst != NULL) {
2798 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2799 lst, next, th, __kmp_gtid_from_thread(th)));
2800 (*thr->relfcn)(lst);
2806 lst = (void **)next;
2810 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));