15 #include "kmp_wrapper_malloc.h"
18 #if HWLOC_API_VERSION > 0x00020300
19 #define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20 #elif HWLOC_API_VERSION == 0x00020300
21 #define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22 hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
24 enum hwloc_memattr_id_e {
25 HWLOC_MEMATTR_ID_BANDWIDTH,
26 HWLOC_MEMATTR_ID_CAPACITY
// Callback types installed through bectl() and kept in thr_data_t
// (fields compfcn, acqfcn and relfcn).
// Compaction callback: receives a size and an int, returns an int.
36 typedef int (*bget_compact_t)(size_t, int);
// Acquisition callback: receives a byte count, returns a pointer.
37 typedef void *(*bget_acquire_t)(size_t);
// Release callback: receives a pointer, returns nothing.
38 typedef void (*bget_release_t)(
void *);
43 #if KMP_ARCH_X86 || KMP_ARCH_ARM
44 typedef kmp_int32 bufsize;
46 typedef kmp_int64 bufsize;
49 typedef ssize_t bufsize;
54 typedef enum bget_mode {
// Forward declarations of the file-local allocator primitives that are
// defined further down. Every one of them operates on the kmp_info_t passed
// as `th`.
60 static void bpool(kmp_info_t *th,
void *buffer, bufsize len);
61 static void *bget(kmp_info_t *th, bufsize size);
62 static void *bgetz(kmp_info_t *th, bufsize size);
63 static void *bgetr(kmp_info_t *th,
void *buffer, bufsize newsize);
64 static void brel(kmp_info_t *th,
void *buf);
65 static void bectl(kmp_info_t *th, bget_compact_t compact,
66 bget_acquire_t acquire, bget_release_t release,
76 #if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD
79 #define AlignType double
84 #define AlignType _Quad
120 static bufsize bget_bin_size[] = {
130 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
// Number of entries in bget_bin_size[], cast to int so it can bound the int
// bin indices and loop counters used throughout this file.
138 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
145 typedef struct qlinks {
146 struct bfhead *flink;
147 struct bfhead *blink;
151 typedef struct bhead2 {
159 typedef union bhead {
162 char b_pad[
sizeof(bhead2_t) + (SizeQuant - (
sizeof(bhead2_t) % SizeQuant))];
// Reinterpret an arbitrary pointer as a pointer to a bhead_t header.
165 #define BH(p) ((bhead_t *)(p))
168 typedef struct bdhead {
// Reinterpret an arbitrary pointer as a pointer to a bdhead_t header.
172 #define BDH(p) ((bdhead_t *)(p))
175 typedef struct bfhead {
// Reinterpret an arbitrary pointer as a pointer to a bfhead_t header.
179 #define BFH(p) ((bfhead_t *)(p))
181 typedef struct thr_data {
182 bfhead_t freelist[MAX_BGET_BINS];
187 long numpget, numprel;
188 long numdget, numdrel;
192 bget_compact_t compfcn;
193 bget_acquire_t acqfcn;
194 bget_release_t relfcn;
// Size in bytes of the pair of free-list links (qlinks_t).
207 #define QLSize (sizeof(qlinks_t))
// The larger of SizeQuant and QLSize.
208 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
211 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
// Pick the free-list bin for a block of `size` bytes by bisecting the
// bget_bin_size[] table between the indices lo and hi.
222 static int bget_get_bin(bufsize size) {
224 int lo = 0, hi = MAX_BGET_BINS - 1;
// The size must be strictly positive (asserted via KMP_DEBUG_ASSERT).
226 KMP_DEBUG_ASSERT(size > 0);
// Keep halving the [lo, hi] interval until the two indices are adjacent.
228 while ((hi - lo) > 1) {
229 int mid = (lo + hi) >> 1;
230 if (size < bget_bin_size[mid])
// Sanity check: lo must be a valid index into the bin table.
236 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
// Create or reset the thr_data_t bookkeeping structure for thread `th` and
// attach it to th->th.th_local.bget_data.
241 static void set_thr_data(kmp_info_t *th) {
// Reuse the structure that is already attached, if any; otherwise obtain a
// new one from __kmp_allocate().
245 data = (thr_data_t *)((!th->th.th_local.bget_data)
246 ? __kmp_allocate(
sizeof(*data))
247 : th->th.th_local.bget_data);
// Zero every field, including the counters and callback pointers.
249 memset(data,
'\0',
sizeof(*data));
// Link each bin's list head to itself in both directions. The list walks in
// this file stop when they reach the head again, so a self-linked head is an
// empty list.
251 for (i = 0; i < MAX_BGET_BINS; ++i) {
252 data->freelist[i].ql.flink = &data->freelist[i];
253 data->freelist[i].ql.blink = &data->freelist[i];
256 th->th.th_local.bget_data = data;
// Start with an empty bget_list (the list consumed by __kmp_bget_dequeue).
257 th->th.th_local.bget_list = 0;
// When compare-and-exchange is not used for bget_list, initialise the lock
// that the enqueue/dequeue code takes around it.
258 #if !USE_CMP_XCHG_FOR_BGET
259 #ifdef USE_QUEUING_LOCK_FOR_BGET
260 __kmp_init_lock(&th->th.th_local.bget_lock);
262 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
// Fetch the thr_data_t previously attached to `th` (see set_thr_data).
267 static thr_data_t *get_thr_data(kmp_info_t *th) {
270 data = (thr_data_t *)th->th.th_local.bget_data;
// The structure is expected to exist already; this is only asserted.
272 KMP_DEBUG_ASSERT(data != 0);
// Detach the whole chain of buffers hanging off th->th.th_local.bget_list
// (leaving the list empty) and then walk that chain one buffer at a time via
// the ql.flink link stored in each buffer's header.
278 static void __kmp_bget_dequeue(kmp_info_t *th) {
279 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
// Compare-and-exchange variant: swap the list head for nullptr, re-reading
// the head and retrying whenever the exchange fails.
282 #if USE_CMP_XCHG_FOR_BGET
284 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
285 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
286 CCAST(
void *, old_value),
nullptr)) {
288 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
290 p = CCAST(
void *, old_value);
// Lock variant: take bget_lock, grab the head and clear it, then unlock.
293 #ifdef USE_QUEUING_LOCK_FOR_BGET
294 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
296 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
299 p = (
void *)th->th.th_local.bget_list;
300 th->th.th_local.bget_list = 0;
302 #ifdef USE_QUEUING_LOCK_FOR_BGET
303 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
305 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
// The header of a queued buffer sits sizeof(bhead_t) bytes before the user
// pointer p.
312 bfhead_t *b = BFH(((
char *)p) -
sizeof(bhead_t));
314 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
315 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
317 KMP_DEBUG_ASSERT(b->ql.blink == 0);
// Advance to the next queued buffer.
319 p = (
void *)b->ql.flink;
// Push buffer `buf` onto the front of th->th.th_local.bget_list. The link to
// the previous list head is stored in the ql.flink field of the buffer's
// header, which is located sizeof(bhead_t) bytes before `buf`.
327 static void __kmp_bget_enqueue(kmp_info_t *th,
void *buf
328 #ifdef USE_QUEUING_LOCK_FOR_BGET
333 bfhead_t *b = BFH(((
char *)buf) -
sizeof(bhead_t));
335 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
336 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
341 KC_TRACE(10, (
"__kmp_bget_enqueue: moving buffer to T#%d list\n",
342 __kmp_gtid_from_thread(th)));
// Compare-and-exchange variant: point the buffer at the current head, then
// try to install the buffer as the new head; on failure re-read the head,
// relink and retry.
344 #if USE_CMP_XCHG_FOR_BGET
346 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
349 b->ql.flink = BFH(CCAST(
void *, old_value));
351 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
352 CCAST(
void *, old_value), buf)) {
354 old_value = TCR_PTR(th->th.th_local.bget_list);
357 b->ql.flink = BFH(CCAST(
void *, old_value));
// Lock variant: the same push performed while holding bget_lock.
361 #ifdef USE_QUEUING_LOCK_FOR_BGET
362 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
364 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
367 b->ql.flink = BFH(th->th.th_local.bget_list);
368 th->th.th_local.bget_list = (
void *)buf;
370 #ifdef USE_QUEUING_LOCK_FOR_BGET
371 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
373 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
// Link free block `b` into the circular list of the bin selected by its
// size. The block is placed immediately before the list head, i.e. at the
// blink end of the list.
379 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
// Both the block address and its size must be multiples of SizeQuant.
382 KMP_DEBUG_ASSERT(((
size_t)b) % SizeQuant == 0);
383 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
385 bin = bget_get_bin(b->bh.bb.bsize);
// Check that the head's neighbours point back at the head before editing.
387 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
388 &thr->freelist[bin]);
389 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
390 &thr->freelist[bin]);
// b goes between the old last element (head.blink) and the head.
392 b->ql.flink = &thr->freelist[bin];
393 b->ql.blink = thr->freelist[bin].ql.blink;
395 thr->freelist[bin].ql.blink = b;
396 b->ql.blink->ql.flink = b;
// Unlink free block `b` from whichever circular list it is on by joining its
// two neighbours to each other. b's own link fields are left unchanged.
400 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
// Both neighbours must currently point at b.
401 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
402 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
404 b->ql.blink->ql.flink = b->ql.flink;
405 b->ql.flink->ql.blink = b->ql.blink;
// Report free-space statistics for thread `th` through the two out
// parameters: *total_free accumulates the payload bytes (block size minus
// one bhead_t) of every free block; *max_free is derived from one candidate
// block ("best") per bin.
409 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
410 thr_data_t *thr = get_thr_data(th);
413 *total_free = *max_free = 0;
415 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
// The list head serves as the "no candidate yet" marker.
418 best = &thr->freelist[bin];
421 while (b != &thr->freelist[bin]) {
422 *total_free += (b->bh.bb.bsize -
sizeof(bhead_t));
// NOTE(review): the candidate test uses `<`, i.e. it is satisfied by blocks
// smaller than the current candidate -- confirm that this is what is wanted
// for a statistic named max_free.
423 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
430 if (*max_free < best->bh.bb.bsize)
431 *max_free = best->bh.bb.bsize;
// Convert the block size into a payload size by removing one header, but
// only when the value is larger than a header.
434 if (*max_free > (bufsize)
sizeof(bhead_t))
435 *max_free -=
sizeof(bhead_t);
// Allocate a buffer of at least `requested_size` bytes for thread `th`.
// The free lists of the thread are searched first; when nothing fits, the
// compaction callback and then the acquisition callback (both installed via
// bectl) are consulted.
439 static void *bget(kmp_info_t *th, bufsize requested_size) {
440 thr_data_t *thr = get_thr_data(th);
441 bufsize size = requested_size;
// Guard against negative sizes and sizes that exceed MaxSize once a header
// is added.
449 if (size < 0 || size +
sizeof(bhead_t) > MaxSize) {
// Process this thread's pending bget_list before searching the free lists.
453 __kmp_bget_dequeue(th);
455 if (size < (bufsize)SizeQ) {
// Round the request up to a multiple of SizeQuant (mask arithmetic, so
// SizeQuant is assumed to be a power of two).
458 #if defined(SizeQuant) && (SizeQuant > 1)
459 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
// From here on `size` includes the bhead_t header.
462 size +=
sizeof(bhead_t);
463 KMP_DEBUG_ASSERT(size >= 0);
464 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
// In LIFO mode the lists are traversed through blink instead of flink.
466 use_blink = (thr->mode == bget_mode_lifo);
// Start with the bin that matches `size` and continue with the later bins.
475 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
477 b = (use_blink ? thr->freelist[bin].ql.blink
478 : thr->freelist[bin].ql.flink);
// Best-fit mode: scan the whole bin; the test below is satisfied by blocks
// that are large enough and smaller than the current candidate (the list
// head stands for "no candidate yet").
480 if (thr->mode == bget_mode_best) {
481 best = &thr->freelist[bin];
485 while (b != &thr->freelist[bin]) {
486 if (b->bh.bb.bsize >= (bufsize)size) {
487 if ((best == &thr->freelist[bin]) ||
488 (b->bh.bb.bsize < best->bh.bb.bsize)) {
494 b = (use_blink ? b->ql.blink : b->ql.flink);
// Walk the bin looking for a block that can hold `size` bytes.
499 while (b != &thr->freelist[bin]) {
500 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
// Split the block only if the part left over would exceed SizeQ plus one
// header.
509 if ((b->bh.bb.bsize - (bufsize)size) >
510 (bufsize)(SizeQ + (
sizeof(bhead_t)))) {
// ba: header of the allocation, carved from the tail end of b.
// bn: header of the block that follows the allocation in memory.
513 ba = BH(((
char *)b) + (b->bh.bb.bsize - (bufsize)size));
514 bn = BH(((
char *)ba) + size);
516 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
// Shrink the free block and record its new size in ba's prevfree field.
519 b->bh.bb.bsize -= (bufsize)size;
522 ba->bb.prevfree = b->bh.bb.bsize;
// The size of the handed-out block is stored negated (brel asserts that the
// size of a buffer being released is negative).
525 ba->bb.bsize = -size;
// b changed size and the bin is chosen from the size, so unlink b and insert
// it again.
534 __kmp_bget_remove_from_freelist(b);
535 __kmp_bget_insert_into_freelist(thr, b);
537 thr->totalloc += (size_t)size;
// The user pointer starts right after ba's header.
540 buf = (
void *)((((
char *)ba) +
sizeof(bhead_t)));
541 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
// No split: the entire free block is handed out. ba is the header of the
// block that follows b in memory.
546 ba = BH(((
char *)b) + b->bh.bb.bsize);
548 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
553 __kmp_bget_remove_from_freelist(b);
555 thr->totalloc += (size_t)b->bh.bb.bsize;
// Negate the size to flag the block as handed out.
559 b->bh.bb.bsize = -(b->bh.bb.bsize);
562 TCW_PTR(ba->bb.bthr, th);
// The user pointer is the address of the (now unused) free-list links.
568 buf = (
void *)&(b->ql);
569 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
575 b = (use_blink ? b->ql.blink : b->ql.flink);
// This condition holds when no compaction callback is installed or when the
// callback returns zero.
583 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
// Nothing on the free lists fits: fall back to the acquisition callback.
591 if (thr->acqfcn != 0) {
// Requests larger than a pool increment (minus one header) are obtained
// directly from the acquisition callback and get a bdhead_t header.
592 if (size > (bufsize)(thr->exp_incr -
sizeof(bhead_t))) {
// `size` already contains one bhead_t; replace it by a bdhead_t.
597 size +=
sizeof(bdhead_t) -
sizeof(bhead_t);
599 KE_TRACE(10, (
"%%%%%% MALLOC( %d )\n", (
int)size));
602 bdh = BDH((*thr->acqfcn)((bufsize)size));
// A block size of zero identifies a directly acquired buffer (brel tests
// for bsize == 0).
606 bdh->bh.bb.bsize = 0;
609 TCW_PTR(bdh->bh.bb.bthr, th);
611 bdh->bh.bb.prevfree = 0;
614 thr->totalloc += (size_t)size;
// The user pointer starts right after the bdhead_t.
618 buf = (
void *)(bdh + 1);
619 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
// Otherwise acquire a new pool block of exp_incr bytes and register it with
// bpool().
628 KE_TRACE(10, (
"%%%%%% MALLOCB( %d )\n", (
int)thr->exp_incr));
631 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
632 KMP_DEBUG_ASSERT(((
size_t)newpool) % SizeQuant == 0);
633 if (newpool != NULL) {
634 bpool(th, newpool, thr->exp_incr);
// Allocate a buffer with bget() and fill its usable area with zero bytes.
// The number of bytes cleared (rsize) is computed from the buffer's header,
// not taken from the requested size.
651 static void *bgetz(kmp_info_t *th, bufsize size) {
652 char *buf = (
char *)bget(th, size);
// The header precedes the user pointer; the stored size is negative for a
// handed-out block, so negate it.
658 b = BH(buf -
sizeof(bhead_t));
659 rsize = -(b->bb.bsize);
// For a buffer with a bdhead_t header the usable size is tsize minus that
// header.
663 bd = BDH(buf -
sizeof(bdhead_t));
664 rsize = bd->tsize - (bufsize)
sizeof(bdhead_t);
666 rsize -=
sizeof(bhead_t);
// The usable size must cover at least what the caller asked for.
669 KMP_DEBUG_ASSERT(rsize >= size);
671 (void)memset(buf, 0, (bufsize)rsize);
673 return ((
void *)buf);
// Reallocate `buf` to `size` bytes: a new buffer is obtained from bget() and
// the smaller of the old usable size and the new size is copied over.
681 static void *bgetr(kmp_info_t *th,
void *buf, bufsize size) {
686 nbuf = bget(th, size);
// Work out the usable size of the old buffer (osize) from its header; the
// stored size of a handed-out block is negative.
693 b = BH(((
char *)buf) -
sizeof(bhead_t));
694 osize = -b->bb.bsize;
// For a buffer with a bdhead_t header the usable size is tsize minus that
// header.
699 bd = BDH(((
char *)buf) -
sizeof(bdhead_t));
700 osize = bd->tsize - (bufsize)
sizeof(bdhead_t);
702 osize -=
sizeof(bhead_t);
705 KMP_DEBUG_ASSERT(osize > 0);
// Copy min(size, osize) bytes from the old buffer into the new one.
707 (void)KMP_MEMCPY((
char *)nbuf, (
char *)buf,
708 (
size_t)((size < osize) ? size : osize));
// Release buffer `buf`. Directly acquired buffers (block size 0) are handed
// to the release callback; pool buffers are returned to a free list and
// merged with free neighbours; a pool block that becomes completely free may
// be handed back through the release callback as well.
715 static void brel(kmp_info_t *th,
void *buf) {
716 thr_data_t *thr = get_thr_data(th);
720 KMP_DEBUG_ASSERT(buf != NULL);
721 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
// The block header sits immediately before the user pointer.
723 b = BFH(((
char *)buf) -
sizeof(bhead_t));
// Block size 0 identifies a buffer that bget() obtained directly from the
// acquisition callback; such buffers carry a bdhead_t.
725 if (b->bh.bb.bsize == 0) {
728 bdh = BDH(((
char *)buf) -
sizeof(bdhead_t));
729 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
731 thr->totalloc -= (size_t)bdh->tsize;
// Overwrite the released payload with 0x55 bytes.
736 (
void)memset((
char *)buf, 0x55, (
size_t)(bdh->tsize -
sizeof(bdhead_t)));
739 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)bdh));
741 KMP_DEBUG_ASSERT(thr->relfcn != 0);
742 (*thr->relfcn)((
void *)bdh);
// Read the thread pointer recorded in the header; buffers can be passed to
// that thread's bget_list with __kmp_bget_enqueue().
746 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
750 __kmp_bget_enqueue(bth, buf
751 #ifdef USE_QUEUING_LOCK_FOR_BGET
753 __kmp_gtid_from_thread(th)
// A handed-out block must have a negative size.
760 if (b->bh.bb.bsize >= 0) {
763 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
// bsize is negative here, so b - bsize is the address of the next block.
767 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
// Adding the negative size reduces the allocation total.
771 thr->totalloc += (size_t)b->bh.bb.bsize;
// A non-zero prevfree means the preceding block is free: merge backwards.
776 if (b->bh.bb.prevfree != 0) {
781 bufsize size = b->bh.bb.bsize;
784 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.prevfree)->bb.bsize ==
// Step back to the preceding block and enlarge it; `size` is negative, so
// subtracting it makes the block bigger.
786 b = BFH(((
char *)b) - b->bh.bb.prevfree);
787 b->bh.bb.bsize -= size;
790 __kmp_bget_remove_from_freelist(b);
// Flip the sign of the size so that the block counts as free.
795 b->bh.bb.bsize = -b->bh.bb.bsize;
799 __kmp_bget_insert_into_freelist(thr, b);
// A positive size in the following block means it is free: merge forwards.
805 bn = BFH(((
char *)b) + b->bh.bb.bsize);
806 if (bn->bh.bb.bsize > 0) {
810 KMP_DEBUG_ASSERT(BH((
char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
813 __kmp_bget_remove_from_freelist(bn);
815 b->bh.bb.bsize += bn->bh.bb.bsize;
// b changed size, so unlink it and insert it again under its new size.
819 __kmp_bget_remove_from_freelist(b);
820 __kmp_bget_insert_into_freelist(thr, b);
828 bn = BFH(((
char *)b) + b->bh.bb.bsize);
// Overwrite the free block's payload (everything after the bfhead_t) with
// 0x55 bytes.
831 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
832 (
size_t)(b->bh.bb.bsize -
sizeof(bfhead_t)));
834 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
// Tell the following block that its predecessor is free and how big it is.
839 bn->bh.bb.prevfree = b->bh.bb.bsize;
// If a release callback exists and the free block now spans a whole pool
// block (pool_len minus the trailing header), the pool block can be given
// back.
845 if (thr->relfcn != 0 &&
846 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
852 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
853 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
854 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
858 __kmp_bget_remove_from_freelist(b);
860 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
866 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
869 if (thr->last_pool == b)
// Install the compaction, acquisition and release callbacks together with
// the pool increment in the thr_data_t of thread `th`.
879 static void bectl(kmp_info_t *th, bget_compact_t compact,
880 bget_acquire_t acquire, bget_release_t release,
882 thr_data_t *thr = get_thr_data(th);
884 thr->compfcn = compact;
885 thr->acqfcn = acquire;
886 thr->relfcn = release;
887 thr->exp_incr = pool_incr;
// Add the memory region [buf, buf + len) to the free lists of thread `th` as
// a single free block followed by an end-sentinel header.
891 static void bpool(kmp_info_t *th,
void *buf, bufsize len) {
893 thr_data_t *thr = get_thr_data(th);
894 bfhead_t *b = BFH(buf);
// Process this thread's pending bget_list first.
897 __kmp_bget_dequeue(th);
// Round the length down to a multiple of SizeQuant.
900 len &= ~((bufsize)(SizeQuant - 1));
// pool_len bookkeeping: distinguishes "no pool length recorded yet" from
// "this pool differs in length from the recorded one".
902 if (thr->pool_len == 0) {
904 }
else if (len != thr->pool_len) {
910 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
916 KMP_DEBUG_ASSERT(len -
sizeof(bhead_t) <= -((bufsize)ESent + 1));
// The first block of a pool has no predecessor.
921 b->bh.bb.prevfree = 0;
// Keep one header's worth of space at the end for the sentinel; the rest
// becomes the free block.
930 len -=
sizeof(bhead_t);
931 b->bh.bb.bsize = (bufsize)len;
// Record the owning thread (combined with a flag value) in the header.
933 TCW_PTR(b->bh.bb.bthr,
934 (kmp_info_t *)((kmp_uintptr_t)th |
938 __kmp_bget_insert_into_freelist(thr, b);
941 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
942 (
size_t)(len -
sizeof(bfhead_t)));
// bn is the sentinel header right after the free block: its prevfree holds
// the size of the free block and its size field holds ESent.
944 bn = BH(((
char *)b) + len);
945 bn->bb.prevfree = (bufsize)len;
947 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
949 bn->bb.bsize = ESent;
// Print the allocation counters of thread `th` followed by a listing of its
// free blocks, using __kmp_printf_no_lock().
953 static void bfreed(kmp_info_t *th) {
954 int bin = 0, count = 0;
955 int gtid = __kmp_gtid_from_thread(th);
956 thr_data_t *thr = get_thr_data(th);
// One summary line with all counters kept in thr_data_t.
959 __kmp_printf_no_lock(
"__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
960 " get=%" KMP_INT64_SPEC
" rel=%" KMP_INT64_SPEC
961 " pblk=%" KMP_INT64_SPEC
" pget=%" KMP_INT64_SPEC
962 " prel=%" KMP_INT64_SPEC
" dget=%" KMP_INT64_SPEC
963 " drel=%" KMP_INT64_SPEC
"\n",
964 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
965 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
966 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
967 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
// Visit every block of every bin.
970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
973 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
975 bufsize bs = b->bh.bb.bsize;
// List consistency checks; a free block must have a positive size.
977 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
978 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
979 KMP_DEBUG_ASSERT(bs > 0);
983 __kmp_printf_no_lock(
984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
// Compare the payload with itself shifted by one byte: the comparison only
// succeeds when every payload byte equals its neighbour, as is the case for
// a block filled with a single repeated byte.
988 char *lerr = ((
char *)b) +
sizeof(bfhead_t);
989 if ((bs >
sizeof(bfhead_t)) &&
991 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
993 __kmp_printf_no_lock(
"__kmp_printpool: T#%d (Contents of above "
994 "free block have been overstored.)\n",
1003 __kmp_printf_no_lock(
"__kmp_printpool: T#%d No free blocks\n", gtid);
// Set up the allocator for thread `th`: malloc and free are installed as the
// acquisition and release callbacks, no compaction callback is used, and the
// pool increment is taken from __kmp_malloc_pool_incr.
1006 void __kmp_initialize_bget(kmp_info_t *th) {
// SizeQuant must be at least the size of a pointer, and th must be non-null.
1007 KMP_DEBUG_ASSERT(SizeQuant >=
sizeof(
void *) && (th != 0));
1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1012 (bufsize)__kmp_malloc_pool_incr);
// Tear down the allocator state of thread `th`: a pool block that is
// entirely free may be handed back through the release callback, and the
// thr_data_t itself is released with __kmp_free().
1015 void __kmp_finalize_bget(kmp_info_t *th) {
1019 KMP_DEBUG_ASSERT(th != 0);
1022 thr = (thr_data_t *)th->th.th_local.bget_data;
1023 KMP_DEBUG_ASSERT(thr != NULL);
// Same "whole pool block is free" test as in brel(): a release callback
// exists, pool blocks are outstanding, and block b spans pool_len minus the
// trailing header.
1031 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1032 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
1033 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1034 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1035 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
1039 __kmp_bget_remove_from_freelist(b);
1041 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
1046 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
// Release the bookkeeping structure and clear the pointer to it.
1051 if (th->th.th_local.bget_data != NULL) {
1052 __kmp_free(th->th.th_local.bget_data);
1053 th->th.th_local.bget_data = NULL;
// Set the pool increment of the calling thread to `size`. This goes through
// bectl(), so the callbacks are (re)installed as well: no compaction
// callback, malloc for acquisition, free for release.
1057 void kmpc_set_poolsize(
size_t size) {
1058 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1059 (bget_release_t)free, (bufsize)size);
// Query pool-size information for the calling thread; the value is read
// from the thread's thr_data_t, obtained here.
1062 size_t kmpc_get_poolsize(
void) {
1065 p = get_thr_data(__kmp_get_thread());
// Select the free-list search mode of the calling thread. The mode is only
// stored when it is one of bget_mode_fifo, bget_mode_lifo or bget_mode_best.
1070 void kmpc_set_poolmode(
int mode) {
1073 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1074 mode == bget_mode_best) {
1075 p = get_thr_data(__kmp_get_thread());
1076 p->mode = (bget_mode_t)mode;
// Query the free-list search mode of the calling thread; the value is read
// from the thread's thr_data_t, obtained here.
1080 int kmpc_get_poolmode(
void) {
1083 p = get_thr_data(__kmp_get_thread());
// Report pool statistics of the calling thread through the out parameters
// `maxmem` and `allmem`.
1088 void kmpc_get_poolstat(
size_t *maxmem,
size_t *allmem) {
1089 kmp_info_t *th = __kmp_get_thread();
// Process the thread's pending bget_list first.
1092 __kmp_bget_dequeue(th);
// Print pool information for the calling thread.
1100 void kmpc_poolprint(
void) {
1101 kmp_info_t *th = __kmp_get_thread();
// Process the thread's pending bget_list first.
1103 __kmp_bget_dequeue(th);
// Allocate `size` bytes for the user. One extra pointer-sized slot is
// requested in front of the user area; it stores the address returned by
// bget(), which kmpc_free() and kmpc_realloc() read back from ptr[-1].
1110 void *kmpc_malloc(
size_t size) {
1112 ptr = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
// Store the block's own address in its first slot ...
1115 *(
void **)ptr = ptr;
// ... and step over that slot to obtain the user pointer.
1116 ptr = (
void **)ptr + 1;
// True when `n` has at most one bit set. Note that n == 0 also yields true,
// and that `n` is evaluated twice.
1121 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
// Allocate `size` bytes at an address derived from `alignment`. The request
// is enlarged by one pointer slot plus `alignment` bytes so that a suitable
// address can be chosen inside the block; the address returned by bget() is
// stored in the slot just below the user pointer.
1123 void *kmpc_aligned_malloc(
size_t size,
size_t alignment) {
1125 void *ptr_allocated;
// Alignments of 32 KiB and above are not expected (asserted only).
1126 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1127 if (!IS_POWER_OF_TWO(alignment)) {
1132 size = size +
sizeof(
void *) + alignment;
1133 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1134 if (ptr_allocated != NULL) {
// Skip the bookkeeping slot and a full alignment unit, then apply a mask.
1136 ptr = (
void *)(((kmp_uintptr_t)ptr_allocated +
sizeof(
void *) + alignment) &
// Remember the real start of the block just below the user pointer.
1138 *((
void **)ptr - 1) = ptr_allocated;
// Allocate zero-filled storage for `nelem` elements of `elsize` bytes each,
// with the same hidden pointer slot in front as kmpc_malloc().
// NOTE(review): nelem * elsize is computed without an overflow check and is
// then converted to bufsize -- confirm that callers cannot pass operands
// whose product wraps around.
1145 void *kmpc_calloc(
size_t nelem,
size_t elsize) {
1147 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize +
sizeof(ptr)));
// Store the block's own address in its first slot and step over it.
1150 *(
void **)ptr = ptr;
1151 ptr = (
void **)ptr + 1;
// Resize a user buffer. Three paths are visible: a plain allocation through
// bget(), a release through brel() when `size` is 0, and a reallocation
// through bgetr(). User pointers carry a hidden slot at ptr[-1] holding the
// address of the underlying block.
1156 void *kmpc_realloc(
void *ptr,
size_t size) {
1157 void *result = NULL;
// Allocation path: request the user size plus the hidden slot.
1160 result = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1162 if (result != NULL) {
1163 *(
void **)result = result;
1164 result = (
void **)result + 1;
// Size 0: release the underlying block; `result` keeps its initial NULL
// unless assigned elsewhere.
1166 }
else if (size == 0) {
1172 KMP_ASSERT(*((
void **)ptr - 1));
1173 brel(__kmp_get_thread(), *((
void **)ptr - 1));
// Reallocation path: pass the underlying block (read from ptr[-1]) to
// bgetr() and set up the hidden slot of the new block.
1175 result = bgetr(__kmp_entry_thread(), *((
void **)ptr - 1),
1176 (bufsize)(size +
sizeof(ptr)));
1177 if (result != NULL) {
1178 *(
void **)result = result;
1179 result = (
void **)result + 1;
// Release a user buffer obtained from kmpc_malloc / kmpc_calloc /
// kmpc_aligned_malloc / kmpc_realloc.
1186 void kmpc_free(
void *ptr) {
// Special case for a runtime that has not been serially initialised.
1187 if (!__kmp_init_serial) {
1191 kmp_info_t *th = __kmp_get_thread();
// Process the thread's pending bget_list first.
1192 __kmp_bget_dequeue(th);
// The slot just below the user pointer holds the address of the underlying
// block; it must be non-null.
1194 KMP_ASSERT(*((
void **)ptr - 1));
1195 brel(th, *((
void **)ptr - 1));
// Allocate `size` bytes from the pool of thread `th` via bget(), with trace
// output on entry and exit. No hidden pointer slot is added here.
1199 void *___kmp_thread_malloc(kmp_info_t *th,
size_t size KMP_SRC_LOC_DECL) {
1201 KE_TRACE(30, (
"-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1202 (
int)size KMP_SRC_LOC_PARM));
1203 ptr = bget(th, (bufsize)size);
1204 KE_TRACE(30, (
"<- __kmp_thread_malloc() returns %p\n", ptr));
// Allocate zero-filled storage for `nelem` elements of `elsize` bytes from
// the pool of thread `th` via bgetz(), with trace output on entry and exit.
// NOTE(review): nelem * elsize is computed without an overflow check --
// confirm that callers cannot pass operands whose product wraps around.
1208 void *___kmp_thread_calloc(kmp_info_t *th,
size_t nelem,
1209 size_t elsize KMP_SRC_LOC_DECL) {
1211 KE_TRACE(30, (
"-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1212 (
int)nelem, (
int)elsize KMP_SRC_LOC_PARM));
1213 ptr = bgetz(th, (bufsize)(nelem * elsize));
1214 KE_TRACE(30, (
"<- __kmp_thread_calloc() returns %p\n", ptr));
// Resize buffer `ptr` to `size` bytes within the pool of thread `th` via
// bgetr(), with trace output on entry and exit. The local `ptr` is
// overwritten with the result of bgetr().
1218 void *___kmp_thread_realloc(kmp_info_t *th,
void *ptr,
1219 size_t size KMP_SRC_LOC_DECL) {
1220 KE_TRACE(30, (
"-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1221 ptr, (
int)size KMP_SRC_LOC_PARM));
1222 ptr = bgetr(th, ptr, (bufsize)size);
1223 KE_TRACE(30, (
"<- __kmp_thread_realloc() returns %p\n", ptr));
// Release buffer `ptr` on behalf of thread `th`, with trace output on entry
// and exit.
1227 void ___kmp_thread_free(kmp_info_t *th,
void *ptr KMP_SRC_LOC_DECL) {
1228 KE_TRACE(30, (
"-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1229 ptr KMP_SRC_LOC_PARM));
// Process the thread's pending bget_list first.
1231 __kmp_bget_dequeue(th);
1234 KE_TRACE(30, (
"<- __kmp_thread_free()\n"));
// memkind entry points and kind handles. They are looked up at run time in
// __kmp_init_memkind() and are NULL whenever the library or the particular
// symbol is not available.
// Allocation entry point: (kind, size) -> pointer.
1240 static void *(*kmp_mk_alloc)(
void *k,
size_t sz);
// Release entry point: (kind, pointer).
1242 static void (*kmp_mk_free)(
void *kind,
void *ptr);
// Addresses of the library's kind objects, one per memkind kind.
1244 static void **mk_default;
1245 static void **mk_interleave;
1246 static void **mk_hbw_interleave;
1247 static void **mk_hbw_preferred;
1248 static void **mk_dax_kmem;
1249 static void **mk_dax_kmem_all;
1250 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1251 static const char *kmp_mk_lib_name;
1252 static void *h_memkind;
1254 static int (*kmp_mk_check)(
void *kind);
1255 static void **mk_hbw;
1256 static void **mk_hugetlb;
1257 static void **mk_hbw_hugetlb;
1258 static void **mk_hbw_preferred_hugetlb;
1259 static void **mk_dax_kmem_preferred;
// Target-memory entry points, filled in by __kmp_init_target_mem() through
// KMP_DLSYM. Each one stays null when its symbol cannot be found.
// Allocation entry points: (size, device) -> pointer.
1261 static void *(*kmp_target_alloc_host)(
size_t size,
int device);
1262 static void *(*kmp_target_alloc_shared)(
size_t size,
int device);
1263 static void *(*kmp_target_alloc_device)(
size_t size,
int device);
// Lock / unlock entry points.
1264 static void *(*kmp_target_lock_mem)(
void *ptr,
size_t size,
int device);
1265 static void *(*kmp_target_unlock_mem)(
void *ptr,
int device);
// Release entry points: (pointer, device). They are declared as returning
// void *.
1266 static void *(*kmp_target_free_host)(
void *ptr,
int device);
1267 static void *(*kmp_target_free_shared)(
void *ptr,
int device);
1268 static void *(*kmp_target_free_device)(
void *ptr,
int device);
// Set by __kmp_init_target_mem(): true only when all three allocation and
// all three release entry points were found.
1269 static bool __kmp_target_mem_available;
// True when MS is one of the three llvm_omp_target_* memory spaces. MS is
// expanded three times and is not parenthesised, so pass a simple
// expression without side effects.
1271 #define KMP_IS_TARGET_MEM_SPACE(MS) \
1272 (MS == llvm_omp_target_host_mem_space || \
1273 MS == llvm_omp_target_shared_mem_space || \
1274 MS == llvm_omp_target_device_mem_space)
// True when MA is one of the three llvm_omp_target_* allocators. MA is
// expanded three times and is not parenthesised, so pass a simple
// expression without side effects.
1276 #define KMP_IS_TARGET_MEM_ALLOC(MA) \
1277 (MA == llvm_omp_target_host_mem_alloc || \
1278 MA == llvm_omp_target_shared_mem_alloc || \
1279 MA == llvm_omp_target_device_mem_alloc)
// True when MS is one of the predefined memory spaces listed below (null,
// default, large-capacity, const, high-bandwidth, low-latency) or one of the
// target memory spaces. MS is expanded several times and is not
// parenthesised, so pass a simple expression without side effects.
1281 #define KMP_IS_PREDEF_MEM_SPACE(MS) \
1282 (MS == omp_null_mem_space || MS == omp_default_mem_space || \
1283 MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \
1284 MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \
1285 KMP_IS_TARGET_MEM_SPACE(MS))
1304 bool supported =
false;
1305 using get_mem_resources_t = int (*)(int,
const int *, int,
1306 omp_memspace_handle_t,
int *);
1307 using omp_alloc_t =
void *(*)(
size_t, omp_allocator_handle_t);
1308 using omp_free_t = void (*)(
void *, omp_allocator_handle_t);
1309 get_mem_resources_t tgt_get_mem_resources =
nullptr;
1310 omp_alloc_t tgt_omp_alloc =
nullptr;
1311 omp_free_t tgt_omp_free =
nullptr;
1316 tgt_get_mem_resources =
1317 (get_mem_resources_t)KMP_DLSYM(
"__tgt_get_mem_resources");
1318 tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM(
"__tgt_omp_alloc");
1319 tgt_omp_free = (omp_free_t)KMP_DLSYM(
"__tgt_omp_free");
1320 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1325 omp_memspace_handle_t memspace,
int *resources) {
1327 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1331 void *
omp_alloc(
size_t size, omp_allocator_handle_t allocator) {
1333 return tgt_omp_alloc(size, allocator);
1337 void omp_free(
void *ptr, omp_allocator_handle_t allocator) {
1339 tgt_omp_free(ptr, allocator);
1341 } __kmp_tgt_allocator;
1343 extern "C" int omp_get_num_devices(
void);
1353 omp_memspace_handle_t memspace) {
1356 if (ms->num_resources == num_resources && ms->memspace == memspace &&
1357 !memcmp(ms->resources, resources,
sizeof(
int) * num_resources))
// Return the memory-space entry for the given resource list and memory
// space. The work is done while holding `mtx`, on a sorted private copy of
// `resources`. Two exits are visible: one that frees the sorted copy again,
// and one that stores the sorted copy in an entry `ms` that is linked in
// front of memspace_list.
1366 omp_memspace_handle_t
get(
int num_resources,
const int *resources,
1367 omp_memspace_handle_t memspace) {
1368 int gtid = __kmp_entry_gtid();
1369 __kmp_acquire_lock(&mtx, gtid);
// Copy the caller's resources and sort the copy in ascending order.
1371 int *sorted_resources = (
int *)__kmp_allocate(
sizeof(
int) * num_resources);
1372 KMP_MEMCPY(sorted_resources, resources, num_resources *
sizeof(
int));
1373 qsort(sorted_resources, (
size_t)num_resources,
sizeof(
int),
// Three-way int comparison written with relational operators rather than a
// subtraction.
1374 [](
const void *a,
const void *b) {
1375 const int val_a = *(
const int *)a;
1376 const int val_b = *(
const int *)b;
1377 return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
// Exit that discards the sorted copy and drops the lock.
1381 __kmp_free(sorted_resources);
1382 __kmp_release_lock(&mtx, gtid);
// Exit that fills in entry `ms`, which takes over sorted_resources.
1386 ms->memspace = memspace;
1387 ms->num_resources = num_resources;
1388 ms->resources = sorted_resources;
1389 ms->next = memspace_list;
1391 __kmp_release_lock(&mtx, gtid);
// Initialise the lock `mtx` that get() holds while it works.
1397 void init() { __kmp_init_lock(&mtx); }
1403 __kmp_free(ms->resources);
1408 __kmp_destroy_lock(&mtx);
1413 omp_memspace_handle_t memspace) {
1414 int actual_num_devices = num_devices;
1415 int *actual_devices =
const_cast<int *
>(devices);
1416 if (actual_num_devices == 0) {
1417 actual_num_devices = omp_get_num_devices();
1418 if (actual_num_devices <= 0)
1419 return omp_null_mem_space;
1421 if (actual_devices == NULL) {
1423 actual_devices = (
int *)__kmp_allocate(
sizeof(
int) * actual_num_devices);
1424 for (
int i = 0; i < actual_num_devices; i++)
1425 actual_devices[i] = i;
1429 actual_num_devices, actual_devices, host_access, memspace, NULL);
1430 if (num_resources <= 0)
1431 return omp_null_mem_space;
1433 omp_memspace_handle_t ms = omp_null_mem_space;
1434 if (num_resources > 0) {
1435 int *resources = (
int *)__kmp_allocate(
sizeof(
int) * num_resources);
1438 actual_num_devices, actual_devices, host_access, memspace, resources);
1439 ms =
get(num_resources, resources, memspace);
1440 __kmp_free(resources);
1442 if (!devices && actual_devices)
1443 __kmp_free(actual_devices);
// Return the memory-space entry for the given resources by forwarding to
// get() with the memory space taken from `ms`.
// NOTE(review): `ms` is presumably derived from `parent` -- confirm.
1447 omp_memspace_handle_t
get_memspace(
int num_resources,
const int *resources,
1448 omp_memspace_handle_t parent) {
1450 return get(num_resources, resources, ms->memspace);
1452 } __kmp_tgt_memspace_list;
1454 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
// Check a memkind kind handle: `pkind` is the address of one of the mk_*
// variables, and the kind object it refers to is passed to kmp_mk_check().
// A non-zero result from kmp_mk_check() selects the branch below.
1455 static inline void chk_kind(
void ***pkind) {
1456 KMP_DEBUG_ASSERT(pkind);
1458 if (kmp_mk_check(**pkind))
// Try to load libmemkind.so at run time and resolve the entry points and
// kind objects used by the allocator code. On the configurations excluded by
// the #if, or when loading fails, the pointers are set to NULL.
1463 void __kmp_init_memkind() {
1465 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1467 kmp_mk_lib_name =
"libmemkind.so";
1468 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
// Resolve the three entry points and the default kind.
1470 kmp_mk_check = (int (*)(
void *))dlsym(h_memkind,
"memkind_check_available");
1472 (
void *(*)(
void *,
size_t))dlsym(h_memkind,
"memkind_malloc");
1473 kmp_mk_free = (void (*)(
void *,
void *))dlsym(h_memkind,
"memkind_free");
1474 mk_default = (
void **)dlsym(h_memkind,
"MEMKIND_DEFAULT");
// memkind counts as available only when all four symbols were found and
// kmp_mk_check() returns 0 for the default kind.
1475 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1476 !kmp_mk_check(*mk_default)) {
1477 __kmp_memkind_available = 1;
// Look up each optional kind and pass it through chk_kind().
1478 mk_interleave = (
void **)dlsym(h_memkind,
"MEMKIND_INTERLEAVE");
1479 chk_kind(&mk_interleave);
1480 mk_hbw = (
void **)dlsym(h_memkind,
"MEMKIND_HBW");
1482 mk_hbw_interleave = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_INTERLEAVE");
1483 chk_kind(&mk_hbw_interleave);
1484 mk_hbw_preferred = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_PREFERRED");
1485 chk_kind(&mk_hbw_preferred);
1486 mk_hugetlb = (
void **)dlsym(h_memkind,
"MEMKIND_HUGETLB");
1487 chk_kind(&mk_hugetlb);
1488 mk_hbw_hugetlb = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_HUGETLB");
1489 chk_kind(&mk_hbw_hugetlb);
1490 mk_hbw_preferred_hugetlb =
1491 (
void **)dlsym(h_memkind,
"MEMKIND_HBW_PREFERRED_HUGETLB");
1492 chk_kind(&mk_hbw_preferred_hugetlb);
1493 mk_dax_kmem = (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM");
1494 chk_kind(&mk_dax_kmem);
1495 mk_dax_kmem_all = (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM_ALL");
1496 chk_kind(&mk_dax_kmem_all);
1497 mk_dax_kmem_preferred =
1498 (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM_PREFERRED");
1499 chk_kind(&mk_dax_kmem_preferred);
1500 KE_TRACE(25, (
"__kmp_init_memkind: memkind library initialized\n"));
// Reset path: clear the pointers so that later code sees memkind as absent.
1506 kmp_mk_check = NULL;
1509 mk_hbw_hugetlb = NULL;
1510 mk_hbw_preferred_hugetlb = NULL;
1511 mk_dax_kmem_preferred = NULL;
1512 kmp_mk_lib_name =
"";
1514 kmp_mk_alloc = NULL;
1517 mk_interleave = NULL;
1518 mk_hbw_interleave = NULL;
1519 mk_hbw_preferred = NULL;
1521 mk_dax_kmem_all = NULL;
// Shut down memkind support: emit a trace message when the library was in
// use and reset the memkind pointers to NULL.
1524 void __kmp_fini_memkind() {
1525 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1526 if (__kmp_memkind_available)
1527 KE_TRACE(25, (
"__kmp_fini_memkind: finalize memkind library\n"));
1532 kmp_mk_check = NULL;
1535 mk_hbw_hugetlb = NULL;
1536 mk_hbw_preferred_hugetlb = NULL;
1537 mk_dax_kmem_preferred = NULL;
// NOTE(review): this guard omits the !KMP_OS_DARWIN term used by the guard
// above -- confirm that the difference is intentional.
1539 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1540 kmp_mk_alloc = NULL;
1543 mk_interleave = NULL;
1544 mk_hbw_interleave = NULL;
1545 mk_hbw_preferred = NULL;
1547 mk_dax_kmem_all = NULL;
1551 #if KMP_HWLOC_ENABLED
// Tell whether the hwloc topology supports allocating memory with the given
// binding policy, based on the membind flags of the topology's support
// structure. The check is compiled only for hwloc API 2.3.0 and newer.
1552 static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
1553 #if HWLOC_API_VERSION >= 0x00020300
1554 const hwloc_topology_support *support;
1555 support = hwloc_topology_get_support(__kmp_hwloc_topology);
// BIND needs both alloc_membind and bind_membind.
1557 if (policy == HWLOC_MEMBIND_BIND)
1558 return (support->membind->alloc_membind &&
1559 support->membind->bind_membind);
// INTERLEAVE needs both alloc_membind and interleave_membind.
1560 if (policy == HWLOC_MEMBIND_INTERLEAVE)
1561 return (support->membind->alloc_membind &&
1562 support->membind->interleave_membind);
// Allocate `size` bytes bound, with the given policy, to the node set of the
// target that hwloc reports as best for memory attribute `attr`, as seen
// from the CPU set the calling thread is bound to.
// NOTE(review): only one hwloc_bitmap_free(mask) is visible below -- confirm
// that the bitmap is released on every exit path.
1570 void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr,
size_t size,
1571 hwloc_membind_policy_t policy) {
1572 #if HWLOC_API_VERSION >= 0x00020300
1575 struct hwloc_location initiator;
// Query the CPU binding of the calling thread into a fresh bitmap.
1581 hwloc_cpuset_t mask = hwloc_bitmap_alloc();
1582 ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
1584 hwloc_bitmap_free(mask);
// Use that CPU set as the initiator of the best-target query.
1587 initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
1588 initiator.location.cpuset = mask;
1589 ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
// Allocate on the node set of the selected target.
1594 return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
1595 HWLOC_MEMBIND_BYNODESET);
// Allocate `size` bytes for memory space `ms` with the given hwloc binding
// policy: the high-bandwidth space selects targets by the bandwidth
// attribute, the large-capacity space by the capacity attribute, and a plain
// hwloc_alloc() is used otherwise.
1601 void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms,
size_t size,
1602 hwloc_membind_policy_t policy) {
1603 #if HWLOC_API_VERSION >= 0x00020300
1605 if (ms == omp_high_bw_mem_space) {
1606 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
1607 }
else if (ms == omp_large_cap_mem_space) {
1608 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
1610 ptr = hwloc_alloc(__kmp_hwloc_topology, size);
// Resolve the llvm_omp_target_* entry points through KMP_DLSYM, record
// whether target memory is usable, and initialise the target allocator and
// the target memory-space list.
1619 void __kmp_init_target_mem() {
// Each result is stored through a void ** view of the function pointer
// variable.
1620 *(
void **)(&kmp_target_alloc_host) = KMP_DLSYM(
"llvm_omp_target_alloc_host");
1621 *(
void **)(&kmp_target_alloc_shared) =
1622 KMP_DLSYM(
"llvm_omp_target_alloc_shared");
1623 *(
void **)(&kmp_target_alloc_device) =
1624 KMP_DLSYM(
"llvm_omp_target_alloc_device");
1625 *(
void **)(&kmp_target_free_host) = KMP_DLSYM(
"llvm_omp_target_free_host");
1626 *(
void **)(&kmp_target_free_shared) =
1627 KMP_DLSYM(
"llvm_omp_target_free_shared");
1628 *(
void **)(&kmp_target_free_device) =
1629 KMP_DLSYM(
"llvm_omp_target_free_device");
// Target memory is available only if all six alloc/free entry points exist.
1630 __kmp_target_mem_available =
1631 kmp_target_alloc_host && kmp_target_alloc_shared &&
1632 kmp_target_alloc_device && kmp_target_free_host &&
1633 kmp_target_free_shared && kmp_target_free_device;
// The lock/unlock entry points are looked up as well, but they do not take
// part in the availability flag.
1635 *(
void **)(&kmp_target_lock_mem) = KMP_DLSYM(
"llvm_omp_target_lock_mem");
1636 *(
void **)(&kmp_target_unlock_mem) = KMP_DLSYM(
"llvm_omp_target_unlock_mem");
1637 __kmp_tgt_allocator.
init();
1638 __kmp_tgt_memspace_list.
init();
// Counterpart of __kmp_init_target_mem(): finalise the target memory-space
// list.
1642 void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.
fini(); }
// Build an allocator object `al` for memory space `ms` from the caller's
// trait array and return it as an omp_allocator_handle_t.
// omp_null_allocator is returned on the paths below where the requested
// memory space cannot be served.
1644 omp_allocator_handle_t __kmpc_init_allocator(
int gtid, omp_memspace_handle_t ms,
1646 omp_alloctrait_t traits[]) {
// Defaults for traits that the caller may leave unspecified.
1655 al->partition = omp_atv_environment;
1656 al->pin_device = -1;
1657 al->preferred_device = -1;
1658 al->target_access = omp_atv_single;
1659 al->atomic_scope = omp_atv_device;
// Apply the caller's traits one by one.
1661 for (i = 0; i < ntraits; ++i) {
1662 switch (traits[i].key) {
1663 case omp_atk_sync_hint:
1664 case omp_atk_access:
1666 case omp_atk_pinned:
// The alignment must pass IS_POWER_OF_TWO (which also accepts 0).
1669 case omp_atk_alignment:
1670 __kmp_type_convert(traits[i].value, &(al->alignment));
1671 KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1673 case omp_atk_pool_size:
1674 al->pool_size = traits[i].value;
// The fallback must be one of the four recognised fallback values.
1676 case omp_atk_fallback:
1677 al->fb = (omp_alloctrait_value_t)traits[i].value;
1679 al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1680 al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1682 case omp_atk_fb_data:
// The partition trait is kept in al->membind when hwloc is enabled; the
// assignment to al->memkind further down stores the raw trait value as a
// pointer.
1685 case omp_atk_partition:
1686 #if KMP_HWLOC_ENABLED
1687 al->membind = (omp_alloctrait_value_t)traits[i].value;
1688 KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
1689 al->membind == omp_atv_nearest ||
1690 al->membind == omp_atv_blocked ||
1691 al->membind == omp_atv_interleaved);
1693 al->memkind = RCAST(
void **, traits[i].value);
1695 case omp_atk_pin_device:
1696 __kmp_type_convert(traits[i].value, &(al->pin_device));
1698 case omp_atk_preferred_device:
1699 __kmp_type_convert(traits[i].value, &(al->preferred_device));
1701 case omp_atk_target_access:
1702 al->target_access = (omp_alloctrait_value_t)traits[i].value;
1704 case omp_atk_atomic_scope:
1705 al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
1707 case omp_atk_part_size:
1708 __kmp_type_convert(traits[i].value, &(al->part_size));
// Any other trait key is treated as a fatal error.
1711 KMP_ASSERT2(0,
"Unexpected allocator trait");
// Memory spaces above kmp_max_mem_space are returned right away, without the
// checks that follow.
1715 if (al->memspace > kmp_max_mem_space) {
1717 return (omp_allocator_handle_t)al;
1720 KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
1724 al->fb = omp_atv_default_mem_fb;
// The allocator fallback requires fallback data to be present.
1726 }
else if (al->fb == omp_atv_allocator_fb) {
1727 KMP_ASSERT(al->fb_data != NULL);
1728 }
else if (al->fb == omp_atv_default_mem_fb) {
// With memkind available, translate the memory space (and a partition trait
// value of omp_atv_interleaved) into a concrete memkind kind.
1731 if (__kmp_memkind_available) {
1733 if (ms == omp_high_bw_mem_space) {
1734 if (al->memkind == (
void *)omp_atv_interleaved && mk_hbw_interleave) {
1735 al->memkind = mk_hbw_interleave;
1736 }
else if (mk_hbw_preferred) {
1742 al->memkind = mk_hbw_preferred;
// No usable high-bandwidth kind.
1746 return omp_null_allocator;
// Large-capacity space: mk_dax_kmem_all is tried before mk_dax_kmem.
1748 }
else if (ms == omp_large_cap_mem_space) {
1749 if (mk_dax_kmem_all) {
1751 al->memkind = mk_dax_kmem_all;
1752 }
else if (mk_dax_kmem) {
1754 al->memkind = mk_dax_kmem;
// No usable large-capacity kind.
1757 return omp_null_allocator;
// Remaining spaces: the interleave kind when it was requested and exists,
// otherwise the default kind.
1760 if (al->memkind == (
void *)omp_atv_interleaved && mk_interleave) {
1761 al->memkind = mk_interleave;
1763 al->memkind = mk_default;
// A target memory space cannot be served without the target entry points.
1766 }
else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1768 return omp_null_allocator;
// Without hwloc, the high-bandwidth and large-capacity spaces are rejected.
1770 if (!__kmp_hwloc_available &&
1771 (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
1774 return omp_null_allocator;
1777 return (omp_allocator_handle_t)al;
// Release an allocator handle with __kmp_free(). Only handles greater than
// kmp_max_mem_alloc are freed; any other value is left untouched
// (presumably those are the predefined allocators - confirm).
// `gtid` is not used by the statements shown.
1780 void __kmpc_destroy_allocator(
int gtid, omp_allocator_handle_t allocator) {
1781 if (allocator > kmp_max_mem_alloc)
1782 __kmp_free(allocator);
// Store `allocator` as the default allocator (th_def_allocator) of the thread
// identified by `gtid`. omp_null_allocator is replaced by
// omp_default_mem_alloc before it is stored.
1785 void __kmpc_set_default_allocator(
int gtid, omp_allocator_handle_t allocator) {
1786 if (allocator == omp_null_allocator)
1787 allocator = omp_default_mem_alloc;
1788 __kmp_threads[gtid]->th.th_def_allocator = allocator;
// Return the default allocator (th_def_allocator) recorded for the thread
// identified by `gtid`.
1791 omp_allocator_handle_t __kmpc_get_default_allocator(
int gtid) {
1792 return __kmp_threads[gtid]->th.th_def_allocator;
// Look up the memory space for a set of devices by delegating to
// __kmp_tgt_memspace_list.get_memspace().
//   ndevs    - number of entries in `devs`; must not be negative.
//   devs     - device list; must be non-NULL when ndevs > 0.
//   memspace - base memory space; must not exceed kmp_max_mem_space.
// Returns omp_null_mem_space when any of those checks fails.
1795 omp_memspace_handle_t __kmp_get_devices_memspace(
int ndevs,
const int *devs,
1796 omp_memspace_handle_t memspace,
// Make sure the runtime's serial initialization has run first.
1798 if (!__kmp_init_serial)
1799 __kmp_serial_initialize();
1801 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1802 return omp_null_mem_space;
1804 return __kmp_tgt_memspace_list.
get_memspace(ndevs, devs, host, memspace);
// Create an allocator for the memory space that __kmp_get_devices_memspace()
// returns for the given devices.
//   ndevs, devs, memspace - validated exactly as in
//                           __kmp_get_devices_memspace().
//   host                  - forwarded unchanged to
//                           __kmp_get_devices_memspace().
// Returns omp_null_allocator when validation fails or no memory space is
// found; otherwise the result of __kmpc_init_allocator() called with zero
// traits.
1807 omp_allocator_handle_t
1808 __kmp_get_devices_allocator(
int ndevs,
const int *devs,
1809 omp_memspace_handle_t memspace,
int host) {
// Make sure the runtime's serial initialization has run first.
1810 if (!__kmp_init_serial)
1811 __kmp_serial_initialize();
1813 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1814 return omp_null_allocator;
1816 omp_memspace_handle_t mspace =
1817 __kmp_get_devices_memspace(ndevs, devs, memspace, host);
1818 if (mspace == omp_null_mem_space)
1819 return omp_null_allocator;
// No traits are passed (count 0, NULL list).
1821 return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
// Report the number of resources of a memory space; the final return reads
// ms->num_resources.
// NOTE(review): the values returned for omp_null_mem_space and for handles
// below kmp_max_mem_space, and the derivation of `ms`, are not visible here -
// confirm against the full definition.
1824 int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
// Make sure the runtime's serial initialization has run first.
1825 if (!__kmp_init_serial)
1826 __kmp_serial_initialize();
1827 if (memspace == omp_null_mem_space)
1829 if (memspace < kmp_max_mem_space)
1832 return ms->num_resources;
// Build a sub memory space of `memspace` from a selection of its resources.
//   memspace      - parent memory space.
//   num_resources - number of entries in `resources`; must be non-zero and
//                   must not exceed ms->num_resources.
//   resources     - indices into the parent's resource array; must be
//                   non-NULL.
// Returns omp_null_mem_space when the num_resources/resources check fails.
1835 omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
1836 int num_resources,
int *resources) {
// Make sure the runtime's serial initialization has run first.
1837 if (!__kmp_init_serial)
1838 __kmp_serial_initialize();
1839 if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
1842 if (num_resources == 0 || ms->num_resources < num_resources || !resources)
1843 return omp_null_mem_space;
// Temporary array holding the parent's resource values for the selection;
// released with __kmp_free() after the lookup.
1847 int *resources_abs = (
int *)__kmp_allocate(
sizeof(
int) * num_resources);
// NOTE(review): resources[i] is used as an index into ms->resources with no
// range check visible here (a negative num_resources also passes the check
// above) - confirm callers guarantee valid values.
1850 for (
int i = 0; i < num_resources; i++)
1851 resources_abs[i] = ms->resources[resources[i]];
1853 omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.
get_memspace(
1854 num_resources, resources_abs, memspace);
1855 __kmp_free(resources_abs);
1860 typedef struct kmp_mem_desc {
// Baseline alignment, equal to SizeQuant. __kmp_alloc() starts from this
// value and raises it when a user allocator asks for more.
1867 constexpr
size_t alignment = SizeQuant;
// Entry point for allocation: traces the request, forwards it to
// __kmp_alloc() with an alignment argument of 0, and traces the resulting
// pointer.
//   gtid      - global thread id, passed through to __kmp_alloc().
//   size      - requested size (traced as an int).
//   allocator - allocator handle, passed through to __kmp_alloc().
1870 void *__kmpc_alloc(
int gtid,
size_t size, omp_allocator_handle_t allocator) {
1871 KE_TRACE(25, (
"__kmpc_alloc: T#%d (%d, %p)\n", gtid, (
int)size, allocator));
1872 void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1873 KE_TRACE(25, (
"__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
// Entry point for aligned allocation: traces the request, forwards it to
// __kmp_alloc() with the caller's alignment `algn`, and traces the resulting
// pointer.
//   gtid      - global thread id, passed through to __kmp_alloc().
//   algn      - requested alignment (traced as an int).
//   size      - requested size (traced as an int).
//   allocator - allocator handle, passed through to __kmp_alloc().
1877 void *__kmpc_aligned_alloc(
int gtid,
size_t algn,
size_t size,
1878 omp_allocator_handle_t allocator) {
1879 KE_TRACE(25, (
"__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (
int)algn,
1880 (
int)size, allocator));
1881 void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1882 KE_TRACE(25, (
"__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
// Entry point for zero-initialized array allocation: traces the request,
// forwards it to __kmp_calloc() with an alignment argument of 0, and traces
// the resulting pointer.
//   gtid      - global thread id, passed through to __kmp_calloc().
//   nmemb     - number of elements (traced as an int).
//   size      - size of each element (traced as an int).
//   allocator - allocator handle, passed through to __kmp_calloc().
1886 void *__kmpc_calloc(
int gtid,
size_t nmemb,
size_t size,
1887 omp_allocator_handle_t allocator) {
1888 KE_TRACE(25, (
"__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (
int)nmemb,
1889 (
int)size, allocator));
1890 void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1891 KE_TRACE(25, (
"__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
// Entry point for reallocation: traces the request, forwards it to
// __kmp_realloc(), and traces the resulting pointer.
//   gtid           - global thread id, passed through to __kmp_realloc().
//   ptr            - existing allocation.
//   size           - new size (traced as an int).
//   allocator      - allocator handle used for the new allocation.
//   free_allocator - allocator handle used when releasing `ptr`.
1895 void *__kmpc_realloc(
int gtid,
void *ptr,
size_t size,
1896 omp_allocator_handle_t allocator,
1897 omp_allocator_handle_t free_allocator) {
1898 KE_TRACE(25, (
"__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (
int)size,
1899 allocator, free_allocator));
1900 void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1901 KE_TRACE(25, (
"__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
// Entry point for deallocation: traces the request, hands it to
// ___kmpc_free() (three leading underscores), then traces completion at a
// different trace level (10 rather than 25).
//   gtid      - global thread id, passed through to ___kmpc_free().
//   ptr       - pointer to release.
//   allocator - allocator handle, passed through to ___kmpc_free().
1905 void __kmpc_free(
int gtid,
void *ptr, omp_allocator_handle_t allocator) {
1906 KE_TRACE(25, (
"__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1907 ___kmpc_free(gtid, ptr, allocator);
1908 KE_TRACE(10, (
"__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
// Core allocation routine behind __kmpc_alloc() and __kmpc_aligned_alloc().
//   gtid      - global thread id; selects the thread whose default allocator
//               and default device are consulted.
//   algn      - requested alignment; in the statements shown it is only
//               forwarded to the nested __kmp_alloc() fallback calls.
//   size      - number of usable bytes requested.
//   allocator - allocator handle; omp_null_allocator selects the thread's
//               default allocator.
// On the descriptor-based path the raw allocation is desc.size_a bytes
// (size + descriptor + alignment slack); a kmp_mem_desc_t is written just
// below the aligned address and that aligned address is returned.
// NOTE(review): several declarations and branch headers are not visible here
// (e.g. `al`, `ptr`, `used`, `device`); the comments below describe only what
// is shown.
1913 void *__kmp_alloc(
int gtid,
size_t algn,
size_t size,
1914 omp_allocator_handle_t allocator) {
1917 KMP_DEBUG_ASSERT(__kmp_init_serial);
// A null handle means "use this thread's default allocator".
1920 if (allocator == omp_null_allocator)
1921 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1922 kmp_int32 default_device =
1923 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1927 int sz_desc =
sizeof(kmp_mem_desc_t);
1928 kmp_mem_desc_t desc;
1930 kmp_uintptr_t addr_align;
1931 kmp_uintptr_t addr_descr;
// Start from the baseline alignment; a user allocator (handle above
// kmp_max_mem_alloc) may raise it through its alignment trait.
1932 size_t align = alignment;
1933 if (allocator > kmp_max_mem_alloc && al->alignment > align)
1934 align = al->alignment;
// size_a is the raw request: payload plus descriptor plus alignment slack.
1937 desc.size_orig = size;
1938 desc.size_a = size + sz_desc + align;
1939 bool is_pinned =
false;
1940 if (allocator > kmp_max_mem_alloc)
1941 is_pinned = al->pinned;
// Starts out true only when neither hwloc nor memkind is available.
1944 int use_default_allocator =
1945 (!__kmp_hwloc_available && !__kmp_memkind_available);
// Allocators whose memory space lies above kmp_max_mem_space are served by
// __kmp_tgt_allocator and return immediately.
1947 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
1949 return __kmp_tgt_allocator.
omp_alloc(size, allocator);
// Predefined target allocators: call the matching kmp_target_alloc_* hook
// with `size` when target memory is available.
1952 if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1955 if (__kmp_target_mem_available) {
1957 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1958 if (allocator == llvm_omp_target_host_mem_alloc)
1959 ptr = kmp_target_alloc_host(size, device);
1960 else if (allocator == llvm_omp_target_shared_mem_alloc)
1961 ptr = kmp_target_alloc_shared(size, device);
1963 ptr = kmp_target_alloc_device(size, device);
1966 KMP_INFORM(TargetMemNotAvailable);
// User allocators bound to a target memory space take the same hooks.
// NOTE(review): this test uses `>=` while the other user-allocator checks in
// this function use `>` against kmp_max_mem_alloc - confirm that is intended.
1970 if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1971 if (__kmp_target_mem_available) {
1973 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1974 if (al->memspace == llvm_omp_target_host_mem_space)
1975 ptr = kmp_target_alloc_host(size, device);
1976 else if (al->memspace == llvm_omp_target_shared_mem_space)
1977 ptr = kmp_target_alloc_shared(size, device);
1979 ptr = kmp_target_alloc_device(size, device);
1982 KMP_INFORM(TargetMemNotAvailable);
// hwloc-backed allocation path.
1986 #if KMP_HWLOC_ENABLED
1987 if (__kmp_hwloc_available) {
1988 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
1989 if (allocator < kmp_max_mem_alloc) {
1991 if (allocator == omp_high_bw_mem_alloc) {
1992 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1993 desc.size_a, HWLOC_MEMBIND_BIND);
1995 use_default_allocator =
true;
1996 }
else if (allocator == omp_large_cap_mem_alloc) {
1997 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1998 desc.size_a, HWLOC_MEMBIND_BIND);
2000 use_default_allocator =
true;
2002 use_default_allocator =
true;
2004 if (use_default_allocator) {
2005 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2007 }
else if (al->pool_size > 0) {
2010 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2011 if (used + desc.size_a > al->pool_size) {
// Over the pool limit: undo the accounting, then follow the fallback trait.
2013 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2014 if (al->fb == omp_atv_default_mem_fb) {
2016 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2017 }
else if (al->fb == omp_atv_abort_fb) {
2019 }
else if (al->fb == omp_atv_allocator_fb) {
2020 KMP_ASSERT(al != al->fb_data);
2022 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2026 if (al->membind == omp_atv_interleaved) {
2027 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2028 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2029 HWLOC_MEMBIND_INTERLEAVE);
2031 }
else if (al->membind == omp_atv_environment) {
2032 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2033 HWLOC_MEMBIND_DEFAULT);
2035 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2038 if (al->fb == omp_atv_default_mem_fb) {
2040 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2041 }
else if (al->fb == omp_atv_abort_fb) {
2043 }
else if (al->fb == omp_atv_allocator_fb) {
2044 KMP_ASSERT(al != al->fb_data);
2046 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2052 if (al->membind == omp_atv_interleaved) {
2053 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2054 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2055 HWLOC_MEMBIND_INTERLEAVE);
2057 }
else if (al->membind == omp_atv_environment) {
2058 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2059 HWLOC_MEMBIND_DEFAULT);
2061 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2064 if (al->fb == omp_atv_default_mem_fb) {
2066 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2067 }
else if (al->fb == omp_atv_abort_fb) {
2069 }
else if (al->fb == omp_atv_allocator_fb) {
2070 KMP_ASSERT(al != al->fb_data);
2072 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2077 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
// memkind-backed allocation path.
2081 if (__kmp_memkind_available) {
2082 if (allocator < kmp_max_mem_alloc) {
2084 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2085 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
2086 }
else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2087 ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
2089 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2091 }
else if (al->pool_size > 0) {
2094 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2095 if (used + desc.size_a > al->pool_size) {
// Over the pool limit: undo the accounting, then follow the fallback trait.
2097 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2098 if (al->fb == omp_atv_default_mem_fb) {
2100 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2101 }
else if (al->fb == omp_atv_abort_fb) {
2103 }
else if (al->fb == omp_atv_allocator_fb) {
2104 KMP_ASSERT(al != al->fb_data);
2106 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2107 if (is_pinned && kmp_target_lock_mem)
2108 kmp_target_lock_mem(ptr, size, default_device);
2113 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2115 if (al->fb == omp_atv_default_mem_fb) {
2117 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2118 }
else if (al->fb == omp_atv_abort_fb) {
2120 }
else if (al->fb == omp_atv_allocator_fb) {
2121 KMP_ASSERT(al != al->fb_data);
2123 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2124 if (is_pinned && kmp_target_lock_mem)
2125 kmp_target_lock_mem(ptr, size, default_device);
2132 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2134 if (al->fb == omp_atv_default_mem_fb) {
2136 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2137 }
else if (al->fb == omp_atv_abort_fb) {
2139 }
else if (al->fb == omp_atv_allocator_fb) {
2140 KMP_ASSERT(al != al->fb_data);
2142 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2143 if (is_pinned && kmp_target_lock_mem)
2144 kmp_target_lock_mem(ptr, size, default_device);
2149 }
else if (allocator < kmp_max_mem_alloc) {
2151 if (allocator == omp_high_bw_mem_alloc) {
2152 KMP_WARNING(OmpNoAllocator,
"omp_high_bw_mem_alloc");
2153 }
else if (allocator == omp_large_cap_mem_alloc) {
2154 KMP_WARNING(OmpNoAllocator,
"omp_large_cap_mem_alloc");
2155 }
else if (allocator == omp_const_mem_alloc) {
2156 KMP_WARNING(OmpNoAllocator,
"omp_const_mem_alloc");
2157 }
else if (allocator == omp_low_lat_mem_alloc) {
2158 KMP_WARNING(OmpNoAllocator,
"omp_low_lat_mem_alloc");
2159 }
else if (allocator == omp_cgroup_mem_alloc) {
2160 KMP_WARNING(OmpNoAllocator,
"omp_cgroup_mem_alloc");
2161 }
else if (allocator == omp_pteam_mem_alloc) {
2162 KMP_WARNING(OmpNoAllocator,
"omp_pteam_mem_alloc");
2163 }
else if (allocator == omp_thread_mem_alloc) {
2164 KMP_WARNING(OmpNoAllocator,
"omp_thread_mem_alloc");
2166 use_default_allocator =
true;
// Default path: allocate the raw block from the per-thread heap.
2168 if (use_default_allocator) {
2169 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2170 use_default_allocator =
false;
2172 }
else if (al->pool_size > 0) {
2175 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2176 if (used + desc.size_a > al->pool_size) {
// Over the pool limit: undo the accounting, then follow the fallback trait.
2178 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2179 if (al->fb == omp_atv_default_mem_fb) {
2181 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2182 }
else if (al->fb == omp_atv_abort_fb) {
2184 }
else if (al->fb == omp_atv_allocator_fb) {
2185 KMP_ASSERT(al != al->fb_data);
2187 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2188 if (is_pinned && kmp_target_lock_mem)
2189 kmp_target_lock_mem(ptr, size, default_device);
2194 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2195 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2202 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2203 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2207 #if KMP_HWLOC_ENABLED
// NOTE(review): desc.size_a is passed for a %d conversion without the (int)
// cast that the other traces in this file apply to sizes - confirm its type.
2210 KE_TRACE(10, (
"__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
// Pinned allocators lock the whole raw block when the lock hook was resolved.
2214 if (is_pinned && kmp_target_lock_mem)
2215 kmp_target_lock_mem(ptr, desc.size_a, default_device);
// Round up past the descriptor to the next `align` boundary; the descriptor
// sits immediately below the aligned address. The mask form relies on
// `align` being a power of two.
2217 addr = (kmp_uintptr_t)ptr;
2218 addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
2219 addr_descr = addr_align - sz_desc;
// Record what is needed to free the block later, then store the descriptor.
2221 desc.ptr_alloc = ptr;
2222 desc.ptr_align = (
void *)addr_align;
2223 desc.allocator = al;
2224 *((kmp_mem_desc_t *)addr_descr) = desc;
2227 return desc.ptr_align;
// Allocate nmemb * size bytes through __kmp_alloc() and fill them with zero
// bytes.
//   gtid      - global thread id.
//   algn      - alignment, forwarded to __kmp_alloc().
//   nmemb     - number of elements.
//   size      - size of each element.
//   allocator - allocator handle; omp_null_allocator selects the thread's
//               default allocator.
// NOTE(review): the results of the zero-size check and of the overflow check
// are not visible here.
2230 void *__kmp_calloc(
int gtid,
size_t algn,
size_t nmemb,
size_t size,
2231 omp_allocator_handle_t allocator) {
2234 KMP_DEBUG_ASSERT(__kmp_init_serial);
2236 if (allocator == omp_null_allocator)
2237 allocator = __kmp_threads[gtid]->th.th_def_allocator;
2241 if (nmemb == 0 || size == 0)
// Overflow guard: true when nmemb * size plus the descriptor size would
// exceed SIZE_MAX.
2244 if ((SIZE_MAX -
sizeof(kmp_mem_desc_t)) / size < nmemb) {
2245 if (al->fb == omp_atv_abort_fb) {
2251 ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
2254 memset(ptr, 0x00, nmemb * size);
// Reallocate `ptr` to `size` bytes: obtain a new block from __kmp_alloc()
// (alignment argument 0), copy the smaller of the old and new sizes, and
// release the old block with ___kmpc_free().
//   gtid           - global thread id.
//   ptr            - existing allocation; its descriptor is read from just
//                    below this address.
//   size           - new size in bytes.
//   allocator      - allocator handle for the new block.
//   free_allocator - allocator handle passed when freeing `ptr`.
// NOTE(review): the conditions guarding the first ___kmpc_free() call and the
// return statements are not visible here.
2259 void *__kmp_realloc(
int gtid,
void *ptr,
size_t size,
2260 omp_allocator_handle_t allocator,
2261 omp_allocator_handle_t free_allocator) {
2263 KMP_DEBUG_ASSERT(__kmp_init_serial);
2267 ___kmpc_free(gtid, ptr, free_allocator);
2271 nptr = __kmp_alloc(gtid, 0, size, allocator);
2273 if (nptr != NULL && ptr != NULL) {
2274 kmp_mem_desc_t desc;
2275 kmp_uintptr_t addr_align;
2276 kmp_uintptr_t addr_descr;
// The descriptor written by __kmp_alloc() sits right below the user pointer.
2278 addr_align = (kmp_uintptr_t)ptr;
2279 addr_descr = addr_align -
sizeof(kmp_mem_desc_t);
2280 desc = *((kmp_mem_desc_t *)addr_descr);
2282 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2283 KMP_DEBUG_ASSERT(desc.size_orig > 0);
2284 KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
// Copy min(size, original size) bytes into the new block.
2285 KMP_MEMCPY((
char *)nptr, (
char *)ptr,
2286 (
size_t)((size < desc.size_orig) ? size : desc.size_orig));
2290 ___kmpc_free(gtid, ptr, free_allocator);
// Release memory obtained from __kmp_alloc().
//   gtid      - global thread id; selects the thread whose default device is
//               read and whose heap is used on the default path.
//   ptr       - pointer previously returned to the user.
//   allocator - allocator handle supplied by the caller; after the descriptor
//               is read, the allocator recorded in it is used instead.
// NOTE(review): early-return statements and some declarations (`al`, `used`,
// `device`) are not visible here.
2296 void ___kmpc_free(
int gtid,
void *ptr, omp_allocator_handle_t allocator) {
2301 omp_allocator_handle_t oal;
2302 al = RCAST(
kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
2303 kmp_mem_desc_t desc;
2304 kmp_uintptr_t addr_align;
2305 kmp_uintptr_t addr_descr;
// Allocators whose memory space lies above kmp_max_mem_space are released
// through __kmp_tgt_allocator.
2307 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
2308 __kmp_tgt_allocator.
omp_free(ptr, allocator);
// Target allocators and target memory spaces: call the matching
// kmp_target_free_* hook.
2312 if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
2313 (allocator > kmp_max_mem_alloc &&
2314 KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
2316 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2317 if (allocator == llvm_omp_target_host_mem_alloc) {
2318 kmp_target_free_host(ptr, device);
2319 }
else if (allocator == llvm_omp_target_shared_mem_alloc) {
2320 kmp_target_free_shared(ptr, device);
2321 }
else if (allocator == llvm_omp_target_device_mem_alloc) {
2322 kmp_target_free_device(ptr, device);
// Read back the descriptor stored right below the user pointer.
2327 addr_align = (kmp_uintptr_t)ptr;
2328 addr_descr = addr_align -
sizeof(kmp_mem_desc_t);
2329 desc = *((kmp_mem_desc_t *)addr_descr);
2331 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2333 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
// From here on, use the allocator recorded at allocation time.
2335 al = desc.allocator;
2336 oal = (omp_allocator_handle_t)al;
2337 KMP_DEBUG_ASSERT(al);
// Pinned allocators unlock the raw block when the unlock hook was resolved.
2339 if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
2341 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2342 kmp_target_unlock_mem(desc.ptr_alloc, device);
// hwloc path: give the bytes back to the pool counter, then free.
2345 #if KMP_HWLOC_ENABLED
2346 if (__kmp_hwloc_available) {
2347 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2349 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2351 KMP_DEBUG_ASSERT(used >= desc.size_a);
2353 hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
// memkind path: free with the kind that matches the recorded allocator.
2356 if (__kmp_memkind_available) {
2357 if (oal < kmp_max_mem_alloc) {
2359 if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2360 kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
2361 }
else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2362 kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
2364 kmp_mk_free(*mk_default, desc.ptr_alloc);
2367 if (al->pool_size > 0) {
2369 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2371 KMP_DEBUG_ASSERT(used >= desc.size_a);
2373 kmp_mk_free(*al->memkind, desc.ptr_alloc);
// Default path: update pool accounting and return the block to the
// per-thread heap.
2376 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2378 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2380 KMP_DEBUG_ASSERT(used >= desc.size_a);
2382 __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
2384 #if KMP_HWLOC_ENABLED
// Bookkeeping record stored immediately below each aligned block handed out
// by ___kmp_allocate_align(); ___kmp_free() reads it back to find the raw
// allocation.
2393 struct kmp_mem_descr {
// Pointer returned by the underlying malloc call.
2394 void *ptr_allocated;
// Total bytes requested from malloc: size_aligned + sizeof(descriptor) +
// alignment. ___kmp_fast_free() also stores a list length in this field.
2395 size_t size_allocated;
// Number of usable bytes at the aligned address.
2397 size_t size_aligned;
2399 typedef struct kmp_mem_descr kmp_mem_descr_t;
// Allocate `size` bytes aligned to `alignment` and store a kmp_mem_descr_t
// immediately below the returned address.
//   size      - usable bytes requested.
//   alignment - required alignment; asserted to be below 32 * 1024. The mask
//               arithmetic below relies on it being a power of two.
// Returns the aligned pointer. A NULL result from the underlying malloc call
// raises KMP_FATAL(OutOfHeapMemory).
// NOTE(review): the preprocessor conditions that choose between the two
// malloc calls and guard the 0xEF fill are not visible here.
2404 static void *___kmp_allocate_align(
size_t size,
2405 size_t alignment KMP_SRC_LOC_DECL) {
2422 kmp_mem_descr_t descr;
2423 kmp_uintptr_t addr_allocated;
2424 kmp_uintptr_t addr_aligned;
2425 kmp_uintptr_t addr_descr;
2427 KE_TRACE(25, (
"-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2428 (
int)size, (
int)alignment KMP_SRC_LOC_PARM));
2430 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
2431 KMP_DEBUG_ASSERT(
sizeof(
void *) <=
sizeof(kmp_uintptr_t));
// Over-allocate by one descriptor plus `alignment` bytes so that both the
// descriptor and an aligned payload fit inside the raw block.
2434 descr.size_aligned = size;
2435 descr.size_allocated =
2436 descr.size_aligned +
sizeof(kmp_mem_descr_t) + alignment;
2439 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
2441 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
2443 KE_TRACE(10, (
" malloc( %d ) returned %p\n", (
int)descr.size_allocated,
2444 descr.ptr_allocated));
2445 if (descr.ptr_allocated == NULL) {
2446 KMP_FATAL(OutOfHeapMemory);
// Compute the aligned address past the descriptor; the descriptor goes
// directly below it.
2449 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2451 (addr_allocated +
sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
2452 addr_descr = addr_aligned -
sizeof(kmp_mem_descr_t);
2454 descr.ptr_aligned = (
void *)addr_aligned;
2456 KE_TRACE(26, (
" ___kmp_allocate_align: "
2457 "ptr_allocated=%p, size_allocated=%d, "
2458 "ptr_aligned=%p, size_aligned=%d\n",
2459 descr.ptr_allocated, (
int)descr.size_allocated,
2460 descr.ptr_aligned, (
int)descr.size_aligned));
// Sanity checks: descriptor and payload both lie inside the raw block and
// the payload is aligned.
2462 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
2463 KMP_DEBUG_ASSERT(addr_descr +
sizeof(kmp_mem_descr_t) == addr_aligned);
2464 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2465 addr_allocated + descr.size_allocated);
2466 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
// Fill the raw block with 0xEF, then zero the usable region.
2468 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2471 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
2477 *((kmp_mem_descr_t *)addr_descr) = descr;
2481 KE_TRACE(25, (
"<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
2482 return descr.ptr_aligned;
// Allocate `size` bytes through ___kmp_allocate_align() using the alignment
// held in __kmp_align_alloc, tracing entry and exit.
//   size - usable bytes requested (traced as an int).
2489 void *___kmp_allocate(
size_t size KMP_SRC_LOC_DECL) {
2491 KE_TRACE(25, (
"-> __kmp_allocate( %d ) called from %s:%d\n",
2492 (
int)size KMP_SRC_LOC_PARM));
2493 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2494 KE_TRACE(25, (
"<- __kmp_allocate() returns %p\n", ptr));
// Allocate `size` bytes through ___kmp_allocate_align() with a fixed
// alignment of 8 * 1024 bytes, tracing entry and exit.
//   size - usable bytes requested (traced as an int).
2502 void *___kmp_page_allocate(
size_t size KMP_SRC_LOC_DECL) {
// Hard-coded alignment; it is not queried from the system here.
2503 int page_size = 8 * 1024;
2506 KE_TRACE(25, (
"-> __kmp_page_allocate( %d ) called from %s:%d\n",
2507 (
int)size KMP_SRC_LOC_PARM));
2508 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2509 KE_TRACE(25, (
"<- __kmp_page_allocate( %d ) returns %p\n", (
int)size, ptr));
// Free a block obtained from ___kmp_allocate_align() (directly or through
// ___kmp_allocate() / ___kmp_page_allocate()).
//   ptr - aligned pointer previously returned; must not be NULL (asserted).
// The descriptor stored just below `ptr` supplies the raw pointer handed to
// the underlying free call.
// NOTE(review): the preprocessor conditions that choose between the two free
// calls and guard the 0xEF fill are not visible here.
2515 void ___kmp_free(
void *ptr KMP_SRC_LOC_DECL) {
2516 kmp_mem_descr_t descr;
2518 kmp_uintptr_t addr_allocated;
2519 kmp_uintptr_t addr_aligned;
2522 (
"-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2523 KMP_ASSERT(ptr != NULL);
// Copy out the descriptor that sits directly below the user pointer.
2525 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t));
2527 KE_TRACE(26, (
" __kmp_free: "
2528 "ptr_allocated=%p, size_allocated=%d, "
2529 "ptr_aligned=%p, size_aligned=%d\n",
2530 descr.ptr_allocated, (
int)descr.size_allocated,
2531 descr.ptr_aligned, (
int)descr.size_aligned));
// Consistency checks on the descriptor before the block is released.
2533 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2534 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2535 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2536 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2537 KMP_DEBUG_ASSERT(addr_allocated +
sizeof(kmp_mem_descr_t) <= addr_aligned);
2538 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
2539 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2540 addr_allocated + descr.size_allocated);
// Overwrite the whole raw block with 0xEF before it is freed.
2541 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2546 KE_TRACE(10, (
" free( %p )\n", descr.ptr_allocated));
2548 _free_src_loc(descr.ptr_allocated, _file_, _line_);
2550 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
2554 KE_TRACE(25, (
"<- __kmp_free() returns\n"));
2557 #if USE_FAST_MEMORY == 3
// Upper bound that ___kmp_fast_free() applies to the length of the
// "other thread" free list before it hands the list off.
2563 #define KMP_FREE_LIST_LIMIT 16
// Granularity (in bytes) to which the fast allocator rounds request sizes
// and aligns the blocks it returns.
2566 #define DCACHE_LINE 128
// Fast allocation for thread `this_thr`: the request is rounded up to a
// multiple of DCACHE_LINE and served from the thread's free lists when
// possible, otherwise from bget().
//   this_thr - thread whose free lists are used.
//   size     - bytes requested (traced as an int).
// NOTE(review): the size-class selection and the list-empty checks are only
// partly visible here; `index`, `ptr`, `alloc_ptr` and `alloc_size` are
// declared in lines not shown.
2568 void *___kmp_fast_allocate(kmp_info_t *this_thr,
size_t size KMP_SRC_LOC_DECL) {
2570 size_t num_lines, idx;
2574 kmp_mem_descr_t *descr;
2576 KE_TRACE(25, (
"-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2577 __kmp_gtid_from_thread(this_thr), (
int)size KMP_SRC_LOC_PARM));
// Number of DCACHE_LINE units needed, rounded up.
2579 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2580 idx = num_lines - 1;
// NOTE(review): idx is a size_t, so this assertion is always true; for
// size == 0 num_lines is 0 and idx wraps around - confirm callers never pass
// a zero size.
2581 KMP_DEBUG_ASSERT(idx >= 0);
2585 }
else if ((idx >>= 2) == 0) {
2588 }
else if ((idx >>= 2) == 0) {
2591 }
else if ((idx >>= 2) == 0) {
// First choice: pop a block from this thread's own list.
2598 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2601 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
2602 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2603 sizeof(kmp_mem_descr_t)))
// Second choice: take over the whole sync list by swapping its head to
// nullptr with compare-and-store, retrying on failure.
2607 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2612 while (!KMP_COMPARE_AND_STORE_PTR(
2613 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr,
nullptr)) {
2615 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2619 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
2620 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2621 sizeof(kmp_mem_descr_t)))
// Otherwise allocate a fresh block from bget(), with room for the
// descriptor and DCACHE_LINE bytes of alignment slack.
2628 size = num_lines * DCACHE_LINE;
2630 alloc_size = size +
sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2631 KE_TRACE(25, (
"__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2633 __kmp_gtid_from_thread(this_thr), alloc_size));
2634 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2637 ptr = (
void *)((((kmp_uintptr_t)alloc_ptr) +
sizeof(kmp_mem_descr_t) +
2639 ~(DCACHE_LINE - 1));
2640 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
2642 descr->ptr_allocated = alloc_ptr;
// ptr_aligned is reused to record the owning thread; ___kmp_fast_free()
// reads it back as a kmp_info_t pointer.
2644 descr->ptr_aligned = (
void *)this_thr;
2647 descr->size_aligned = size;
2650 KE_TRACE(25, (
"<- __kmp_fast_allocate( T#%d ) returns %p\n",
2651 __kmp_gtid_from_thread(this_thr), ptr));
// Return a block obtained from ___kmp_fast_allocate().
//   this_thr - thread performing the free.
//   ptr      - block to release; must not be NULL (asserted).
// A block owned by the calling thread is pushed onto that thread's own list.
// A block owned by another thread is collected on th_free_list_other and
// later pushed onto the owner's sync list. The last visible path hands the
// raw allocation to brel().
// NOTE(review): branch headers and the declarations of `size`, `idx`,
// `index`, `tail`, `old_ptr` and `q_sz` are not visible here.
2657 void ___kmp_fast_free(kmp_info_t *this_thr,
void *ptr KMP_SRC_LOC_DECL) {
2658 kmp_mem_descr_t *descr;
2659 kmp_info_t *alloc_thr;
2664 KE_TRACE(25, (
"-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2665 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2666 KMP_ASSERT(ptr != NULL);
// The descriptor sits directly below the user pointer.
2668 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
2670 KE_TRACE(26, (
" __kmp_fast_free: size_aligned=%d\n",
2671 (
int)descr->size_aligned));
2673 size = descr->size_aligned;
// Size-class lookup: compare `size` against successively shifted values of
// idx, starting from DCACHE_LINE * 2.
2675 idx = DCACHE_LINE * 2;
2678 }
else if ((idx <<= 1) == size) {
2680 }
else if ((idx <<= 2) == size) {
2682 }
else if ((idx <<= 2) == size) {
2685 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
// ptr_aligned holds the allocating thread (stored by ___kmp_fast_allocate).
2689 alloc_thr = (kmp_info_t *)descr->ptr_aligned;
2690 if (alloc_thr == this_thr) {
// Own block: push onto this thread's own list.
2692 *((
void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2693 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2695 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
// Start a new "other" list; size_allocated of the head doubles as the list
// length.
2698 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2699 *((
void **)ptr) = NULL;
2700 descr->size_allocated = (size_t)1;
2703 kmp_mem_descr_t *dsc =
2704 (kmp_mem_descr_t *)((
char *)head -
sizeof(kmp_mem_descr_t));
2706 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2708 dsc->size_allocated + 1;
// Same owner and still within KMP_FREE_LIST_LIMIT: prepend to the existing
// "other" list.
2709 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2711 *((
void **)ptr) = head;
2712 descr->size_allocated = q_sz;
2713 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
// Otherwise walk the existing list to its tail before handing it off.
2720 void *next = *((
void **)head);
2721 while (next != NULL) {
2724 ((kmp_mem_descr_t *)((
char *)next -
sizeof(kmp_mem_descr_t)))
2727 ((kmp_mem_descr_t *)((
char *)tail -
sizeof(kmp_mem_descr_t)))
2730 next = *((
void **)next);
2732 KMP_DEBUG_ASSERT(q_th != NULL);
// Link the list in front of the owner's sync list with compare-and-store,
// re-reading the head and re-linking the tail on each retry.
2734 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2737 *((
void **)tail) = old_ptr;
2739 while (!KMP_COMPARE_AND_STORE_PTR(
2740 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2742 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2743 *((
void **)tail) = old_ptr;
// The block being freed becomes the start of a fresh "other" list.
2747 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2748 *((
void **)ptr) = NULL;
2749 descr->size_allocated = (size_t)1;
// NOTE(review): `size` is passed for a %d conversion without the (int) cast
// used by the other traces here - confirm its declared type.
2756 KE_TRACE(25, (
"__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2757 __kmp_gtid_from_thread(this_thr), size));
2758 __kmp_bget_dequeue(this_thr);
2759 brel(this_thr, descr->ptr_allocated);
2762 KE_TRACE(25, (
"<- __kmp_fast_free() returns\n"));
// Reset the fast-memory free lists of `this_thr`: all NUM_LISTS entries of
// th_free_lists are zero-filled.
2768 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2769 KE_TRACE(10, (
"__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2771 memset(this_thr->th.th_free_lists, 0, NUM_LISTS *
sizeof(kmp_free_list_t));
// Release the fast-memory pool of thread `th`: scans every bget free-list
// bin, then calls the thread's release function (thr->relfcn) on each entry
// of a collected list.
// NOTE(review): how entries are collected into `lst`, and the declarations
// of `bin`, `lst` and `next`, are not visible here.
2776 void __kmp_free_fast_memory(kmp_info_t *th) {
2779 thr_data_t *thr = get_thr_data(th);
2783 5, (
"__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2785 __kmp_bget_dequeue(th);
// Walk each bin's circular list until it returns to the bin header.
2788 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2789 bfhead_t *b = thr->freelist[bin].ql.flink;
2790 while (b != &thr->freelist[bin]) {
// The low bit of bthr is tested as a flag on the buffer.
2791 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
// Hand every collected entry to the release function.
2799 while (lst != NULL) {
2801 KE_TRACE(10, (
"__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2802 lst, next, th, __kmp_gtid_from_thread(th)));
2803 (*thr->relfcn)(lst);
2809 lst = (
void **)next;
2813 5, (
"__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
void * omp_alloc(size_t size, omp_allocator_handle_t allocator)
Invoke offload runtime's memory allocation routine.
void init()
Initialize interface with offload runtime.
int get_mem_resources(int ndevs, const int *devs, int host, omp_memspace_handle_t memspace, int *resources)
void omp_free(void *ptr, omp_allocator_handle_t allocator)
Invoke offload runtime's memory deallocation routine.
omp_memspace_handle_t get_memspace(int num_resources, const int *resources, omp_memspace_handle_t parent)
Return sub memory space from the parent memory space.
omp_memspace_handle_t get(int num_resources, const int *resources, omp_memspace_handle_t memspace)
kmp_memspace_t * find(int num_resources, const int *resources, omp_memspace_handle_t memspace)
Find memory space that matches the provided input.
void init()
Initialize memory space list.
omp_memspace_handle_t get_memspace(int num_devices, const int *devices, int host_access, omp_memspace_handle_t memspace)
Return memory space for the provided input.
void fini()
Release resources for the memory space list.
Memory allocator information is shared with offload runtime.
Memory space information is shared with offload runtime.