15#include "kmp_wrapper_malloc.h"
18#if HWLOC_API_VERSION > 0x00020300
19#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20#elif HWLOC_API_VERSION == 0x00020300
21#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22 hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
24enum hwloc_memattr_id_e {
25 HWLOC_MEMATTR_ID_BANDWIDTH,
26 HWLOC_MEMATTR_ID_CAPACITY
36typedef int (*bget_compact_t)(size_t, int);
37typedef void *(*bget_acquire_t)(size_t);
38typedef void (*bget_release_t)(
void *);
43#if KMP_ARCH_X86 || KMP_ARCH_ARM
44typedef kmp_int32 bufsize;
46typedef kmp_int64 bufsize;
49typedef ssize_t bufsize;
54typedef enum bget_mode {
60static void bpool(kmp_info_t *th,
void *buffer, bufsize len);
61static void *bget(kmp_info_t *th, bufsize size);
62static void *bgetz(kmp_info_t *th, bufsize size);
63static void *bgetr(kmp_info_t *th,
void *buffer, bufsize newsize);
64static void brel(kmp_info_t *th,
void *buf);
65static void bectl(kmp_info_t *th, bget_compact_t compact,
66 bget_acquire_t acquire, bget_release_t release,
76#if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD
79#define AlignType double
84#define AlignType _Quad
120static bufsize bget_bin_size[] = {
130 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
138#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
145typedef struct qlinks {
146 struct bfhead *flink;
147 struct bfhead *blink;
151typedef struct bhead2 {
162 char b_pad[
sizeof(bhead2_t) + (SizeQuant - (
sizeof(bhead2_t) % SizeQuant))];
165#define BH(p) ((bhead_t *)(p))
168typedef struct bdhead {
172#define BDH(p) ((bdhead_t *)(p))
175typedef struct bfhead {
179#define BFH(p) ((bfhead_t *)(p))
181typedef struct thr_data {
182 bfhead_t freelist[MAX_BGET_BINS];
187 long numpget, numprel;
188 long numdget, numdrel;
192 bget_compact_t compfcn;
193 bget_acquire_t acqfcn;
194 bget_release_t relfcn;
207#define QLSize (sizeof(qlinks_t))
208#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
211 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
222static int bget_get_bin(bufsize size) {
224 int lo = 0, hi = MAX_BGET_BINS - 1;
226 KMP_DEBUG_ASSERT(size > 0);
228 while ((hi - lo) > 1) {
229 int mid = (lo + hi) >> 1;
230 if (size < bget_bin_size[mid])
236 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
241static void set_thr_data(kmp_info_t *th) {
245 data = (thr_data_t *)((!th->th.th_local.bget_data)
246 ? __kmp_allocate(
sizeof(*data))
247 : th->th.th_local.bget_data);
249 memset(data,
'\0',
sizeof(*data));
251 for (i = 0; i < MAX_BGET_BINS; ++i) {
252 data->freelist[i].ql.flink = &data->freelist[i];
253 data->freelist[i].ql.blink = &data->freelist[i];
256 th->th.th_local.bget_data = data;
257 th->th.th_local.bget_list = 0;
258#if !USE_CMP_XCHG_FOR_BGET
259#ifdef USE_QUEUING_LOCK_FOR_BGET
260 __kmp_init_lock(&th->th.th_local.bget_lock);
262 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
267static thr_data_t *get_thr_data(kmp_info_t *th) {
270 data = (thr_data_t *)th->th.th_local.bget_data;
272 KMP_DEBUG_ASSERT(data != 0);
278static void __kmp_bget_dequeue(kmp_info_t *th) {
279 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
282#if USE_CMP_XCHG_FOR_BGET
284 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
285 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
286 CCAST(
void *, old_value),
nullptr)) {
288 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
290 p = CCAST(
void *, old_value);
293#ifdef USE_QUEUING_LOCK_FOR_BGET
294 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
296 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
299 p = (
void *)th->th.th_local.bget_list;
300 th->th.th_local.bget_list = 0;
302#ifdef USE_QUEUING_LOCK_FOR_BGET
303 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
305 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
312 bfhead_t *b = BFH(((
char *)p) -
sizeof(bhead_t));
314 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
315 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
317 KMP_DEBUG_ASSERT(b->ql.blink == 0);
319 p = (
void *)b->ql.flink;
327static void __kmp_bget_enqueue(kmp_info_t *th,
void *buf
328#ifdef USE_QUEUING_LOCK_FOR_BGET
333 bfhead_t *b = BFH(((
char *)buf) -
sizeof(bhead_t));
335 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
336 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
341 KC_TRACE(10, (
"__kmp_bget_enqueue: moving buffer to T#%d list\n",
342 __kmp_gtid_from_thread(th)));
344#if USE_CMP_XCHG_FOR_BGET
346 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
349 b->ql.flink = BFH(CCAST(
void *, old_value));
351 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
352 CCAST(
void *, old_value), buf)) {
354 old_value = TCR_PTR(th->th.th_local.bget_list);
357 b->ql.flink = BFH(CCAST(
void *, old_value));
361#ifdef USE_QUEUING_LOCK_FOR_BGET
362 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
364 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
367 b->ql.flink = BFH(th->th.th_local.bget_list);
368 th->th.th_local.bget_list = (
void *)buf;
370#ifdef USE_QUEUING_LOCK_FOR_BGET
371 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
373 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
379static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
382 KMP_DEBUG_ASSERT(((
size_t)b) % SizeQuant == 0);
383 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
385 bin = bget_get_bin(b->bh.bb.bsize);
387 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
388 &thr->freelist[bin]);
389 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
390 &thr->freelist[bin]);
392 b->ql.flink = &thr->freelist[bin];
393 b->ql.blink = thr->freelist[bin].ql.blink;
395 thr->freelist[bin].ql.blink = b;
396 b->ql.blink->ql.flink = b;
400static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
401 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
402 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
404 b->ql.blink->ql.flink = b->ql.flink;
405 b->ql.flink->ql.blink = b->ql.blink;
409static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
410 thr_data_t *thr = get_thr_data(th);
413 *total_free = *max_free = 0;
415 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
418 best = &thr->freelist[bin];
421 while (b != &thr->freelist[bin]) {
422 *total_free += (b->bh.bb.bsize -
sizeof(bhead_t));
423 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
430 if (*max_free < best->bh.bb.bsize)
431 *max_free = best->bh.bb.bsize;
434 if (*max_free > (bufsize)
sizeof(bhead_t))
435 *max_free -=
sizeof(bhead_t);
// bget: allocate a buffer of at least `requested_size` bytes for thread `th`.
// The visible code searches the thread's freelist bins, optionally calls the
// compaction callback, and finally falls back to the acquire callback.
// NOTE(review): a number of lines of this function (declarations, else
// branches, returns, closing braces) are not present in this excerpt; the
// comments below describe only the statements that are shown.
439static void *bget(kmp_info_t *th, bufsize requested_size) {
440 thr_data_t *thr = get_thr_data(th);
441 bufsize size = requested_size;
// Guard against negative requests and requests that exceed MaxSize once the
// header is added.
449 if (size < 0 || size +
sizeof(bhead_t) > MaxSize) {
// Process buffers that other threads queued on this thread's bget_list.
453 __kmp_bget_dequeue(th);
455 if (size < (bufsize)SizeQ) {
// Round the request up to a multiple of SizeQuant.
458#if defined(SizeQuant) && (SizeQuant > 1)
459 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
// Account for the buffer header.
462 size +=
sizeof(bhead_t);
463 KMP_DEBUG_ASSERT(size >= 0);
464 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
// In LIFO mode the lists are walked backwards (via blink).
466 use_blink = (thr->mode == bget_mode_lifo);
// Scan bins starting at the one that matches the request size.
475 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
477 b = (use_blink ? thr->freelist[bin].ql.blink
478 : thr->freelist[bin].ql.flink);
// Best-fit mode: look for the smallest block in this bin that is big enough.
480 if (thr->mode == bget_mode_best) {
481 best = &thr->freelist[bin];
485 while (b != &thr->freelist[bin]) {
486 if (b->bh.bb.bsize >= (bufsize)size) {
487 if ((best == &thr->freelist[bin]) ||
488 (b->bh.bb.bsize < best->bh.bb.bsize)) {
494 b = (use_blink ? b->ql.blink : b->ql.flink);
499 while (b != &thr->freelist[bin]) {
500 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
// Split the block only if the remainder can hold a header plus SizeQ bytes.
509 if ((b->bh.bb.bsize - (bufsize)size) >
510 (bufsize)(SizeQ + (
sizeof(bhead_t)))) {
// ba: the allocated piece carved from the END of b; bn: the block after it.
513 ba = BH(((
char *)b) + (b->bh.bb.bsize - (bufsize)size));
514 bn = BH(((
char *)ba) + size);
516 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
519 b->bh.bb.bsize -= (bufsize)size;
522 ba->bb.prevfree = b->bh.bb.bsize;
// A negative bsize is stored for the allocated piece.
525 ba->bb.bsize = -size;
// b shrank, so move it to the bin matching its new size.
534 __kmp_bget_remove_from_freelist(b);
535 __kmp_bget_insert_into_freelist(thr, b);
537 thr->totalloc += (size_t)size;
540 buf = (
void *)((((
char *)ba) +
sizeof(bhead_t)));
541 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
// Remainder too small to split: hand out the whole block.
546 ba = BH(((
char *)b) + b->bh.bb.bsize);
548 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
553 __kmp_bget_remove_from_freelist(b);
555 thr->totalloc += (size_t)b->bh.bb.bsize;
559 b->bh.bb.bsize = -(b->bh.bb.bsize);
562 TCW_PTR(ba->bb.bthr, th);
568 buf = (
void *)&(b->ql);
569 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
575 b = (use_blink ? b->ql.blink : b->ql.flink);
// No fit found: consult the compaction callback, if one is installed.
583 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
// Fall back to the acquire callback, if one is installed.
591 if (thr->acqfcn != 0) {
// Requests larger than a pool increment get a dedicated bdhead_t-headed buffer.
592 if (size > (bufsize)(thr->exp_incr -
sizeof(bhead_t))) {
597 size +=
sizeof(bdhead_t) -
sizeof(bhead_t);
599 KE_TRACE(10, (
"%%%%%% MALLOC( %d )\n", (
int)size));
602 bdh = BDH((*thr->acqfcn)((bufsize)size));
// bsize == 0 is the marker that brel() tests to recognise such buffers.
606 bdh->bh.bb.bsize = 0;
609 TCW_PTR(bdh->bh.bb.bthr, th);
611 bdh->bh.bb.prevfree = 0;
614 thr->totalloc += (size_t)size;
618 buf = (
void *)(bdh + 1);
619 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
// Otherwise acquire a whole new pool of exp_incr bytes and register it.
628 KE_TRACE(10, (
"%%%%%% MALLOCB( %d )\n", (
int)thr->exp_incr));
631 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
632 KMP_DEBUG_ASSERT(((
size_t)newpool) % SizeQuant == 0);
633 if (newpool != NULL) {
634 bpool(th, newpool, thr->exp_incr);
651static void *bgetz(kmp_info_t *th, bufsize size) {
652 char *buf = (
char *)bget(th, size);
658 b = BH(buf -
sizeof(bhead_t));
659 rsize = -(b->bb.bsize);
663 bd = BDH(buf -
sizeof(bdhead_t));
664 rsize = bd->tsize - (bufsize)
sizeof(bdhead_t);
666 rsize -=
sizeof(bhead_t);
669 KMP_DEBUG_ASSERT(rsize >= size);
671 (void)memset(buf, 0, (bufsize)rsize);
673 return ((
void *)buf);
681static void *bgetr(kmp_info_t *th,
void *buf, bufsize size) {
686 nbuf = bget(th, size);
693 b = BH(((
char *)buf) -
sizeof(bhead_t));
694 osize = -b->bb.bsize;
699 bd = BDH(((
char *)buf) -
sizeof(bdhead_t));
700 osize = bd->tsize - (bufsize)
sizeof(bdhead_t);
702 osize -=
sizeof(bhead_t);
705 KMP_DEBUG_ASSERT(osize > 0);
707 (void)KMP_MEMCPY((
char *)nbuf, (
char *)buf,
708 (
size_t)((size < osize) ? size : osize));
// brel: release buffer `buf` on behalf of thread `th`.
// The visible code distinguishes three cases: a buffer whose header has
// bsize == 0 (released through the release callback), a buffer whose header
// names a different thread (queued to that thread), and a pool buffer that
// is merged with free neighbours and put back on a freelist.
// NOTE(review): several lines of this function are not present in this
// excerpt; the comments describe only the statements that are shown.
715static void brel(kmp_info_t *th,
void *buf) {
716 thr_data_t *thr = get_thr_data(th);
720 KMP_DEBUG_ASSERT(buf != NULL);
721 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
723 b = BFH(((
char *)buf) -
sizeof(bhead_t));
// bsize == 0: the buffer is preceded by a bdhead_t.
725 if (b->bh.bb.bsize == 0) {
728 bdh = BDH(((
char *)buf) -
sizeof(bdhead_t));
729 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
731 thr->totalloc -= (size_t)bdh->tsize;
// Overwrite the released payload with the 0x55 pattern.
736 (void)memset((
char *)buf, 0x55, (size_t)(bdh->tsize -
sizeof(bdhead_t)));
739 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)bdh));
741 KMP_DEBUG_ASSERT(thr->relfcn != 0);
742 (*thr->relfcn)((
void *)bdh);
// Owning thread recorded in the header (the rest of the mask expression is
// not visible here).
746 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
// Hand the buffer to the thread recorded in its header.
750 __kmp_bget_enqueue(bth, buf
751#ifdef USE_QUEUING_LOCK_FOR_BGET
753 __kmp_gtid_from_thread(th)
// Allocated buffers carry a negative bsize.
760 if (b->bh.bb.bsize >= 0) {
763 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
767 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
// bsize is negative here, so this addition lowers totalloc.
771 thr->totalloc += (size_t)b->bh.bb.bsize;
// A non-zero prevfree means the preceding block is free: merge into it.
776 if (b->bh.bb.prevfree != 0) {
781 bufsize size = b->bh.bb.bsize;
784 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.prevfree)->bb.bsize ==
786 b = BFH(((
char *)b) - b->bh.bb.prevfree);
787 b->bh.bb.bsize -= size;
790 __kmp_bget_remove_from_freelist(b);
// Flip the sign to mark the block free, then put it on a freelist.
795 b->bh.bb.bsize = -b->bh.bb.bsize;
799 __kmp_bget_insert_into_freelist(thr, b);
// If the following block is free too (positive bsize), absorb it.
805 bn = BFH(((
char *)b) + b->bh.bb.bsize);
806 if (bn->bh.bb.bsize > 0) {
810 KMP_DEBUG_ASSERT(BH((
char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
813 __kmp_bget_remove_from_freelist(bn);
815 b->bh.bb.bsize += bn->bh.bb.bsize;
// The size changed, so re-bin the merged block.
819 __kmp_bget_remove_from_freelist(b);
820 __kmp_bget_insert_into_freelist(thr, b);
828 bn = BFH(((
char *)b) + b->bh.bb.bsize);
// Fill the free block's payload with the 0x55 pattern.
831 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
832 (
size_t)(b->bh.bb.bsize -
sizeof(bfhead_t)));
834 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
// Record this block's size in the following block's prevfree field.
839 bn->bh.bb.prevfree = b->bh.bb.bsize;
// A free block spanning an entire pool can be released via the callback.
845 if (thr->relfcn != 0 &&
846 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
852 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
853 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
854 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
858 __kmp_bget_remove_from_freelist(b);
860 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
866 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
869 if (thr->last_pool == b)
879static void bectl(kmp_info_t *th, bget_compact_t compact,
880 bget_acquire_t acquire, bget_release_t release,
882 thr_data_t *thr = get_thr_data(th);
884 thr->compfcn = compact;
885 thr->acqfcn = acquire;
886 thr->relfcn = release;
887 thr->exp_incr = pool_incr;
// bpool: add the memory region [buf, buf + len) to thread `th`'s pool as a
// single free block, followed by an end-sentinel header (bsize == ESent).
// NOTE(review): some lines of this function are not present in this excerpt;
// the comments describe only the statements that are shown.
891static void bpool(kmp_info_t *th,
void *buf, bufsize len) {
893 thr_data_t *thr = get_thr_data(th);
894 bfhead_t *b = BFH(buf);
// Process buffers that other threads queued on this thread's bget_list.
897 __kmp_bget_dequeue(th);
// Truncate the length to a multiple of SizeQuant.
900 len &= ~((bufsize)(SizeQuant - 1));
902 if (thr->pool_len == 0) {
904 }
else if (len != thr->pool_len) {
910 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
916 KMP_DEBUG_ASSERT(len -
sizeof(bhead_t) <= -((bufsize)ESent + 1));
// First block of the pool: nothing free precedes it.
921 b->bh.bb.prevfree = 0;
// Reserve room at the end of the region for the sentinel header.
930 len -=
sizeof(bhead_t);
931 b->bh.bb.bsize = (bufsize)len;
933 TCW_PTR(b->bh.bb.bthr,
934 (kmp_info_t *)((kmp_uintptr_t)th |
938 __kmp_bget_insert_into_freelist(thr, b);
// Fill the free payload with the 0x55 pattern.
941 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
942 (
size_t)(len -
sizeof(bfhead_t)));
// bn is the sentinel header placed right after the free block.
944 bn = BH(((
char *)b) + len);
945 bn->bb.prevfree = (bufsize)len;
947 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
949 bn->bb.bsize = ESent;
// bfreed: print thread `th`'s allocator counters and every block on its
// freelists via __kmp_printf_no_lock, and report free blocks whose fill
// pattern appears to have been overwritten.
// NOTE(review): some lines of this function are not present in this excerpt;
// the comments describe only the statements that are shown.
953static void bfreed(kmp_info_t *th) {
954 int bin = 0, count = 0;
955 int gtid = __kmp_gtid_from_thread(th);
956 thr_data_t *thr = get_thr_data(th);
// Summary line with the counters kept in thr_data_t.
959 __kmp_printf_no_lock(
"__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
960 " get=%" KMP_INT64_SPEC
" rel=%" KMP_INT64_SPEC
961 " pblk=%" KMP_INT64_SPEC
" pget=%" KMP_INT64_SPEC
962 " prel=%" KMP_INT64_SPEC
" dget=%" KMP_INT64_SPEC
963 " drel=%" KMP_INT64_SPEC
"\n",
964 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
965 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
966 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
967 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
// Visit every block of every bin.
970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
973 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
975 bufsize bs = b->bh.bb.bsize;
977 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
978 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
979 KMP_DEBUG_ASSERT(bs > 0);
983 __kmp_printf_no_lock(
984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
// Comparing the payload with itself shifted by one byte checks that all
// payload bytes are equal.
988 char *lerr = ((
char *)b) +
sizeof(bfhead_t);
989 if ((bs >
sizeof(bfhead_t)) &&
991 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
993 __kmp_printf_no_lock(
"__kmp_printpool: T#%d (Contents of above "
994 "free block have been overstored.)\n",
1003 __kmp_printf_no_lock(
"__kmp_printpool: T#%d No free blocks\n", gtid);
1006void __kmp_initialize_bget(kmp_info_t *th) {
1007 KMP_DEBUG_ASSERT(SizeQuant >=
sizeof(
void *) && (th != 0));
1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1012 (bufsize)__kmp_malloc_pool_incr);
// __kmp_finalize_bget: tear down the bget allocator state of thread `th`.
// The visible code handles a block `b` that spans a whole pool (removing it
// from its freelist) and then frees the thread's bget_data record.
// NOTE(review): some lines of this function (including how `b` is obtained)
// are not present in this excerpt.
1015void __kmp_finalize_bget(kmp_info_t *th) {
1019 KMP_DEBUG_ASSERT(th != 0);
1022 thr = (thr_data_t *)th->th.th_local.bget_data;
1023 KMP_DEBUG_ASSERT(thr != NULL);
// Only a free block covering an entire pool qualifies for release.
1031 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1032 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
1033 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1034 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1035 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
1039 __kmp_bget_remove_from_freelist(b);
1041 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
1046 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
// Release the bookkeeping record itself and clear the pointer.
1051 if (th->th.th_local.bget_data != NULL) {
1052 __kmp_free(th->th.th_local.bget_data);
1053 th->th.th_local.bget_data = NULL;
1057void kmpc_set_poolsize(
size_t size) {
1058 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1059 (bget_release_t)free, (bufsize)size);
1062size_t kmpc_get_poolsize(
void) {
1065 p = get_thr_data(__kmp_get_thread());
1070void kmpc_set_poolmode(
int mode) {
1073 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1074 mode == bget_mode_best) {
1075 p = get_thr_data(__kmp_get_thread());
1076 p->mode = (bget_mode_t)mode;
1080int kmpc_get_poolmode(
void) {
1083 p = get_thr_data(__kmp_get_thread());
1088void kmpc_get_poolstat(
size_t *maxmem,
size_t *allmem) {
1089 kmp_info_t *th = __kmp_get_thread();
1092 __kmp_bget_dequeue(th);
1100void kmpc_poolprint(
void) {
1101 kmp_info_t *th = __kmp_get_thread();
1103 __kmp_bget_dequeue(th);
1110void *kmpc_malloc(
size_t size) {
1112 ptr = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1115 *(
void **)ptr = ptr;
1116 ptr = (
void **)ptr + 1;
// Non-zero when at most one bit of n is set. Note that 0 also satisfies the
// test, so callers that must reject 0 have to check for it separately.
#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1123void *kmpc_aligned_malloc(
size_t size,
size_t alignment) {
1125 void *ptr_allocated;
1126 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1127 if (!IS_POWER_OF_TWO(alignment)) {
1132 size = size +
sizeof(
void *) + alignment;
1133 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1134 if (ptr_allocated != NULL) {
1136 ptr = (
void *)(((kmp_uintptr_t)ptr_allocated +
sizeof(
void *) + alignment) &
1138 *((
void **)ptr - 1) = ptr_allocated;
1145void *kmpc_calloc(
size_t nelem,
size_t elsize) {
1147 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize +
sizeof(ptr)));
1150 *(
void **)ptr = ptr;
1151 ptr = (
void **)ptr + 1;
1156void *kmpc_realloc(
void *ptr,
size_t size) {
1157 void *result = NULL;
1160 result = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1162 if (result != NULL) {
1163 *(
void **)result = result;
1164 result = (
void **)result + 1;
1166 }
else if (size == 0) {
1172 KMP_ASSERT(*((
void **)ptr - 1));
1173 brel(__kmp_get_thread(), *((
void **)ptr - 1));
1175 result = bgetr(__kmp_entry_thread(), *((
void **)ptr - 1),
1176 (bufsize)(size +
sizeof(ptr)));
1177 if (result != NULL) {
1178 *(
void **)result = result;
1179 result = (
void **)result + 1;
1186void kmpc_free(
void *ptr) {
1187 if (!__kmp_init_serial) {
1191 kmp_info_t *th = __kmp_get_thread();
1192 __kmp_bget_dequeue(th);
1194 KMP_ASSERT(*((
void **)ptr - 1));
1195 brel(th, *((
void **)ptr - 1));
1199void *___kmp_thread_malloc(kmp_info_t *th,
size_t size KMP_SRC_LOC_DECL) {
1201 KE_TRACE(30, (
"-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1202 (
int)size KMP_SRC_LOC_PARM));
1203 ptr = bget(th, (bufsize)size);
1204 KE_TRACE(30, (
"<- __kmp_thread_malloc() returns %p\n", ptr));
1208void *___kmp_thread_calloc(kmp_info_t *th,
size_t nelem,
1209 size_t elsize KMP_SRC_LOC_DECL) {
1211 KE_TRACE(30, (
"-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1212 (
int)nelem, (
int)elsize KMP_SRC_LOC_PARM));
1213 ptr = bgetz(th, (bufsize)(nelem * elsize));
1214 KE_TRACE(30, (
"<- __kmp_thread_calloc() returns %p\n", ptr));
1218void *___kmp_thread_realloc(kmp_info_t *th,
void *ptr,
1219 size_t size KMP_SRC_LOC_DECL) {
1220 KE_TRACE(30, (
"-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1221 ptr, (
int)size KMP_SRC_LOC_PARM));
1222 ptr = bgetr(th, ptr, (bufsize)size);
1223 KE_TRACE(30, (
"<- __kmp_thread_realloc() returns %p\n", ptr));
1227void ___kmp_thread_free(kmp_info_t *th,
void *ptr KMP_SRC_LOC_DECL) {
1228 KE_TRACE(30, (
"-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1229 ptr KMP_SRC_LOC_PARM));
1231 __kmp_bget_dequeue(th);
1234 KE_TRACE(30, (
"<- __kmp_thread_free()\n"));
1240static void *(*kmp_mk_alloc)(
void *k,
size_t sz);
1242static void (*kmp_mk_free)(
void *kind,
void *ptr);
1244static void **mk_default;
1245static void **mk_interleave;
1246static void **mk_hbw_interleave;
1247static void **mk_hbw_preferred;
1248static void **mk_dax_kmem;
1249static void **mk_dax_kmem_all;
1250#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1251static const char *kmp_mk_lib_name;
1252static void *h_memkind;
1254static int (*kmp_mk_check)(
void *kind);
1255static void **mk_hbw;
1256static void **mk_hugetlb;
1257static void **mk_hbw_hugetlb;
1258static void **mk_hbw_preferred_hugetlb;
1259static void **mk_dax_kmem_preferred;
1261static void *(*kmp_target_alloc_host)(
size_t size,
int device);
1262static void *(*kmp_target_alloc_shared)(
size_t size,
int device);
1263static void *(*kmp_target_alloc_device)(
size_t size,
int device);
1264static void *(*kmp_target_lock_mem)(
void *ptr,
size_t size,
int device);
1265static void *(*kmp_target_unlock_mem)(
void *ptr,
int device);
1266static void *(*kmp_target_free_host)(
void *ptr,
int device);
1267static void *(*kmp_target_free_shared)(
void *ptr,
int device);
1268static void *(*kmp_target_free_device)(
void *ptr,
int device);
1269static bool __kmp_target_mem_available;
// Non-zero when MS is one of the llvm_omp_target_* memory spaces.
// The argument is parenthesised so that expression arguments (e.g. a
// conditional expression) are compared as a whole.
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  ((MS) == llvm_omp_target_host_mem_space ||                                   \
   (MS) == llvm_omp_target_shared_mem_space ||                                 \
   (MS) == llvm_omp_target_device_mem_space)

// Non-zero when MA is one of the llvm_omp_target_* allocators.
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  ((MA) == llvm_omp_target_host_mem_alloc ||                                   \
   (MA) == llvm_omp_target_shared_mem_alloc ||                                 \
   (MA) == llvm_omp_target_device_mem_alloc)

// Non-zero when MS is one of the predefined memory spaces listed below,
// including the target memory spaces.
#define KMP_IS_PREDEF_MEM_SPACE(MS)                                            \
  ((MS) == omp_null_mem_space || (MS) == omp_default_mem_space ||              \
   (MS) == omp_large_cap_mem_space || (MS) == omp_const_mem_space ||           \
   (MS) == omp_high_bw_mem_space || (MS) == omp_low_lat_mem_space ||           \
   KMP_IS_TARGET_MEM_SPACE(MS))
1304 bool supported =
false;
1305 using get_mem_resources_t = int (*)(int,
const int *, int,
1306 omp_memspace_handle_t,
int *);
1307 using omp_alloc_t =
void *(*)(
size_t, omp_allocator_handle_t);
1308 using omp_free_t = void (*)(
void *, omp_allocator_handle_t);
1309 get_mem_resources_t tgt_get_mem_resources =
nullptr;
1310 omp_alloc_t tgt_omp_alloc =
nullptr;
1311 omp_free_t tgt_omp_free =
nullptr;
1316 tgt_get_mem_resources =
1317 (get_mem_resources_t)KMP_DLSYM(
"__tgt_get_mem_resources");
1318 tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM(
"__tgt_omp_alloc");
1319 tgt_omp_free = (omp_free_t)KMP_DLSYM(
"__tgt_omp_free");
1320 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1325 omp_memspace_handle_t memspace,
int *resources) {
1327 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1331 void *
omp_alloc(
size_t size, omp_allocator_handle_t allocator) {
1333 return tgt_omp_alloc(size, allocator);
1337 void omp_free(
void *ptr, omp_allocator_handle_t allocator) {
1339 tgt_omp_free(ptr, allocator);
1341} __kmp_tgt_allocator;
1343extern "C" int omp_get_num_devices(
void);
1353 omp_memspace_handle_t memspace) {
1356 if (ms->num_resources == num_resources && ms->memspace == memspace &&
1357 !memcmp(ms->resources, resources,
sizeof(
int) * num_resources))
1366 omp_memspace_handle_t
get(
int num_resources,
const int *resources,
1367 omp_memspace_handle_t memspace) {
1368 int gtid = __kmp_entry_gtid();
1369 __kmp_acquire_lock(&mtx, gtid);
1371 int *sorted_resources = (
int *)__kmp_allocate(
sizeof(
int) * num_resources);
1372 KMP_MEMCPY(sorted_resources, resources, num_resources *
sizeof(
int));
1373 qsort(sorted_resources, (
size_t)num_resources,
sizeof(
int),
1374 [](
const void *a,
const void *b) {
1375 const int val_a = *(
const int *)a;
1376 const int val_b = *(
const int *)b;
1377 return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
1381 __kmp_free(sorted_resources);
1382 __kmp_release_lock(&mtx, gtid);
1386 ms->memspace = memspace;
1387 ms->num_resources = num_resources;
1388 ms->resources = sorted_resources;
1389 ms->next = memspace_list;
1391 __kmp_release_lock(&mtx, gtid);
1397 void init() { __kmp_init_lock(&mtx); }
1403 __kmp_free(ms->resources);
1408 __kmp_destroy_lock(&mtx);
1413 omp_memspace_handle_t memspace) {
1414 int actual_num_devices = num_devices;
1415 int *actual_devices =
const_cast<int *
>(devices);
1416 if (actual_num_devices == 0) {
1417 actual_num_devices = omp_get_num_devices();
1418 if (actual_num_devices <= 0)
1419 return omp_null_mem_space;
1421 if (actual_devices == NULL) {
1423 actual_devices = (
int *)__kmp_allocate(
sizeof(
int) * actual_num_devices);
1424 for (
int i = 0; i < actual_num_devices; i++)
1425 actual_devices[i] = i;
1429 actual_num_devices, actual_devices, host_access, memspace, NULL);
1430 if (num_resources <= 0)
1431 return omp_null_mem_space;
1433 omp_memspace_handle_t ms = omp_null_mem_space;
1434 if (num_resources > 0) {
1435 int *resources = (
int *)__kmp_allocate(
sizeof(
int) * num_resources);
1438 actual_num_devices, actual_devices, host_access, memspace, resources);
1439 ms =
get(num_resources, resources, memspace);
1440 __kmp_free(resources);
1442 if (!devices && actual_devices)
1443 __kmp_free(actual_devices);
1447 omp_memspace_handle_t
get_memspace(
int num_resources,
const int *resources,
1448 omp_memspace_handle_t parent) {
1450 return get(num_resources, resources, ms->memspace);
1452} __kmp_tgt_memspace_list;
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
/* Validate a memkind handle looked up with dlsym(): if the symbol was found
   but kmp_mk_check() returns non-zero for it, reset *pkind to NULL so the
   kind is treated as unavailable. */
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind) // symbol found
    if (kmp_mk_check(**pkind)) // kind not available or error
      *pkind = NULL;
}
#endif
// __kmp_init_memkind: try to load libmemkind.so at run time and resolve the
// memkind entry points and kind handles the allocator uses. When the basic
// entry points resolve and MEMKIND_DEFAULT passes kmp_mk_check(),
// __kmp_memkind_available is set and the optional kinds are looked up and
// validated with chk_kind(). The trailing assignments reset the handles to
// NULL.
// NOTE(review): several lines (conditions, returns, #else/#endif) are not
// present in this excerpt, so the exact control flow between the success
// path and the reset path cannot be confirmed from here.
1463void __kmp_init_memkind() {
1465#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1467 kmp_mk_lib_name =
"libmemkind.so";
1468 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
// Mandatory entry points.
1470 kmp_mk_check = (int (*)(
void *))dlsym(h_memkind,
"memkind_check_available");
1472 (
void *(*)(
void *,
size_t))dlsym(h_memkind,
"memkind_malloc");
1473 kmp_mk_free = (void (*)(
void *,
void *))dlsym(h_memkind,
"memkind_free");
1474 mk_default = (
void **)dlsym(h_memkind,
"MEMKIND_DEFAULT");
// kmp_mk_check() returning 0 is treated as "kind available".
1475 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1476 !kmp_mk_check(*mk_default)) {
1477 __kmp_memkind_available = 1;
// Optional kinds: each is looked up and then validated by chk_kind().
1478 mk_interleave = (
void **)dlsym(h_memkind,
"MEMKIND_INTERLEAVE");
1479 chk_kind(&mk_interleave);
1480 mk_hbw = (
void **)dlsym(h_memkind,
"MEMKIND_HBW");
1482 mk_hbw_interleave = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_INTERLEAVE");
1483 chk_kind(&mk_hbw_interleave);
1484 mk_hbw_preferred = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_PREFERRED");
1485 chk_kind(&mk_hbw_preferred);
1486 mk_hugetlb = (
void **)dlsym(h_memkind,
"MEMKIND_HUGETLB");
1487 chk_kind(&mk_hugetlb);
1488 mk_hbw_hugetlb = (
void **)dlsym(h_memkind,
"MEMKIND_HBW_HUGETLB");
1489 chk_kind(&mk_hbw_hugetlb);
1490 mk_hbw_preferred_hugetlb =
1491 (
void **)dlsym(h_memkind,
"MEMKIND_HBW_PREFERRED_HUGETLB");
1492 chk_kind(&mk_hbw_preferred_hugetlb);
1493 mk_dax_kmem = (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM");
1494 chk_kind(&mk_dax_kmem);
1495 mk_dax_kmem_all = (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM_ALL");
1496 chk_kind(&mk_dax_kmem_all);
1497 mk_dax_kmem_preferred =
1498 (
void **)dlsym(h_memkind,
"MEMKIND_DAX_KMEM_PREFERRED");
1499 chk_kind(&mk_dax_kmem_preferred);
1500 KE_TRACE(25, (
"__kmp_init_memkind: memkind library initialized\n"));
// Reset of the handles (presumably the failure / unsupported-platform path
// -- confirm against the full file).
1506 kmp_mk_check = NULL;
1509 mk_hbw_hugetlb = NULL;
1510 mk_hbw_preferred_hugetlb = NULL;
1511 mk_dax_kmem_preferred = NULL;
1512 kmp_mk_lib_name =
"";
1514 kmp_mk_alloc = NULL;
1517 mk_interleave = NULL;
1518 mk_hbw_interleave = NULL;
1519 mk_hbw_preferred = NULL;
1521 mk_dax_kmem_all = NULL;
/* Undo __kmp_init_memkind(): close the dynamically loaded memkind library
   (if it was opened) and reset every resolved entry point and kind handle
   to NULL. */
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  mk_hbw = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
#endif
}
#if KMP_HWLOC_ENABLED
/* Report whether the hwloc topology supports allocating memory with the
   given binding policy. Only HWLOC_MEMBIND_BIND and
   HWLOC_MEMBIND_INTERLEAVE are recognised; anything else, or an hwloc older
   than API 2.3, yields false. */
static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
  const hwloc_topology_support *support;
  support = hwloc_topology_get_support(__kmp_hwloc_topology);
  if (support) {
    if (policy == HWLOC_MEMBIND_BIND)
      return (support->membind->alloc_membind &&
              support->membind->bind_membind);
    if (policy == HWLOC_MEMBIND_INTERLEAVE)
      return (support->membind->alloc_membind &&
              support->membind->interleave_membind);
  }
  return false;
#else
  return false;
#endif
}

/* Allocate `size` bytes bound (with `policy`) to the NUMA node that hwloc
   reports as the best target for memory attribute `attr`, as seen from the
   CPU set the calling thread is currently bound to. Returns NULL on any
   failure or when hwloc is older than API 2.3. */
void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
                                hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
  void *ptr = NULL;
  hwloc_obj_t node;
  struct hwloc_location initiator;
  int ret;
  // TODO: avoid the bitmap allocation and the hwloc_get_cpubind() call by
  // reusing an affinity mask the runtime already tracks for the thread.
  hwloc_cpuset_t mask = hwloc_bitmap_alloc();
  if (mask == NULL) {
    return ptr;
  }
  ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
  if (ret < 0) {
    hwloc_bitmap_free(mask);
    return ptr;
  }
  initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
  initiator.location.cpuset = mask;
  ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
                                      &node, NULL);
  // The cpuset is only needed for the query above; release it on every path
  // so it is not leaked on success or on a failed lookup.
  hwloc_bitmap_free(mask);
  if (ret < 0) {
    return ptr;
  }
  return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
                             HWLOC_MEMBIND_BYNODESET);
#else
  return NULL;
#endif
}

/* Allocate `size` bytes for memory space `ms` using binding policy `policy`:
   the high-bandwidth and large-capacity spaces are routed through
   __kmp_hwloc_alloc_membind() with the matching memory attribute; every
   other space uses a plain hwloc_alloc(). Returns NULL when hwloc is older
   than API 2.3. */
void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
                                 hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
  void *ptr = NULL;
  if (ms == omp_high_bw_mem_space) {
    ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
  } else if (ms == omp_large_cap_mem_space) {
    ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
  } else {
    ptr = hwloc_alloc(__kmp_hwloc_topology, size);
  }
  return ptr;
#else
  return NULL;
#endif
}
#endif // KMP_HWLOC_ENABLED
1619void __kmp_init_target_mem() {
1620 *(
void **)(&kmp_target_alloc_host) = KMP_DLSYM(
"llvm_omp_target_alloc_host");
1621 *(
void **)(&kmp_target_alloc_shared) =
1622 KMP_DLSYM(
"llvm_omp_target_alloc_shared");
1623 *(
void **)(&kmp_target_alloc_device) =
1624 KMP_DLSYM(
"llvm_omp_target_alloc_device");
1625 *(
void **)(&kmp_target_free_host) = KMP_DLSYM(
"llvm_omp_target_free_host");
1626 *(
void **)(&kmp_target_free_shared) =
1627 KMP_DLSYM(
"llvm_omp_target_free_shared");
1628 *(
void **)(&kmp_target_free_device) =
1629 KMP_DLSYM(
"llvm_omp_target_free_device");
1630 __kmp_target_mem_available =
1631 kmp_target_alloc_host && kmp_target_alloc_shared &&
1632 kmp_target_alloc_device && kmp_target_free_host &&
1633 kmp_target_free_shared && kmp_target_free_device;
1635 *(
void **)(&kmp_target_lock_mem) = KMP_DLSYM(
"llvm_omp_target_lock_mem");
1636 *(
void **)(&kmp_target_unlock_mem) = KMP_DLSYM(
"llvm_omp_target_unlock_mem");
1637 __kmp_tgt_allocator.
init();
1638 __kmp_tgt_memspace_list.
init();
1642void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.
fini(); }
// __kmpc_init_allocator: build an allocator object `al` for memory space
// `ms` from the given trait array and return it as an opaque handle, or
// omp_null_allocator when the requested memory space cannot be served.
// NOTE(review): many lines of this function (allocation of `al`, `break`
// statements, several conditions and closing braces) are not present in
// this excerpt; the comments describe only the statements that are shown.
1644omp_allocator_handle_t __kmpc_init_allocator(
int gtid, omp_memspace_handle_t ms,
1646 omp_alloctrait_t traits[]) {
// Defaults applied before the traits are parsed.
1655 al->partition = omp_atv_environment;
1656 al->pin_device = -1;
1657 al->preferred_device = -1;
1658 al->target_access = omp_atv_single;
1659 al->atomic_scope = omp_atv_device;
// Apply each user-supplied trait.
1661 for (i = 0; i < ntraits; ++i) {
1662 switch (traits[i].key) {
1663 case omp_atk_sync_hint:
1664 case omp_atk_access:
1666 case omp_atk_pinned:
1669 case omp_atk_alignment:
1670 __kmp_type_convert(traits[i].value, &(al->alignment));
1671 KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1673 case omp_atk_pool_size:
1674 al->pool_size = traits[i].value;
1676 case omp_atk_fallback:
1677 al->fb = (omp_alloctrait_value_t)traits[i].value;
1679 al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1680 al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1682 case omp_atk_fb_data:
1685 case omp_atk_partition:
// With hwloc the partition trait is kept in al->membind; the other branch
// stores the raw trait value in al->memkind.
1686#if KMP_HWLOC_ENABLED
1687 al->membind = (omp_alloctrait_value_t)traits[i].value;
1688 KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
1689 al->membind == omp_atv_nearest ||
1690 al->membind == omp_atv_blocked ||
1691 al->membind == omp_atv_interleaved);
1693 al->memkind = RCAST(
void **, traits[i].value);
1695 case omp_atk_pin_device:
1696 __kmp_type_convert(traits[i].value, &(al->pin_device));
1698 case omp_atk_preferred_device:
1699 __kmp_type_convert(traits[i].value, &(al->preferred_device));
1701 case omp_atk_target_access:
1702 al->target_access = (omp_alloctrait_value_t)traits[i].value;
1704 case omp_atk_atomic_scope:
1705 al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
1707 case omp_atk_part_size:
1708 __kmp_type_convert(traits[i].value, &(al->part_size));
1711 KMP_ASSERT2(0,
"Unexpected allocator trait");
// Memory spaces above kmp_max_mem_space return the allocator right away.
1715 if (al->memspace > kmp_max_mem_space) {
1717 return (omp_allocator_handle_t)al;
1720 KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
1724 al->fb = omp_atv_default_mem_fb;
1726 }
else if (al->fb == omp_atv_allocator_fb) {
1727 KMP_ASSERT(al->fb_data != NULL);
1728 }
else if (al->fb == omp_atv_default_mem_fb) {
// With memkind available, map the memory space onto a memkind kind.
1731 if (__kmp_memkind_available) {
1733 if (ms == omp_high_bw_mem_space) {
1734 if (al->memkind == (
void *)omp_atv_interleaved && mk_hbw_interleave) {
1735 al->memkind = mk_hbw_interleave;
1736 }
else if (mk_hbw_preferred) {
1742 al->memkind = mk_hbw_preferred;
1746 return omp_null_allocator;
1748 }
else if (ms == omp_large_cap_mem_space) {
1749 if (mk_dax_kmem_all) {
1751 al->memkind = mk_dax_kmem_all;
1752 }
else if (mk_dax_kmem) {
1754 al->memkind = mk_dax_kmem;
1757 return omp_null_allocator;
1760 if (al->memkind == (
void *)omp_atv_interleaved && mk_interleave) {
1761 al->memkind = mk_interleave;
1763 al->memkind = mk_default;
// Target memory spaces require the target memory entry points.
1766 }
else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1768 return omp_null_allocator;
// High-bandwidth / large-capacity spaces are refused when hwloc is not
// available.
1770 if (!__kmp_hwloc_available &&
1771 (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
1774 return omp_null_allocator;
1777 return (omp_allocator_handle_t)al;
1780void __kmpc_destroy_allocator(
int gtid, omp_allocator_handle_t allocator) {
1781 if (allocator > kmp_max_mem_alloc)
1782 __kmp_free(allocator);
1785void __kmpc_set_default_allocator(
int gtid, omp_allocator_handle_t allocator) {
1786 if (allocator == omp_null_allocator)
1787 allocator = omp_default_mem_alloc;
1788 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1791omp_allocator_handle_t __kmpc_get_default_allocator(
int gtid) {
1792 return __kmp_threads[gtid]->th.th_def_allocator;
1795omp_memspace_handle_t __kmp_get_devices_memspace(
int ndevs,
const int *devs,
1796 omp_memspace_handle_t memspace,
1798 if (!__kmp_init_serial)
1799 __kmp_serial_initialize();
1801 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1802 return omp_null_mem_space;
1804 return __kmp_tgt_memspace_list.
get_memspace(ndevs, devs, host, memspace);
1807omp_allocator_handle_t
1808__kmp_get_devices_allocator(
int ndevs,
const int *devs,
1809 omp_memspace_handle_t memspace,
int host) {
1810 if (!__kmp_init_serial)
1811 __kmp_serial_initialize();
1813 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1814 return omp_null_allocator;
1816 omp_memspace_handle_t mspace =
1817 __kmp_get_devices_memspace(ndevs, devs, memspace, host);
1818 if (mspace == omp_null_mem_space)
1819 return omp_null_allocator;
1821 return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
// Returns the number of resources associated with `memspace`.
// NOTE(review): the values returned for the null memory space and for
// handles below kmp_max_mem_space are not visible in this excerpt.
1824int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
// Initialize the runtime on first use.
1825 if (!__kmp_init_serial)
1826 __kmp_serial_initialize();
// The null memory space is handled as a special case.
1827 if (memspace == omp_null_mem_space)
// Handles below kmp_max_mem_space are handled as a special case as well.
1829 if (memspace < kmp_max_mem_space)
// Any other handle reports the resource count stored in its descriptor.
1832 return ms->num_resources;
1835omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
1836 int num_resources,
int *resources) {
1837 if (!__kmp_init_serial)
1838 __kmp_serial_initialize();
1839 if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
1842 if (num_resources == 0 || ms->num_resources < num_resources || !resources)
1843 return omp_null_mem_space;
1847 int *resources_abs = (
int *)__kmp_allocate(
sizeof(
int) * num_resources);
1850 for (
int i = 0; i < num_resources; i++)
1851 resources_abs[i] = ms->resources[resources[i]];
1853 omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.
get_memspace(
1854 num_resources, resources_abs, memspace);
1855 __kmp_free(resources_abs);
// Descriptor that __kmp_alloc() writes immediately below the aligned address
// it returns; ___kmpc_free() and __kmp_realloc() read it back from there.
// NOTE(review): the field list is not visible in this excerpt.
1860typedef struct kmp_mem_desc {
// Minimum alignment used by __kmp_alloc() unless the allocator asks for more.
1867constexpr size_t alignment = SizeQuant;
1870void *__kmpc_alloc(
int gtid,
size_t size, omp_allocator_handle_t allocator) {
1871 KE_TRACE(25, (
"__kmpc_alloc: T#%d (%d, %p)\n", gtid, (
int)size, allocator));
1872 void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1873 KE_TRACE(25, (
"__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1877void *__kmpc_aligned_alloc(
int gtid,
size_t algn,
size_t size,
1878 omp_allocator_handle_t allocator) {
1879 KE_TRACE(25, (
"__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (
int)algn,
1880 (
int)size, allocator));
1881 void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1882 KE_TRACE(25, (
"__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1886void *__kmpc_calloc(
int gtid,
size_t nmemb,
size_t size,
1887 omp_allocator_handle_t allocator) {
1888 KE_TRACE(25, (
"__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (
int)nmemb,
1889 (
int)size, allocator));
1890 void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1891 KE_TRACE(25, (
"__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1895void *__kmpc_realloc(
int gtid,
void *ptr,
size_t size,
1896 omp_allocator_handle_t allocator,
1897 omp_allocator_handle_t free_allocator) {
1898 KE_TRACE(25, (
"__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (
int)size,
1899 allocator, free_allocator));
1900 void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1901 KE_TRACE(25, (
"__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1905void __kmpc_free(
int gtid,
void *ptr, omp_allocator_handle_t allocator) {
1906 KE_TRACE(25, (
"__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1907 ___kmpc_free(gtid, ptr, allocator);
1908 KE_TRACE(10, (
"__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
// Core allocation routine behind the __kmpc_* allocation entry points.
// For the non-target paths it requests desc.size_a bytes (payload +
// descriptor + alignment slack) from the selected back end, stores a
// kmp_mem_desc_t just below the aligned address and returns that aligned
// address. Requests that go to the offload runtime return earlier.
// NOTE(review): many `else`/brace/return lines and the handling of `algn`
// are not visible in this excerpt; comments below describe visible code only.
1913void *__kmp_alloc(
int gtid,
size_t algn,
size_t size,
1914 omp_allocator_handle_t allocator) {
1917 KMP_DEBUG_ASSERT(__kmp_init_serial);
// A null handle means "use the calling thread's default allocator".
1920 if (allocator == omp_null_allocator)
1921 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1922 kmp_int32 default_device =
1923 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1927 int sz_desc =
sizeof(kmp_mem_desc_t);
1928 kmp_mem_desc_t desc;
1930 kmp_uintptr_t addr_align;
1931 kmp_uintptr_t addr_descr;
// Start from the file-wide default alignment; a user-defined allocator may
// raise it through its alignment trait.
1932 size_t align = alignment;
1933 if (allocator > kmp_max_mem_alloc && al->alignment > align)
1934 align = al->alignment;
// size_a is what is actually requested from the back end: payload plus room
// for the descriptor and for alignment adjustment.
1937 desc.size_orig = size;
1938 desc.size_a = size + sz_desc + align;
// Only user-defined allocators carry a pinned trait.
1939 bool is_pinned =
false;
1940 if (allocator > kmp_max_mem_alloc)
1941 is_pinned = al->pinned;
1944 int use_default_allocator =
1945 (!__kmp_hwloc_available && !__kmp_memkind_available);
// Allocators whose memory space lies above kmp_max_mem_space are served by
// the offload runtime's allocation routine.
1947 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
1949 return __kmp_tgt_allocator.
omp_alloc(size, allocator);
// Predefined target allocators: dispatch to the host/shared/device entry
// point when target memory is available, otherwise emit an informational
// message.
1952 if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1955 if (__kmp_target_mem_available) {
1957 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1958 if (allocator == llvm_omp_target_host_mem_alloc)
1959 ptr = kmp_target_alloc_host(size, device);
1960 else if (allocator == llvm_omp_target_shared_mem_alloc)
1961 ptr = kmp_target_alloc_shared(size, device);
1963 ptr = kmp_target_alloc_device(size, device);
1966 KMP_INFORM(TargetMemNotAvailable);
// User-defined allocators on a target memory space take the same route.
// NOTE(review): this test uses `>=` while the other user-defined-allocator
// checks in this function use `>` — confirm that is intended.
1970 if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1971 if (__kmp_target_mem_available) {
1973 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1974 if (al->memspace == llvm_omp_target_host_mem_space)
1975 ptr = kmp_target_alloc_host(size, device);
1976 else if (al->memspace == llvm_omp_target_shared_mem_space)
1977 ptr = kmp_target_alloc_shared(size, device);
1979 ptr = kmp_target_alloc_device(size, device);
1982 KMP_INFORM(TargetMemNotAvailable);
// hwloc back end.
1986#if KMP_HWLOC_ENABLED
1987 if (__kmp_hwloc_available) {
1988 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
// Predefined allocators: bind by bandwidth or capacity attribute where that
// applies; use_default_allocator selects a plain hwloc_alloc().
1989 if (allocator < kmp_max_mem_alloc) {
1991 if (allocator == omp_high_bw_mem_alloc) {
1992 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1993 desc.size_a, HWLOC_MEMBIND_BIND);
1995 use_default_allocator =
true;
1996 }
else if (allocator == omp_large_cap_mem_alloc) {
1997 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1998 desc.size_a, HWLOC_MEMBIND_BIND);
2000 use_default_allocator =
true;
2002 use_default_allocator =
true;
2004 if (use_default_allocator) {
2005 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2007 }
// User-defined allocator with a pool limit: reserve the bytes atomically and
// undo the reservation when the pool would be exceeded, then apply the
// allocator's fallback trait.
else if (al->pool_size > 0) {
2010 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2011 if (used + desc.size_a > al->pool_size) {
2013 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2014 if (al->fb == omp_atv_default_mem_fb) {
2016 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2017 }
else if (al->fb == omp_atv_abort_fb) {
2019 }
else if (al->fb == omp_atv_allocator_fb) {
// A fallback allocator must not be the allocator itself.
2020 KMP_ASSERT(al != al->fb_data);
2022 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
// Allocation according to the allocator's membind policy.
2026 if (al->membind == omp_atv_interleaved) {
2027 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2028 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2029 HWLOC_MEMBIND_INTERLEAVE);
2031 }
else if (al->membind == omp_atv_environment) {
2032 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2033 HWLOC_MEMBIND_DEFAULT);
2035 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2038 if (al->fb == omp_atv_default_mem_fb) {
2040 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2041 }
else if (al->fb == omp_atv_abort_fb) {
2043 }
else if (al->fb == omp_atv_allocator_fb) {
2044 KMP_ASSERT(al != al->fb_data);
2046 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
// Same membind-policy allocation and fallback handling, repeated for another
// branch whose opening lines are not visible here.
2052 if (al->membind == omp_atv_interleaved) {
2053 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2054 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2055 HWLOC_MEMBIND_INTERLEAVE);
2057 }
else if (al->membind == omp_atv_environment) {
2058 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2059 HWLOC_MEMBIND_DEFAULT);
2061 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2064 if (al->fb == omp_atv_default_mem_fb) {
2066 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2067 }
else if (al->fb == omp_atv_abort_fb) {
2069 }
else if (al->fb == omp_atv_allocator_fb) {
2070 KMP_ASSERT(al != al->fb_data);
2072 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2077 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
// memkind back end.
2081 if (__kmp_memkind_available) {
// Predefined allocators map to a fixed memkind kind, defaulting to
// mk_default.
2082 if (allocator < kmp_max_mem_alloc) {
2084 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2085 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
2086 }
else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2087 ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
2089 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2091 }
// Pool-limited user allocator: same reserve/undo scheme as the hwloc path.
else if (al->pool_size > 0) {
2094 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2095 if (used + desc.size_a > al->pool_size) {
2097 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2098 if (al->fb == omp_atv_default_mem_fb) {
2100 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2101 }
else if (al->fb == omp_atv_abort_fb) {
2103 }
else if (al->fb == omp_atv_allocator_fb) {
2104 KMP_ASSERT(al != al->fb_data);
// Memory obtained through the fallback allocator is pinned here when the
// original allocator is pinned and the lock entry point was resolved.
2106 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2107 if (is_pinned && kmp_target_lock_mem)
2108 kmp_target_lock_mem(ptr, size, default_device);
2113 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2115 if (al->fb == omp_atv_default_mem_fb) {
2117 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2118 }
else if (al->fb == omp_atv_abort_fb) {
2120 }
else if (al->fb == omp_atv_allocator_fb) {
2121 KMP_ASSERT(al != al->fb_data);
2123 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2124 if (is_pinned && kmp_target_lock_mem)
2125 kmp_target_lock_mem(ptr, size, default_device);
// User allocator without a pool limit: allocate from its own memkind kind and
// apply the fallback trait in the lines that follow.
2132 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2134 if (al->fb == omp_atv_default_mem_fb) {
2136 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2137 }
else if (al->fb == omp_atv_abort_fb) {
2139 }
else if (al->fb == omp_atv_allocator_fb) {
2140 KMP_ASSERT(al != al->fb_data);
2142 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2143 if (is_pinned && kmp_target_lock_mem)
2144 kmp_target_lock_mem(ptr, size, default_device);
2149 }
// Predefined allocators that have no dedicated back end in this path: warn
// that the named allocator is not available.
else if (allocator < kmp_max_mem_alloc) {
2151 if (allocator == omp_high_bw_mem_alloc) {
2152 KMP_WARNING(OmpNoAllocator,
"omp_high_bw_mem_alloc");
2153 }
else if (allocator == omp_large_cap_mem_alloc) {
2154 KMP_WARNING(OmpNoAllocator,
"omp_large_cap_mem_alloc");
2155 }
else if (allocator == omp_const_mem_alloc) {
2156 KMP_WARNING(OmpNoAllocator,
"omp_const_mem_alloc");
2157 }
else if (allocator == omp_low_lat_mem_alloc) {
2158 KMP_WARNING(OmpNoAllocator,
"omp_low_lat_mem_alloc");
2159 }
else if (allocator == omp_cgroup_mem_alloc) {
2160 KMP_WARNING(OmpNoAllocator,
"omp_cgroup_mem_alloc");
2161 }
else if (allocator == omp_pteam_mem_alloc) {
2162 KMP_WARNING(OmpNoAllocator,
"omp_pteam_mem_alloc");
2163 }
else if (allocator == omp_thread_mem_alloc) {
2164 KMP_WARNING(OmpNoAllocator,
"omp_thread_mem_alloc");
2166 use_default_allocator =
true;
// Default path: take the memory from the thread-local allocator.
2168 if (use_default_allocator) {
2169 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2170 use_default_allocator =
false;
2172 }
// Pool-limited user allocator on the default back end.
else if (al->pool_size > 0) {
2175 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2176 if (used + desc.size_a > al->pool_size) {
2178 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2179 if (al->fb == omp_atv_default_mem_fb) {
2181 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2182 }
else if (al->fb == omp_atv_abort_fb) {
2184 }
else if (al->fb == omp_atv_allocator_fb) {
2185 KMP_ASSERT(al != al->fb_data);
2187 ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2188 if (is_pinned && kmp_target_lock_mem)
2189 kmp_target_lock_mem(ptr, size, default_device);
2194 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2195 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2202 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2203 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2207#if KMP_HWLOC_ENABLED
2210 KE_TRACE(10, (
"__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
// Pin the whole raw allocation when the allocator asks for pinned memory and
// the lock entry point is available.
2214 if (is_pinned && kmp_target_lock_mem)
2215 kmp_target_lock_mem(ptr, desc.size_a, default_device);
// Round up past the descriptor to the next multiple of `align`; the
// descriptor lives in the sz_desc bytes right below the aligned address.
2217 addr = (kmp_uintptr_t)ptr;
2218 addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
2219 addr_descr = addr_align - sz_desc;
// Fill in and store the descriptor so the free path can find the raw block.
2221 desc.ptr_alloc = ptr;
2222 desc.ptr_align = (
void *)addr_align;
2223 desc.allocator = al;
2224 *((kmp_mem_desc_t *)addr_descr) = desc;
2227 return desc.ptr_align;
2230void *__kmp_calloc(
int gtid,
size_t algn,
size_t nmemb,
size_t size,
2231 omp_allocator_handle_t allocator) {
2234 KMP_DEBUG_ASSERT(__kmp_init_serial);
2236 if (allocator == omp_null_allocator)
2237 allocator = __kmp_threads[gtid]->th.th_def_allocator;
2241 if (nmemb == 0 || size == 0)
2244 if ((SIZE_MAX -
sizeof(kmp_mem_desc_t)) / size < nmemb) {
2245 if (al->fb == omp_atv_abort_fb) {
2251 ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
2254 memset(ptr, 0x00, nmemb * size);
2259void *__kmp_realloc(
int gtid,
void *ptr,
size_t size,
2260 omp_allocator_handle_t allocator,
2261 omp_allocator_handle_t free_allocator) {
2263 KMP_DEBUG_ASSERT(__kmp_init_serial);
2267 ___kmpc_free(gtid, ptr, free_allocator);
2271 nptr = __kmp_alloc(gtid, 0, size, allocator);
2273 if (nptr != NULL && ptr != NULL) {
2274 kmp_mem_desc_t desc;
2275 kmp_uintptr_t addr_align;
2276 kmp_uintptr_t addr_descr;
2278 addr_align = (kmp_uintptr_t)ptr;
2279 addr_descr = addr_align -
sizeof(kmp_mem_desc_t);
2280 desc = *((kmp_mem_desc_t *)addr_descr);
2282 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2283 KMP_DEBUG_ASSERT(desc.size_orig > 0);
2284 KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
2285 KMP_MEMCPY((
char *)nptr, (
char *)ptr,
2286 (
size_t)((size < desc.size_orig) ? size : desc.size_orig));
2290 ___kmpc_free(gtid, ptr, free_allocator);
// Releases a block previously returned by __kmp_alloc(). Target memory is
// handed back to the offload runtime; everything else is located through the
// kmp_mem_desc_t stored below `ptr` and returned to the back end that is
// active (hwloc, memkind or the thread-local allocator).
// NOTE(review): the early NULL check, `return`s and `else` lines are not
// visible in this excerpt.
2296void ___kmpc_free(
int gtid,
void *ptr, omp_allocator_handle_t allocator) {
2301 omp_allocator_handle_t oal;
2302 al = RCAST(
kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
2303 kmp_mem_desc_t desc;
2304 kmp_uintptr_t addr_align;
2305 kmp_uintptr_t addr_descr;
// Allocators whose memory space lies above kmp_max_mem_space are released by
// the offload runtime's deallocation routine.
2307 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
2308 __kmp_tgt_allocator.
omp_free(ptr, allocator);
// Target memory (predefined target allocators, or user-defined allocators on
// a target memory space) goes to the matching target free entry point.
2312 if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
2313 (allocator > kmp_max_mem_alloc &&
2314 KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
2316 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2317 if (allocator == llvm_omp_target_host_mem_alloc) {
2318 kmp_target_free_host(ptr, device);
2319 }
else if (allocator == llvm_omp_target_shared_mem_alloc) {
2320 kmp_target_free_shared(ptr, device);
2321 }
else if (allocator == llvm_omp_target_device_mem_alloc) {
2322 kmp_target_free_device(ptr, device);
// Read back the descriptor stored immediately below the user pointer.
2327 addr_align = (kmp_uintptr_t)ptr;
2328 addr_descr = addr_align -
sizeof(kmp_mem_desc_t);
2329 desc = *((kmp_mem_desc_t *)addr_descr);
2331 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
// The block must belong to the given allocator or to its fallback allocator.
2333 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
// From here on use the allocator recorded in the descriptor.
2335 al = desc.allocator;
2336 oal = (omp_allocator_handle_t)al;
2337 KMP_DEBUG_ASSERT(al);
// Undo pinning for pinned user-defined allocators.
2339 if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
2341 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2342 kmp_target_unlock_mem(desc.ptr_alloc, device);
// hwloc back end: give the bytes back to the pool counter, then free.
2345#if KMP_HWLOC_ENABLED
2346 if (__kmp_hwloc_available) {
2347 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2349 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2351 KMP_DEBUG_ASSERT(used >= desc.size_a);
2353 hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
// memkind back end: free with the same kind selection as the allocation path.
2356 if (__kmp_memkind_available) {
2357 if (oal < kmp_max_mem_alloc) {
2359 if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2360 kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
2361 }
else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2362 kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
2364 kmp_mk_free(*mk_default, desc.ptr_alloc);
2367 if (al->pool_size > 0) {
2369 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2371 KMP_DEBUG_ASSERT(used >= desc.size_a);
2373 kmp_mk_free(*al->memkind, desc.ptr_alloc);
// Default back end: update the pool counter and free through the
// thread-local allocator.
2376 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2378 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2380 KMP_DEBUG_ASSERT(used >= desc.size_a);
2382 __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
2384#if KMP_HWLOC_ENABLED
// Header stored immediately below every pointer handed out by
// ___kmp_allocate_align() and ___kmp_fast_allocate().
// NOTE(review): the ptr_aligned member used by those routines is declared on
// a line that is not visible in this excerpt.
2393struct kmp_mem_descr {
// Pointer obtained from the underlying allocator; this is what gets freed.
2394 void *ptr_allocated;
// Size of the underlying allocation. ___kmp_fast_free() reuses this field as
// a queue-length counter for blocks parked on a free list.
2395 size_t size_allocated;
// Size of the usable, aligned region handed to the caller.
2397 size_t size_aligned;
2399typedef struct kmp_mem_descr kmp_mem_descr_t;
// Allocates `size` bytes aligned to `alignment`, zero-fills them and returns
// the aligned pointer. A kmp_mem_descr_t describing the raw allocation is
// stored right below the returned address so ___kmp_free() can undo it.
// Fails fatally (OutOfHeapMemory) when the underlying malloc returns NULL.
// NOTE(review): the preprocessor conditionals that select between the two
// malloc calls and guard the 0xEF fill are not visible in this excerpt.
2404static void *___kmp_allocate_align(
size_t size,
2405 size_t alignment KMP_SRC_LOC_DECL) {
2422 kmp_mem_descr_t descr;
2423 kmp_uintptr_t addr_allocated;
2424 kmp_uintptr_t addr_aligned;
2425 kmp_uintptr_t addr_descr;
2427 KE_TRACE(25, (
"-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2428 (
int)size, (
int)alignment KMP_SRC_LOC_PARM));
2430 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
2431 KMP_DEBUG_ASSERT(
sizeof(
void *) <=
sizeof(kmp_uintptr_t));
// Request enough for the payload, the descriptor and alignment slack.
2434 descr.size_aligned = size;
2435 descr.size_allocated =
2436 descr.size_aligned +
sizeof(kmp_mem_descr_t) + alignment;
2439 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
2441 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
2443 KE_TRACE(10, (
" malloc( %d ) returned %p\n", (
int)descr.size_allocated,
2444 descr.ptr_allocated));
2445 if (descr.ptr_allocated == NULL) {
2446 KMP_FATAL(OutOfHeapMemory);
// Skip past the descriptor and round down to a multiple of `alignment`
// (the mask assumes `alignment` is a power of two); the descriptor occupies
// the bytes directly below the aligned address.
2449 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2451 (addr_allocated +
sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
2452 addr_descr = addr_aligned -
sizeof(kmp_mem_descr_t);
2454 descr.ptr_aligned = (
void *)addr_aligned;
2456 KE_TRACE(26, (
" ___kmp_allocate_align: "
2457 "ptr_allocated=%p, size_allocated=%d, "
2458 "ptr_aligned=%p, size_aligned=%d\n",
2459 descr.ptr_allocated, (
int)descr.size_allocated,
2460 descr.ptr_aligned, (
int)descr.size_aligned));
// Sanity checks: descriptor and payload both lie inside the raw allocation.
2462 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
2463 KMP_DEBUG_ASSERT(addr_descr +
sizeof(kmp_mem_descr_t) == addr_aligned);
2464 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2465 addr_allocated + descr.size_allocated);
2466 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
// Fill the whole raw block with the 0xEF pattern.
2468 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// The caller-visible region is always returned zeroed.
2471 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
// Store the descriptor last so the fills above do not overwrite it.
2477 *((kmp_mem_descr_t *)addr_descr) = descr;
2481 KE_TRACE(25, (
"<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
2482 return descr.ptr_aligned;
2489void *___kmp_allocate(
size_t size KMP_SRC_LOC_DECL) {
2491 KE_TRACE(25, (
"-> __kmp_allocate( %d ) called from %s:%d\n",
2492 (
int)size KMP_SRC_LOC_PARM));
2493 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2494 KE_TRACE(25, (
"<- __kmp_allocate() returns %p\n", ptr));
2502void *___kmp_page_allocate(
size_t size KMP_SRC_LOC_DECL) {
2503 int page_size = 8 * 1024;
2506 KE_TRACE(25, (
"-> __kmp_page_allocate( %d ) called from %s:%d\n",
2507 (
int)size KMP_SRC_LOC_PARM));
2508 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2509 KE_TRACE(25, (
"<- __kmp_page_allocate( %d ) returns %p\n", (
int)size, ptr));
// Releases a block obtained from ___kmp_allocate(), ___kmp_page_allocate() or
// ___kmp_allocate_align(). `ptr` must be non-NULL; the raw allocation is
// recovered from the descriptor stored directly below it.
// NOTE(review): the preprocessor conditionals around the 0xEF fill and the
// two free calls are not visible in this excerpt.
2515void ___kmp_free(
void *ptr KMP_SRC_LOC_DECL) {
2516 kmp_mem_descr_t descr;
2518 kmp_uintptr_t addr_allocated;
2519 kmp_uintptr_t addr_aligned;
2522 (
"-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2523 KMP_ASSERT(ptr != NULL);
// Read the descriptor that sits immediately below the user pointer.
2525 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t));
2527 KE_TRACE(26, (
" __kmp_free: "
2528 "ptr_allocated=%p, size_allocated=%d, "
2529 "ptr_aligned=%p, size_aligned=%d\n",
2530 descr.ptr_allocated, (
int)descr.size_allocated,
2531 descr.ptr_aligned, (
int)descr.size_aligned));
// Consistency checks on the descriptor before anything is released.
2533 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2534 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2535 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2536 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2537 KMP_DEBUG_ASSERT(addr_allocated +
sizeof(kmp_mem_descr_t) <= addr_aligned);
2538 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
2539 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2540 addr_allocated + descr.size_allocated);
// Overwrite the whole raw block with the 0xEF pattern before freeing it.
2541 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2546 KE_TRACE(10, (
" free( %p )\n", descr.ptr_allocated));
// Free the raw allocation, not the aligned pointer.
2548 _free_src_loc(descr.ptr_allocated, _file_, _line_);
2550 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
2554 KE_TRACE(25, (
"<- __kmp_free() returns\n"));
2557#if USE_FAST_MEMORY == 3
// Largest queue length ___kmp_fast_free() allows on a thread's "other" free
// list before the queued chain is handed over to its owning thread.
2563#define KMP_FREE_LIST_LIMIT 16
// Granularity, in bytes, to which fast-memory request sizes are rounded up.
2566#define DCACHE_LINE 128
// Fast allocation for thread `this_thr`. The request is rounded up to a whole
// number of DCACHE_LINE units and mapped to a free-list index; a block is
// taken from the thread's free lists when one is available, otherwise a new
// buffer is obtained from bget() and a descriptor is written below the
// aligned pointer.
// NOTE(review): the index assignments, NULL checks and `goto`/`return` lines
// are not visible in this excerpt.
2568void *___kmp_fast_allocate(kmp_info_t *this_thr,
size_t size KMP_SRC_LOC_DECL) {
2570 size_t num_lines, idx;
2574 kmp_mem_descr_t *descr;
2576 KE_TRACE(25, (
"-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2577 __kmp_gtid_from_thread(this_thr), (
int)size KMP_SRC_LOC_PARM));
// Number of DCACHE_LINE units needed, rounded up.
2579 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2580 idx = num_lines - 1;
2581 KMP_DEBUG_ASSERT(idx >= 0);
// Successive shifts by two classify the request into one of the size bins.
2585 }
else if ((idx >>= 2) == 0) {
2588 }
else if ((idx >>= 2) == 0) {
2591 }
else if ((idx >>= 2) == 0) {
// First choice: the thread's own free list for this bin.
2598 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
// Pop the head; the next pointer is stored in the block's first word.
2601 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
2602 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2603 sizeof(kmp_mem_descr_t)))
// Second choice: the sync list. The whole list is claimed by swapping the
// head with nullptr using compare-and-store, retrying until it succeeds.
2607 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2612 while (!KMP_COMPARE_AND_STORE_PTR(
2613 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr,
nullptr)) {
2615 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
// The remainder of the claimed chain becomes the thread's own free list.
2619 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
2620 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2621 sizeof(kmp_mem_descr_t)))
// Fallback: allocate a fresh buffer large enough for the rounded payload,
// the descriptor and alignment slack.
2628 size = num_lines * DCACHE_LINE;
2630 alloc_size = size +
sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2631 KE_TRACE(25, (
"__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2633 __kmp_gtid_from_thread(this_thr), alloc_size));
2634 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
// Align the user pointer to DCACHE_LINE, leaving room for the descriptor
// directly below it.
2637 ptr = (
void *)((((kmp_uintptr_t)alloc_ptr) +
sizeof(kmp_mem_descr_t) +
2639 ~(DCACHE_LINE - 1));
2640 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
2642 descr->ptr_allocated = alloc_ptr;
// In fast-memory blocks ptr_aligned records the allocating thread rather
// than an address.
2644 descr->ptr_aligned = (
void *)this_thr;
2647 descr->size_aligned = size;
2650 KE_TRACE(25, (
"<- __kmp_fast_allocate( T#%d ) returns %p\n",
2651 __kmp_gtid_from_thread(this_thr), ptr));
// Returns a block obtained from ___kmp_fast_allocate(). Blocks of a binned
// size are parked on a free list: on the caller's own list when the caller
// allocated the block, otherwise on the caller's "other" list until that
// queue is handed to the owning thread. Larger blocks go back to bget via
// brel().
// NOTE(review): index assignments, `else`/`goto` lines and some declarations
// are not visible in this excerpt.
2657void ___kmp_fast_free(kmp_info_t *this_thr,
void *ptr KMP_SRC_LOC_DECL) {
2658 kmp_mem_descr_t *descr;
2659 kmp_info_t *alloc_thr;
2664 KE_TRACE(25, (
"-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2665 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2666 KMP_ASSERT(ptr != NULL);
// The descriptor sits immediately below the user pointer.
2668 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
2670 KE_TRACE(26, (
" __kmp_fast_free: size_aligned=%d\n",
2671 (
int)descr->size_aligned));
2673 size = descr->size_aligned;
// Compare the stored size against the bin sizes to find the list index.
2675 idx = DCACHE_LINE * 2;
2678 }
else if ((idx <<= 1) == size) {
2680 }
else if ((idx <<= 2) == size) {
2682 }
else if ((idx <<= 2) == size) {
2685 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
// ptr_aligned holds the allocating thread for fast-memory blocks.
2689 alloc_thr = (kmp_info_t *)descr->ptr_aligned;
2690 if (alloc_thr == this_thr) {
// Freed by its owner: push onto the thread's own list.
2692 *((
void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2693 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
// Freed by another thread: use the caller's "other" list.
2695 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
// Start a new single-element queue; size_allocated counts queued blocks.
2698 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2699 *((
void **)ptr) = NULL;
2700 descr->size_allocated = (size_t)1;
// Inspect the current head to learn the queue's owner and length.
2703 kmp_mem_descr_t *dsc =
2704 (kmp_mem_descr_t *)((
char *)head -
sizeof(kmp_mem_descr_t));
2706 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2708 dsc->size_allocated + 1;
// Same owner and still within the limit: push onto the existing queue.
2709 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2711 *((
void **)ptr) = head;
2712 descr->size_allocated = q_sz;
2713 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
// Otherwise walk the queued chain before handing it to its owner.
2720 void *next = *((
void **)head);
2721 while (next != NULL) {
2724 ((kmp_mem_descr_t *)((
char *)next -
sizeof(kmp_mem_descr_t)))
2727 ((kmp_mem_descr_t *)((
char *)tail -
sizeof(kmp_mem_descr_t)))
2730 next = *((
void **)next);
2732 KMP_DEBUG_ASSERT(q_th != NULL);
// Splice the chain onto the owner's sync list: link the tail to the current
// sync head, then publish `head` with compare-and-store, retrying on
// contention.
2734 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2737 *((
void **)tail) = old_ptr;
2739 while (!KMP_COMPARE_AND_STORE_PTR(
2740 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2742 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2743 *((
void **)tail) = old_ptr;
// The block being freed starts a fresh "other" queue.
2747 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2748 *((
void **)ptr) = NULL;
2749 descr->size_allocated = (size_t)1;
// Blocks outside the binned sizes are released to bget directly.
2756 KE_TRACE(25, (
"__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2757 __kmp_gtid_from_thread(this_thr), size));
2758 __kmp_bget_dequeue(this_thr);
2759 brel(this_thr, descr->ptr_allocated);
2762 KE_TRACE(25, (
"<- __kmp_fast_free() returns\n"));
2768void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2769 KE_TRACE(10, (
"__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2771 memset(this_thr->th.th_free_lists, 0, NUM_LISTS *
sizeof(kmp_free_list_t));
// Releases the fast-memory buffers of thread `th`: drains the thread's bget
// queue, walks every bget bin's free list, and passes collected blocks to the
// thread's release function.
// NOTE(review): the code that builds the `lst` chain and advances `b` is not
// visible in this excerpt.
2776void __kmp_free_fast_memory(kmp_info_t *th) {
2779 thr_data_t *thr = get_thr_data(th);
2783 5, (
"__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2785 __kmp_bget_dequeue(th);
// Visit each bin's circular free list; the bin header itself ends the walk.
2788 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2789 bfhead_t *b = thr->freelist[bin].ql.flink;
2790 while (b != &thr->freelist[bin]) {
// The low bit of bthr is tested as a marker on the buffer.
2791 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
// Release every block on the collected chain through the release callback.
2799 while (lst != NULL) {
2801 KE_TRACE(10, (
"__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2802 lst, next, th, __kmp_gtid_from_thread(th)));
2803 (*thr->relfcn)(lst);
2809 lst = (
void **)next;
2813 5, (
"__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
void init()
Initialize interface with offload runtime.
int get_mem_resources(int ndevs, const int *devs, int host, omp_memspace_handle_t memspace, int *resources)
void * omp_alloc(size_t size, omp_allocator_handle_t allocator)
Invoke offload runtime's memory allocation routine.
void omp_free(void *ptr, omp_allocator_handle_t allocator)
Invoke offload runtime's memory deallocation routine.
omp_memspace_handle_t get_memspace(int num_resources, const int *resources, omp_memspace_handle_t parent)
Return sub memory space from the parent memory space.
omp_memspace_handle_t get(int num_resources, const int *resources, omp_memspace_handle_t memspace)
void init()
Initialize memory space list.
omp_memspace_handle_t get_memspace(int num_devices, const int *devices, int host_access, omp_memspace_handle_t memspace)
Return memory space for the provided input.
void fini()
Release resources for the memory space list.
kmp_memspace_t * find(int num_resources, const int *resources, omp_memspace_handle_t memspace)
Find memory space that matches the provided input.
Memory allocator information is shared with offload runtime.
Memory space information is shared with offload runtime.