#include "kmp_wrapper_malloc.h"

#if HWLOC_API_VERSION > 0x00020300
#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
#elif HWLOC_API_VERSION == 0x00020300
#define KMP_HWLOC_LOCATION_TYPE_CPUSET                                         \
  hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
#else
enum hwloc_memattr_id_e {
  HWLOC_MEMATTR_ID_BANDWIDTH,
  HWLOC_MEMATTR_ID_CAPACITY
};
#endif
typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
typedef kmp_int64 bufsize;
typedef ssize_t bufsize;
typedef enum bget_mode {
  bget_mode_fifo,
  bget_mode_lifo,
  bget_mode_best
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

#if KMP_ARCH_X86 || KMP_ARCH_SPARC || !KMP_HAVE_QUAD
#define AlignType double
#else
#define AlignType _Quad
#endif
static bufsize bget_bin_size[] = {
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Size of previous free buffer in memory, or 0 */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

/* Thread-private buffer pool bookkeeping */
typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
  bget_compact_t compfcn; /* Automatic expansion block management functions */
  bget_acquire_t acqfcn;
  bget_release_t relfcn;
  bget_mode_t mode; /* What allocation mode to use? */
  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* Common block size of bpool() calls, 0 or -1 otherwise */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
/* Maximum for the requested size: */
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
/* End sentinel: the most negative bufsize, placed in the bsize field of the
   dummy block that delimits the end of a pool block. */
#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
// Map a buffer size to the free-list bin that holds buffers of that size.
static int bget_get_bin(bufsize size) {
  // binary search over the bget_bin_size table
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}
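
/* Example (illustrative): with the bin table above, a 70,000-byte buffer lies
   between 1 << 16 and 1 << 17, so the search returns the index of the largest
   bin size that does not exceed it.  Free buffers are chained per bin, so
   bget() only scans lists that can plausibly satisfy a request and moves on
   to larger bins otherwise. */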
static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}
/* Walk the thread's deferred-free list and release the enqueued buffers. */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Release each buffer on the detached chain. */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}
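
/* Note: when USE_CMP_XCHG_FOR_BGET is set, the deferred-free list is drained
   lock-free: the whole bget_list chain is detached with a single
   compare-and-swap against NULL, then each buffer is released with brel() by
   its owning thread.  Foreign threads only ever push onto this list (see
   __kmp_bget_enqueue below), so the swap cannot lose entries. */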
/* Charge a buffer back to its owning thread's deferred-free list. */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* set the next link before publishing buf so other threads never see a
       broken list, even for an instant */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}
/* Insert a free buffer into the free list of the bin matching its size. */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* Unlink a free buffer from whatever free list it is currently on. */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}
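
/* The per-bin free lists are circular doubly-linked lists whose head is the
   freelist[bin] element itself, so an empty bin is one whose flink/blink point
   back at the head.  The asserts above check that a node's neighbours link
   back to it consistently before and after each splice. */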
/* Report the largest free block and the total free space in the pool. */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}
/* BGET -- Allocate a buffer. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b, *best;
  void *buf;
  int bin, compactseq = 0, use_blink;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) {
    size = SizeQ; /* Need at least room for the queue links */
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); /* Add overhead in allocated buffer to size needed */
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
    /* Link to next buffer */
    b = (use_blink ? thr->freelist[bin].ql.blink
                   : thr->freelist[bin].ql.flink);

    if (thr->mode == bget_mode_best) {
      best = &thr->freelist[bin];

      /* Scan the free list for the smallest buffer big enough to hold the
         requested size. */
      while (b != &thr->freelist[bin]) {
        if (b->bh.bb.bsize >= (bufsize)size) {
          if ((best == &thr->freelist[bin]) ||
              (b->bh.bb.bsize < best->bh.bb.bsize)) {
            best = b;
          }
        }
        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
      b = best;
    }

    while (b != &thr->freelist[bin]) {
      if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
        /* Buffer is big enough to satisfy the request.  Decide whether to
           split it or to hand it over whole. */
        if ((b->bh.bb.bsize - (bufsize)size) >
            (bufsize)(SizeQ + (sizeof(bhead_t)))) {
          bhead_t *ba, *bn;

          /* Carve the allocation out of the high end of the free block. */
          ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
          bn = BH(((char *)ba) + size);

          KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

          /* Subtract size from length of free block. */
          b->bh.bb.bsize -= (bufsize)size;

          /* Link allocated buffer to the previous free buffer. */
          ba->bb.prevfree = b->bh.bb.bsize;

          /* Plug negative size into user buffer. */
          ba->bb.bsize = -size;
          // ...

          /* Unlink the block from its old free list and reinsert it according
             to its new (smaller) size. */
          __kmp_bget_remove_from_freelist(b);
          __kmp_bget_insert_into_freelist(thr, b);

          thr->totalloc += (size_t)size;
          // ...
          buf = (void *)((((char *)ba) + sizeof(bhead_t)));
          KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
          return buf;
        } else {
          /* The buffer isn't big enough to split.  Give the caller the whole
             block and remove it from the free list. */
          bhead_t *ba;

          ba = BH(((char *)b) + b->bh.bb.bsize);

          KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

          __kmp_bget_remove_from_freelist(b);

          thr->totalloc += (size_t)b->bh.bb.bsize;
          // ...

          /* Negate size to mark buffer allocated. */
          b->bh.bb.bsize = -(b->bh.bb.bsize);

          /* Mark this buffer as owned by this thread. */
          TCW_PTR(ba->bb.bthr, th);
          // ...
          buf = (void *)&(b->ql);
          KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
          return buf;
        }
      }

      /* Link to next buffer */
      b = (use_blink ? b->ql.blink : b->ql.flink);
    }
  }

  /* No suitable buffer found.  If a compact function is registered, give it a
     chance to reclaim space; otherwise fall through to acquisition. */
  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
    // ...
  }

  /* No buffer available with requested size free.  Try the acquisition
     function supplied via bectl(). */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large for an expansion block; acquire it directly. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {
        /* Mark the buffer special by setting its size field to zero. */
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th);

        bdh->bh.bb.prevfree = 0;
        // ...
        thr->totalloc += (size_t)size;
        // ...
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }
    } else {
      /* Obtain a new expansion block, add it to the pool and retry. */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        return bget(th, requested_size);
      }
    }
  }

  return NULL;
}
/* BGETZ -- Allocate a buffer and clear its contents to zero. */
static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      /* Buffer was acquired directly through acqfcn. */
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}
/* BGETR -- Reallocate a buffer, preserving its contents. */
static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) {
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }

  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer was acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf,
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}
/* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    thr->totalloc -= (size_t)bdh->tsize;
    // ...
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* The buffer belongs to another thread: queue it for release there. */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
    );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing. */
  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

  thr->totalloc += (size_t)b->bh.bb.bsize;
  // ...

  /* If the back link is nonzero, the previous buffer is free. */
  if (b->bh.bb.prevfree != 0) {
    /* Consolidate with the preceding free buffer: note that we subtract the
       size of the buffer being released because its size is negative. */
    bufsize size = b->bh.bb.bsize;

    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* Unlink the buffer from the old free list. */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* Previous buffer is allocated: mark this one free (positive size). */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* Insert this buffer into the free list according to its (new) size. */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now look at the next buffer in memory and, if it is free, combine it with
     this one. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* Relink the combined block on the free list matching its new size. */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* The buffer after the consolidated free block must be allocated. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
  // ...
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));

  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* Set the back pointer in the next buffer to this (free) buffer. */
  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined and this free buffer constitutes
     the entire pool block, release the block.  pool_len is defined so the
     test fails unless all pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list. */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    // ...
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

    /* Avoid leaving a stale last_pool pointer around. */
    if (thr->last_pool == b)
      thr->last_pool = 0;
  }
}
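
/* Sign convention recap (illustrative): bb.bsize is positive while a buffer is
   free and negative while it is allocated, and bb.prevfree in the following
   buffer holds the size of a preceding free buffer (or 0).  brel() uses these
   two facts to coalesce with the previous buffer (prevfree != 0) and with the
   next buffer (bn->bh.bb.bsize > 0) without scanning the whole pool. */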
/* BECTL -- Establish automatic pool expansion control. */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}
/* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  len &= ~((bufsize)(SizeQuant - 1));

  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
  // ...
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

  /* Since the block is initially occupied by a single free buffer, it had
     better not be (much) larger than the largest buffer whose size fits in
     bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block so recombination stops
     when the first buffer in memory is released. */
  b->bh.bb.prevfree = 0;

  /* The entire block becomes one free buffer; the final header slot is
     reserved for the end sentinel written below. */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Mark the owner with the low bit set so pool blocks can be told apart from
     ordinary buffers on the free list. */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address
  __kmp_bget_insert_into_freelist(thr, b);
  // ...
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
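
/* The ESent sentinel written above acts as a dummy "allocated" header at the
   very end of each pool block: its large negative size stops brel() from
   coalescing past the end of the pool, and brel()/__kmp_finalize_bget()
   assert on it when a whole pool block becomes free again. */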
/* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d     (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}
void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined and the last pool block is now a
     single free buffer, release it back to the system. */
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list. */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    // ...
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }

  /* Deallocate the thread-local bget data. */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}
void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before the one returned to the user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}
#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // align the returned address and save the allocated pointer just below it
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}

void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before the one returned to the user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}
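
/* Layout sketch (illustrative): kmpc_malloc(), kmpc_aligned_malloc() and
   kmpc_calloc() stash the pointer returned by bget()/bgetz() in the word
   immediately before the address handed to the user:

     [bget header][saved ptr][user data ...]
                             ^ pointer returned to the caller

   kmpc_free() and kmpc_realloc() read *((void **)ptr - 1) to recover the
   original allocation before calling brel()/bgetr(). */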
void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;

  if (ptr == NULL) {
    // If the pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before the one returned to the user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }

  return result;
}

// NOTE: the library must already have been initialized by a previous allocate
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract the allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}
void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}

void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}
/* OMP 5.0 Memory Management support */
static const char *kmp_mk_lib_name;
static void *h_memkind;
/* memkind experimental API: */
// memkind_malloc
static void *(*kmp_mk_alloc)(void *k, size_t sz);
// memkind_free
static void (*kmp_mk_free)(void *kind, void *ptr);
// memkind_check_available
static int (*kmp_mk_check)(void *kind);
// kinds we are going to use
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
static void *(*kmp_target_unlock_mem)(void *ptr, int device);
static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
static bool __kmp_target_mem_available;

#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  (MS == llvm_omp_target_host_mem_space ||                                     \
   MS == llvm_omp_target_shared_mem_space ||                                   \
   MS == llvm_omp_target_device_mem_space)

#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  (MA == llvm_omp_target_host_mem_alloc ||                                     \
   MA == llvm_omp_target_shared_mem_alloc ||                                   \
   MA == llvm_omp_target_device_mem_alloc)

#define KMP_IS_PREDEF_MEM_SPACE(MS)                                            \
  (MS == omp_null_mem_space || MS == omp_default_mem_space ||                  \
   MS == omp_large_cap_mem_space || MS == omp_const_mem_space ||               \
   MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space ||               \
   KMP_IS_TARGET_MEM_SPACE(MS))
// Memory allocator information is shared with the offload runtime; the entry
// points below are resolved lazily from the offload library.
static struct {
  bool supported = false;
  using get_mem_resources_t = int (*)(int, const int *, int,
                                      omp_memspace_handle_t, int *);
  using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t);
  using omp_free_t = void (*)(void *, omp_allocator_handle_t);
  get_mem_resources_t tgt_get_mem_resources = nullptr;
  omp_alloc_t tgt_omp_alloc = nullptr;
  omp_free_t tgt_omp_free = nullptr;

  /// Initialize interface with offload runtime.
  void init() {
    tgt_get_mem_resources =
        (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
    tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
    tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
    supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
  }
  /// Query the offload runtime for the memory resources backing a memspace.
  int get_mem_resources(int ndevs, const int *devs, int host,
                        omp_memspace_handle_t memspace, int *resources) {
    return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
  }
  /// Invoke offload runtime's memory allocation routine.
  void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
    return tgt_omp_alloc(size, allocator);
  }
  /// Invoke offload runtime's memory deallocation routine.
  void omp_free(void *ptr, omp_allocator_handle_t allocator) {
    tgt_omp_free(ptr, allocator);
  }
} __kmp_tgt_allocator;
extern "C" int omp_get_num_devices(void);

// Memory space information is shared with the offload runtime.  The members
// below belong to the list object defined as __kmp_tgt_memspace_list; the
// list head and the lock declarations follow the original source.
static struct {
  kmp_memspace_t *memspace_list = nullptr; // registered memory spaces
  kmp_lock_t mtx; // guards memspace_list

  /// Find memory space that matches the provided input.
  kmp_memspace_t *find(int num_resources, const int *resources,
                       omp_memspace_handle_t memspace) {
    for (kmp_memspace_t *ms = memspace_list; ms != nullptr; ms = ms->next) {
      if (ms->num_resources == num_resources && ms->memspace == memspace &&
          !memcmp(ms->resources, resources, sizeof(int) * num_resources))
        return ms;
    }
    return nullptr;
  }

  /// Return the memspace handle for the given resources, registering a new
  /// entry if no existing one matches.
  omp_memspace_handle_t get(int num_resources, const int *resources,
                            omp_memspace_handle_t memspace) {
    int gtid = __kmp_entry_gtid();
    __kmp_acquire_lock(&mtx, gtid);

    // Keep the resource list sorted so equal sets compare equal in find().
    int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
    KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
    qsort(sorted_resources, (size_t)num_resources, sizeof(int),
          [](const void *a, const void *b) {
            const int val_a = *(const int *)a;
            const int val_b = *(const int *)b;
            return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
          });

    kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
    if (ms != nullptr) {
      // Reuse the existing entry.
      __kmp_free(sorted_resources);
      __kmp_release_lock(&mtx, gtid);
      return (omp_memspace_handle_t)ms;
    }

    // No match: create a new entry and link it at the head of the list.
    // ...
    ms->memspace = memspace;
    ms->num_resources = num_resources;
    ms->resources = sorted_resources;
    ms->next = memspace_list;
    memspace_list = ms;
    __kmp_release_lock(&mtx, gtid);
    return (omp_memspace_handle_t)ms;
  }

  /// Initialize memory space list.
  void init() { __kmp_init_lock(&mtx); }

  /// Release resources for the memory space list.
  void fini() {
    kmp_memspace_t *ms = memspace_list;
    while (ms != nullptr) {
      kmp_memspace_t *next = ms->next;
      __kmp_free(ms->resources);
      __kmp_free(ms);
      ms = next;
    }
    memspace_list = nullptr;
    __kmp_destroy_lock(&mtx);
  }

  /// Return memory space for the provided devices.
  omp_memspace_handle_t get_memspace(int num_devices, const int *devices,
                                     int host_access,
                                     omp_memspace_handle_t memspace) {
    int actual_num_devices = num_devices;
    int *actual_devices = const_cast<int *>(devices);
    if (actual_num_devices == 0) {
      actual_num_devices = omp_get_num_devices();
      if (actual_num_devices <= 0)
        return omp_null_mem_space;
    }
    if (actual_devices == NULL) {
      // Use all available devices.
      actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
      for (int i = 0; i < actual_num_devices; i++)
        actual_devices[i] = i;
    }
    int num_resources = __kmp_tgt_allocator.get_mem_resources(
        actual_num_devices, actual_devices, host_access, memspace, NULL);
    if (num_resources <= 0)
      return omp_null_mem_space;

    omp_memspace_handle_t ms = omp_null_mem_space;
    if (num_resources > 0) {
      int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
      __kmp_tgt_allocator.get_mem_resources(
          actual_num_devices, actual_devices, host_access, memspace, resources);
      ms = get(num_resources, resources, memspace);
      __kmp_free(resources);
    }
    if (!devices && actual_devices)
      __kmp_free(actual_devices);
    return ms;
  }

  /// Return sub memory space from the parent memory space.
  omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
                                     omp_memspace_handle_t parent) {
    kmp_memspace_t *ms = (kmp_memspace_t *)parent;
    return get(num_resources, resources, ms->memspace);
  }
} __kmp_tgt_memspace_list;
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind) // valid memkind detected
    if (kmp_mk_check(**pkind)) // kind not available or error
      *pkind = NULL;
}
#endif

void __kmp_init_memkind() {
// Use of statically linked memkind is problematic, as it depends on libnuma.
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind) {
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
        !kmp_mk_check(*mk_default)) {
      __kmp_memkind_available = 1;
      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
      chk_kind(&mk_interleave);
      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
      chk_kind(&mk_hbw);
      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
      chk_kind(&mk_hbw_interleave);
      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
      chk_kind(&mk_hbw_preferred);
      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
      chk_kind(&mk_hugetlb);
      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
      chk_kind(&mk_hbw_hugetlb);
      mk_hbw_preferred_hugetlb =
          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
      chk_kind(&mk_hbw_preferred_hugetlb);
      mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
      chk_kind(&mk_dax_kmem);
      mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
      chk_kind(&mk_dax_kmem_all);
      mk_dax_kmem_preferred =
          (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
      chk_kind(&mk_dax_kmem_preferred);
      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
    dlclose(h_memkind); // failure
  }
#else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  kmp_mk_lib_name = "";
#endif
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
}

void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
}
static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
  const hwloc_topology_support *support;
  support = hwloc_topology_get_support(__kmp_hwloc_topology);
  if (policy == HWLOC_MEMBIND_BIND)
    return (support->membind->alloc_membind && support->membind->bind_membind);
  if (policy == HWLOC_MEMBIND_INTERLEAVE)
    return (support->membind->alloc_membind &&
            support->membind->interleave_membind);
  return false;
#else
  return false;
#endif
}

void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
                                hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
  hwloc_obj_t node;
  struct hwloc_location initiator;
  int ret;
  hwloc_cpuset_t mask = hwloc_bitmap_alloc();
  ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
  if (ret < 0) {
    hwloc_bitmap_free(mask);
    return NULL;
  }
  initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
  initiator.location.cpuset = mask;
  ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
                                      &node, NULL);
  if (ret < 0) {
    return NULL;
  }
  return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
                             HWLOC_MEMBIND_BYNODESET);
#else
  return NULL;
#endif
}

void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
                                 hwloc_membind_policy_t policy) {
#if HWLOC_API_VERSION >= 0x00020300
  void *ptr = NULL;
  if (ms == omp_high_bw_mem_space) {
    ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
  } else if (ms == omp_large_cap_mem_space) {
    ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
  } else {
    ptr = hwloc_alloc(__kmp_hwloc_topology, size);
  }
  return ptr;
#else
  return NULL;
#endif
}
void __kmp_init_target_mem() {
  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
  *(void **)(&kmp_target_alloc_shared) =
      KMP_DLSYM("llvm_omp_target_alloc_shared");
  *(void **)(&kmp_target_alloc_device) =
      KMP_DLSYM("llvm_omp_target_alloc_device");
  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
  *(void **)(&kmp_target_free_shared) =
      KMP_DLSYM("llvm_omp_target_free_shared");
  *(void **)(&kmp_target_free_device) =
      KMP_DLSYM("llvm_omp_target_free_device");
  __kmp_target_mem_available =
      kmp_target_alloc_host && kmp_target_alloc_shared &&
      kmp_target_alloc_device && kmp_target_free_host &&
      kmp_target_free_shared && kmp_target_free_device;
  // lock/pin and unlock/unpin target calls
  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
  __kmp_tgt_allocator.init();
  __kmp_tgt_memspace_list.init();
}

void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.fini(); }
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  kmp_allocator_t *al;
  int i;
  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
  al->memspace = ms;
  // Defaults for traits whose default value is not zero.
  al->partition = omp_atv_environment;
  al->pin_device = -1;
  al->preferred_device = -1;
  al->target_access = omp_atv_single;
  al->atomic_scope = omp_atv_device;

  for (i = 0; i < ntraits; ++i) {
    switch (traits[i].key) {
    case omp_atk_sync_hint:
    case omp_atk_access:
      break;
    case omp_atk_pinned:
      al->pinned = true;
      break;
    case omp_atk_alignment:
      __kmp_type_convert(traits[i].value, &(al->alignment));
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = traits[i].value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
      break;
    case omp_atk_partition:
      al->membind = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
                       al->membind == omp_atv_nearest ||
                       al->membind == omp_atv_blocked ||
                       al->membind == omp_atv_interleaved);
      al->memkind = RCAST(void **, traits[i].value);
      break;
    case omp_atk_pin_device:
      __kmp_type_convert(traits[i].value, &(al->pin_device));
      break;
    case omp_atk_preferred_device:
      __kmp_type_convert(traits[i].value, &(al->preferred_device));
      break;
    case omp_atk_target_access:
      al->target_access = (omp_alloctrait_value_t)traits[i].value;
      break;
    case omp_atk_atomic_scope:
      al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
      break;
    case omp_atk_part_size:
      __kmp_type_convert(traits[i].value, &(al->part_size));
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }

  if (al->memspace > kmp_max_mem_space) {
    // Memspace was created by a memspace constructor.
    return (omp_allocator_handle_t)al;
  }
  KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
  if (al->fb == 0) {
    // set default fallback
    al->fb = omp_atv_default_mem_fb;
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  } else if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  }
  if (__kmp_memkind_available) {
    // Use the memkind library if it is available.
    if (ms == omp_high_bw_mem_space) {
      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        al->memkind = mk_hbw_preferred;
      } else {
        // HBW requested but not available
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else if (ms == omp_large_cap_mem_space) {
      if (mk_dax_kmem_all) {
        al->memkind = mk_dax_kmem_all;
      } else if (mk_dax_kmem) {
        al->memkind = mk_dax_kmem;
      } else {
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else {
      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
        al->memkind = mk_interleave;
      } else {
        al->memkind = mk_default;
      }
    }
  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
    __kmp_free(al);
    return omp_null_allocator;
  } else {
    if (!__kmp_hwloc_available &&
        (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
      // cannot detect requested memory kind
      __kmp_free(al);
      return omp_null_allocator;
    }
  }
  return (omp_allocator_handle_t)al;
}
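
/* Usage sketch (illustrative, not part of this file): the trait parsing above
   backs the standard OpenMP 5.x allocator API, which a user program drives
   roughly like this:

     omp_alloctrait_t traits[] = {{omp_atk_alignment, 64},
                                  {omp_atk_fallback, omp_atv_null_fb}};
     omp_allocator_handle_t a =
         omp_init_allocator(omp_high_bw_mem_space, 2, traits);
     if (a != omp_null_allocator) {
       double *buf = (double *)omp_alloc(1024 * sizeof(double), a);
       // ... use buf ...
       omp_free(buf, a);
       omp_destroy_allocator(a);
     }

   omp_init_allocator returns omp_null_allocator when the requested memory
   space cannot be served (for example, high-bandwidth memory without memkind
   or hwloc support), which mirrors the early returns above. */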
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator > kmp_max_mem_alloc)
    __kmp_free(allocator);
}

void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator == omp_null_allocator)
    allocator = omp_default_mem_alloc;
  __kmp_threads[gtid]->th.th_def_allocator = allocator;
}

omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
  return __kmp_threads[gtid]->th.th_def_allocator;
}

omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs,
                                                 omp_memspace_handle_t memspace,
                                                 int host) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
    return omp_null_mem_space;

  return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
}

omp_allocator_handle_t
__kmp_get_devices_allocator(int ndevs, const int *devs,
                            omp_memspace_handle_t memspace, int host) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
    return omp_null_allocator;

  omp_memspace_handle_t mspace =
      __kmp_get_devices_memspace(ndevs, devs, memspace, host);
  if (mspace == omp_null_mem_space)
    return omp_null_allocator;

  return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
}

int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();
  if (memspace == omp_null_mem_space)
    return 0;
  if (memspace < kmp_max_mem_space)
    return 1;
  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
  return ms->num_resources;
}

omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
                                            int num_resources, int *resources) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();
  if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
    return memspace; // predefined spaces cannot be subdivided
  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
  if (num_resources == 0 || ms->num_resources < num_resources || !resources)
    return omp_null_mem_space;

  // The caller passes resource indices relative to the parent memspace;
  // translate them to absolute resource IDs before the lookup.
  int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
  for (int i = 0; i < num_resources; i++)
    resources_abs[i] = ms->resources[resources[i]];

  omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
      num_resources, resources_abs, memspace);
  __kmp_free(resources_abs);
  return submemspace;
}
typedef struct kmp_mem_desc { // Memory block descriptor
  void *ptr_alloc; // Pointer returned by the underlying allocator
  size_t size_a; // Size of the allocated block (user size + descriptor + align)
  size_t size_orig; // Original size requested by the caller
  void *ptr_align; // Aligned pointer returned to the caller
  kmp_allocator_t *allocator; // allocator that owns the block
} kmp_mem_desc_t;
constexpr size_t alignment = SizeQuant;

void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
  void *ptr = __kmp_alloc(gtid, 0, size, allocator);
  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
                           omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
                (int)size, allocator));
  void *ptr = __kmp_alloc(gtid, algn, size, allocator);
  KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
                    omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
                (int)size, allocator));
  void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_realloc(int gtid, void *ptr, size_t size,
                     omp_allocator_handle_t allocator,
                     omp_allocator_handle_t free_allocator) {
  KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
                allocator, free_allocator));
  void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
  KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
  return nptr;
}

void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
  ___kmpc_free(gtid, ptr, allocator);
  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
}
void *__kmp_alloc(int gtid, size_t algn, size_t size,
                  omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (size == 0)
    return NULL;
  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;
  kmp_int32 default_device =
      __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;

  al = RCAST(kmp_allocator_t *, allocator);

  int sz_desc = sizeof(kmp_mem_desc_t);
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr; // address returned by allocator
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  size_t align = alignment; // default alignment
  if (allocator > kmp_max_mem_alloc && al->alignment > align)
    align = al->alignment; // alignment required by the allocator trait
  if (align < algn)
    align = algn; // max of allocator trait, parameter and default
  desc.size_orig = size;
  desc.size_a = size + sz_desc + align;
  bool is_pinned = false;
  if (allocator > kmp_max_mem_alloc)
    is_pinned = al->pinned;

  // Use the default allocator if neither hwloc nor memkind is available.
  int use_default_allocator =
      (!__kmp_hwloc_available && !__kmp_memkind_available);

  if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
    // Memspace was created by a memspace constructor: defer to the offload
    // runtime's allocator.
    return __kmp_tgt_allocator.omp_alloc(size, allocator);
  }

  if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
    // Use the size directly as the memory may not be accessible on the host.
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (allocator == llvm_omp_target_host_mem_alloc)
        ptr = kmp_target_alloc_host(size, device);
      else if (allocator == llvm_omp_target_shared_mem_alloc)
        ptr = kmp_target_alloc_shared(size, device);
      else // allocator == llvm_omp_target_device_mem_alloc
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (al->memspace == llvm_omp_target_host_mem_space)
        ptr = kmp_target_alloc_host(size, device);
      else if (al->memspace == llvm_omp_target_shared_mem_space)
        ptr = kmp_target_alloc_shared(size, device);
      else // al->memspace == llvm_omp_target_device_mem_space
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (__kmp_hwloc_available) {
    if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
      if (allocator < kmp_max_mem_alloc) {
        // pre-defined allocator
        if (allocator == omp_high_bw_mem_alloc) {
          ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
                                          desc.size_a, HWLOC_MEMBIND_BIND);
          if (ptr == NULL)
            use_default_allocator = true;
        } else if (allocator == omp_large_cap_mem_alloc) {
          ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
                                          desc.size_a, HWLOC_MEMBIND_BIND);
          if (ptr == NULL)
            use_default_allocator = true;
        } else {
          use_default_allocator = true;
        }
        if (use_default_allocator) {
          ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
        }
      } else if (al->pool_size > 0) {
        // custom allocator with pool size requested
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
        if (used + desc.size_a > al->pool_size) {
          // not enough space, take the fallback path
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          } // else ptr == NULL
        } else {
          // pool has enough space
          if (al->membind == omp_atv_interleaved) {
            if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
              ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
                                               HWLOC_MEMBIND_INTERLEAVE);
            }
          } else if (al->membind == omp_atv_environment) {
            ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
                                             HWLOC_MEMBIND_DEFAULT);
          } else {
            ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
          }
          if (ptr == NULL) {
            if (al->fb == omp_atv_default_mem_fb) {
              al = (kmp_allocator_t *)omp_default_mem_alloc;
              ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
            } else if (al->fb == omp_atv_abort_fb) {
              KMP_ASSERT(0); // abort fallback requested
            } else if (al->fb == omp_atv_allocator_fb) {
              KMP_ASSERT(al != al->fb_data);
              al = al->fb_data;
              return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
            }
          }
        }
      } else {
        // custom allocator, no pool size requested
        if (al->membind == omp_atv_interleaved) {
          if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
            ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
                                             HWLOC_MEMBIND_INTERLEAVE);
          }
        } else if (al->membind == omp_atv_environment) {
          ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
                                           HWLOC_MEMBIND_DEFAULT);
        } else {
          ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
        }
        if (ptr == NULL) {
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          }
        }
      }
    } else {
      // membind is not supported: plain hwloc allocation
      ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
    }
  } else if (__kmp_memkind_available) {
    if (allocator < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
      } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
      } else {
        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
      }
    } else if (al->pool_size > 0) {
      // custom allocator with pool size requested
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
      if (used + desc.size_a > al->pool_size) {
        // not enough space, take the fallback path
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        } // else ptr == NULL
      } else {
        // pool has enough space
        ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
        if (ptr == NULL) {
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = kmp_mk_alloc(*mk_default, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
            if (is_pinned && kmp_target_lock_mem)
              kmp_target_lock_mem(ptr, size, default_device);
            return ptr;
          }
        }
      }
    } else {
      // custom allocator, no pool size requested
      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
      if (ptr == NULL) {
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        }
      }
    }
  } else if (allocator < kmp_max_mem_alloc) {
    // pre-defined allocator without memkind/hwloc support
    if (allocator == omp_high_bw_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
    } else if (allocator == omp_large_cap_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
    } else if (allocator == omp_const_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
    } else if (allocator == omp_low_lat_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
    } else if (allocator == omp_cgroup_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
    } else if (allocator == omp_pteam_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
    } else if (allocator == omp_thread_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
    }
    use_default_allocator = true;
    if (use_default_allocator) {
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      use_default_allocator = false;
    }
  } else if (al->pool_size > 0) {
    // custom allocator with pool size requested
    kmp_uint64 used =
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
    if (used + desc.size_a > al->pool_size) {
      // not enough space, take the fallback path
      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      if (al->fb == omp_atv_default_mem_fb) {
        al = (kmp_allocator_t *)omp_default_mem_alloc;
        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      } else if (al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } else if (al->fb == omp_atv_allocator_fb) {
        KMP_ASSERT(al != al->fb_data);
        al = al->fb_data;
        ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
        if (is_pinned && kmp_target_lock_mem)
          kmp_target_lock_mem(ptr, size, default_device);
        return ptr;
      } // else ptr == NULL
    } else {
      // pool has enough space
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      }
    }
  } else {
    // custom allocator, no pool size requested
    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0); // abort fallback requested
    }
  }
  KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
  if (ptr == NULL)
    return NULL;

  if (is_pinned && kmp_target_lock_mem)
    kmp_target_lock_mem(ptr, desc.size_a, default_device);

  addr = (kmp_uintptr_t)ptr;
  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
  addr_descr = addr_align - sz_desc;

  desc.ptr_alloc = ptr;
  desc.ptr_align = (void *)addr_align;
  desc.allocator = al;
  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
  KMP_MB();

  return desc.ptr_align;
}
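
/* Layout sketch (illustrative): for a request of `size` bytes the runtime
   over-allocates `size + sizeof(kmp_mem_desc_t) + align` bytes, rounds the
   user pointer up to `align`, and stores the descriptor immediately below it:

     ptr_alloc ... [padding] [kmp_mem_desc_t] [user data (size bytes)]
                                              ^ ptr_align, returned to caller

   ___kmpc_free() reads the descriptor at `ptr - sizeof(kmp_mem_desc_t)` to
   recover ptr_alloc, size_a and the owning allocator. */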
void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
                   omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;

  al = RCAST(kmp_allocator_t *, allocator);

  if (nmemb == 0 || size == 0)
    return ptr;

  if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
    // The request would overflow; honor the abort fallback if requested.
    if (al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0);
    }
    return ptr;
  }

  ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);

  if (ptr) {
    memset(ptr, 0x00, nmemb * size);
  }
  return ptr;
}

void *__kmp_realloc(int gtid, void *ptr, size_t size,
                    omp_allocator_handle_t allocator,
                    omp_allocator_handle_t free_allocator) {
  void *nptr = NULL;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (size == 0) {
    if (ptr != NULL)
      ___kmpc_free(gtid, ptr, free_allocator);
    return nptr;
  }

  nptr = __kmp_alloc(gtid, 0, size, allocator);

  if (nptr != NULL && ptr != NULL) {
    kmp_mem_desc_t desc;
    kmp_uintptr_t addr_align; // address returned to the caller
    kmp_uintptr_t addr_descr; // address of the memory block descriptor

    addr_align = (kmp_uintptr_t)ptr;
    addr_descr = addr_align - sizeof(kmp_mem_desc_t);
    desc = *((kmp_mem_desc_t *)addr_descr); // read the descriptor

    KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
    KMP_DEBUG_ASSERT(desc.size_orig > 0);
    KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
    KMP_MEMCPY((char *)nptr, (char *)ptr,
               (size_t)((size < desc.size_orig) ? size : desc.size_orig));
  }

  if (nptr != NULL) {
    ___kmpc_free(gtid, ptr, free_allocator);
  }

  return nptr;
}
void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  if (ptr == NULL)
    return;

  kmp_allocator_t *al;
  omp_allocator_handle_t oal;
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr_align; // address returned to the caller
  kmp_uintptr_t addr_descr; // address of the memory block descriptor

  if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
    // Memspace was created by a memspace constructor.
    __kmp_tgt_allocator.omp_free(ptr, allocator);
    return;
  }

  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
                                     (allocator > kmp_max_mem_alloc &&
                                      KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    if (allocator == llvm_omp_target_host_mem_alloc) {
      kmp_target_free_host(ptr, device);
    } else if (allocator == llvm_omp_target_shared_mem_alloc) {
      kmp_target_free_shared(ptr, device);
    } else if (allocator == llvm_omp_target_device_mem_alloc) {
      kmp_target_free_device(ptr, device);
    }
    return;
  }

  addr_align = (kmp_uintptr_t)ptr;
  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
  desc = *((kmp_mem_desc_t *)addr_descr); // read the descriptor

  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
  if (allocator) {
    KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
  }
  al = desc.allocator;
  oal = (omp_allocator_handle_t)al;
  KMP_DEBUG_ASSERT(al);

  if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    kmp_target_unlock_mem(desc.ptr_alloc, device);
  }

  if (__kmp_hwloc_available) {
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
  } else if (__kmp_memkind_available) {
    if (oal < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
      } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
      } else {
        kmp_mk_free(*mk_default, desc.ptr_alloc);
      }
    } else {
      if (al->pool_size > 0) { // custom allocator with pool size requested
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        KMP_DEBUG_ASSERT(used >= desc.size_a);
      }
      kmp_mk_free(*al->memkind, desc.ptr_alloc);
    }
  } else {
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
  }
}
struct kmp_mem_descr { // Memory block descriptor
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of the allocated memory block.
  void *ptr_aligned; // Aligned pointer to be used by client code.
  size_t size_aligned; // Size of the aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/* Allocate memory on the requested boundary, zero-filled.  The original
   pointer and block sizes are saved in a descriptor just below the aligned
   pointer so ___kmp_free() can find them again. */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address returned to the caller.
  kmp_uintptr_t addr_descr; // Address of the memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  }

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, ("   ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  // Fill the whole block with the "freed memory" pattern first...
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
#endif
  // ...then zero the part handed back to the caller.
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);

  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
}
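
/* Worked example (illustrative, assumed values): with alignment = 64,
   sizeof(kmp_mem_descr_t) = 32 and malloc returning 0x1008, the computation
   above gives addr_aligned = (0x1008 + 32 + 64) & ~63 = 0x1040 and
   addr_descr = 0x1020, so the descriptor always sits directly below the
   aligned pointer handed back to the caller, where ___kmp_free() can find
   it again. */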
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
}

/* Allocate memory on a page boundary, zero-filled.  Must be freed with
   __kmp_free(). */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
}

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before the call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by the caller.

  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);

  KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
}
#if USE_FAST_MEMORY == 3
// Keep at most this many blocks on an "other-thread" free list before
// returning the whole chain to its owner.
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks.
#define DCACHE_LINE 128

void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  size_t num_lines, idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than a cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ): skip the free lists
  }

  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of the no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty; pop the head of the sync free list (filled
    // in by other threads only) and push NULL in its place
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of the chain onto the no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }

alloc_call:
  // no block found in the free lists; allocate one
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember the allocated pointer
  descr->ptr_aligned = (void *)this_thr; // remember the allocating thread
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
}
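
/* Size-class sketch (illustrative, based on the bucketing above): requests
   are measured in 128-byte cache lines and mapped to four buckets of up to 2,
   4, 16 and 64 lines (256 B, 512 B, 2 KB, 8 KB).  A 700-byte request needs 6
   lines, so it is served from the 16-line bucket and rounded up to 2 KB;
   anything above 64 lines bypasses the free lists and goes straight to
   bget(). */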
// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // freed block is bigger than 8KB: skip the free lists
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // thread owning the block
  if (alloc_thr == this_thr) {
    // push the block onto the self no-sync free list (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create a new free list for blocks owned by alloc_thr.
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new size if we add the current block
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // add the current block to the "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either the queue's owner changed or the size limit was hit:
        // hand the old queue back to its allocating thread (q_th)
        // synchronously, then start a new list for alloc_thr's blocks
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          // queue length should decrease by one at each step through the list
          KMP_DEBUG_ASSERT(
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember the tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push the chain onto the owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        // the next pointer must be set before publishing the chain
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // start a new list of not-self blocks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of the queue keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
}
// Initialize the thread free lists related to fast memory.
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory.
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // With the BGET underlying allocator, walk through its structures.
  int bin;
  void **lst = NULL;
  thr_data_t *thr = get_thr_data(th);

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release spurious lists

  // Collect entire pool blocks (bpool() marks them by setting the low bit of
  // the owner pointer) from all free-list bins into a chain, then release the
  // chain back to the system allocator.
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // buffer is an entire pool block
        // ... link the block into lst ...
      }
      b = b->ql.flink;
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
    // ...
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}