// Select a matched aligned-allocate/free pair for this platform.  Exactly
// one branch defines KMP_ALIGNED_ALLOCATE(size, alignment) and
// KMP_ALIGNED_FREE(ptr); callers must always pair the two, since the
// underlying allocators differ per branch.
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
// Intel intrinsics provide a dedicated aligned allocator.
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// C11 aligned_alloc() requires size to be a multiple of alignment, so
// round the request up first.  (Macro name keeps the historical "ALGIN"
// spelling in case out-of-tree code references it.)
#define KMP_ALGIN_UP(val, alignment)                                           \
  (((val) + (alignment)-1) / (alignment) * (alignment))
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc(alignment, KMP_ALGIN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
// POSIX fallback: wrap posix_memalign() in a malloc-style interface.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr = NULL; // posix_memalign leaves ptr unspecified on failure
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) // nonzero return is the error code; do not consult errno
    return NULL;
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h> // Windows CRT: _aligned_malloc/_aligned_free
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
// Last resort: internal allocator with no extra alignment guarantee.
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
// Align hot barrier fields to four cache lines so adjacent fields never
// share a line group (avoids false sharing between waiting threads).
// Overridable by the build, hence the #ifndef guard.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
59 class distributedBarrier {
61 kmp_uint32
volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
65 std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
69 kmp_uint64
volatile KMP_FOURLINE_ALIGN_CACHE iter;
73 std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
76 void init(
size_t nthr);
77 void resize(
size_t nthr);
78 void computeGo(
size_t n);
79 void computeVarsForN(
size_t n);
86 IDEAL_CONTENTION = 16,
89 flags_s *flags[MAX_ITERS];
94 size_t KMP_ALIGN_CACHE num_threads;
95 size_t KMP_ALIGN_CACHE max_threads;
97 size_t KMP_ALIGN_CACHE num_gos;
99 size_t KMP_ALIGN_CACHE num_groups;
101 size_t KMP_ALIGN_CACHE threads_per_go;
102 bool KMP_ALIGN_CACHE fix_threads_per_go;
104 size_t KMP_ALIGN_CACHE threads_per_group;
106 size_t KMP_ALIGN_CACHE gos_per_group;
109 distributedBarrier() =
delete;
110 ~distributedBarrier() =
delete;
113 static distributedBarrier *allocate(
int nThreads) {
114 distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
115 sizeof(distributedBarrier), 4 * CACHE_LINE);
117 KMP_FATAL(MemoryAllocFailed);
121 for (
int i = 0; i < MAX_ITERS; ++i)
127 d->fix_threads_per_go =
false;
129 d->computeGo(nThreads);
133 static void deallocate(distributedBarrier *db);
135 void update_num_threads(
size_t nthr) { init(nthr); }
137 bool need_resize(
size_t new_nthr) {
return (new_nthr > max_threads); }
138 size_t get_num_threads() {
return num_threads; }
139 kmp_uint64 go_release();