LLVM OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15 
16 #include "kmp.h"
17 #include "kmp_os.h"
18 #include <limits>
19 
20 #if KMP_AFFINITY_SUPPORTED
21 #if KMP_USE_HWLOC
22 class KMPHwlocAffinity : public KMPAffinity {
23 public:
24  class Mask : public KMPAffinity::Mask {
25  hwloc_cpuset_t mask;
26 
27  public:
28  Mask() {
29  mask = hwloc_bitmap_alloc();
30  this->zero();
31  }
32  ~Mask() { hwloc_bitmap_free(mask); }
33  void set(int i) override { hwloc_bitmap_set(mask, i); }
34  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36  void zero() override { hwloc_bitmap_zero(mask); }
37  void copy(const KMPAffinity::Mask *src) override {
38  const Mask *convert = static_cast<const Mask *>(src);
39  hwloc_bitmap_copy(mask, convert->mask);
40  }
41  void bitwise_and(const KMPAffinity::Mask *rhs) override {
42  const Mask *convert = static_cast<const Mask *>(rhs);
43  hwloc_bitmap_and(mask, mask, convert->mask);
44  }
45  void bitwise_or(const KMPAffinity::Mask *rhs) override {
46  const Mask *convert = static_cast<const Mask *>(rhs);
47  hwloc_bitmap_or(mask, mask, convert->mask);
48  }
49  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
50  int begin() const override { return hwloc_bitmap_first(mask); }
51  int end() const override { return -1; }
52  int next(int previous) const override {
53  return hwloc_bitmap_next(mask, previous);
54  }
55  int get_system_affinity(bool abort_on_error) override {
56  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
57  "Illegal get affinity operation when not capable");
58  long retval =
59  hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
60  if (retval >= 0) {
61  return 0;
62  }
63  int error = errno;
64  if (abort_on_error) {
65  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
66  }
67  return error;
68  }
69  int set_system_affinity(bool abort_on_error) const override {
70  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71  "Illegal set affinity operation when not capable");
72  long retval =
73  hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
74  if (retval >= 0) {
75  return 0;
76  }
77  int error = errno;
78  if (abort_on_error) {
79  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
80  }
81  return error;
82  }
83 #if KMP_OS_WINDOWS
84  int set_process_affinity(bool abort_on_error) const override {
85  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
86  "Illegal set process affinity operation when not capable");
87  int error = 0;
88  const hwloc_topology_support *support =
89  hwloc_topology_get_support(__kmp_hwloc_topology);
90  if (support->cpubind->set_proc_cpubind) {
91  int retval;
92  retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
93  HWLOC_CPUBIND_PROCESS);
94  if (retval >= 0)
95  return 0;
96  error = errno;
97  if (abort_on_error)
98  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
99  }
100  return error;
101  }
102 #endif
103  int get_proc_group() const override {
104  int group = -1;
105 #if KMP_OS_WINDOWS
106  if (__kmp_num_proc_groups == 1) {
107  return 1;
108  }
109  for (int i = 0; i < __kmp_num_proc_groups; i++) {
110  // On Windows, unsigned long is 32 bits, so each 64-processor group spans two ulongs
111  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
112  unsigned long second_32_bits =
113  hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
114  if (first_32_bits == 0 && second_32_bits == 0) {
115  continue;
116  }
117  if (group >= 0) {
118  return -1;
119  }
120  group = i;
121  }
122 #endif /* KMP_OS_WINDOWS */
123  return group;
124  }
125  };
126  void determine_capable(const char *var) override {
127  const hwloc_topology_support *topology_support;
128  if (__kmp_hwloc_topology == NULL) {
129  if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
130  __kmp_hwloc_error = TRUE;
131  if (__kmp_affinity.flags.verbose) {
132  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
133  }
134  }
135  if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
136  __kmp_hwloc_error = TRUE;
137  if (__kmp_affinity.flags.verbose) {
138  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
139  }
140  }
141  }
142  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
143  // Is the system capable of setting/getting this thread's affinity?
144  // Also, is topology discovery possible? (pu indicates ability to discover
145  // processing units). And finally, were there no errors when calling any
146  // hwloc_* API functions?
147  if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
148  topology_support->cpubind->get_thisthread_cpubind &&
149  topology_support->discovery->pu && !__kmp_hwloc_error) {
150  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
151  KMP_AFFINITY_ENABLE(TRUE);
152  } else {
153  // indicate that hwloc didn't work and disable affinity
154  __kmp_hwloc_error = TRUE;
155  KMP_AFFINITY_DISABLE();
156  }
157  }
158  void bind_thread(int which) override {
159  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
160  "Illegal set affinity operation when not capable");
161  KMPAffinity::Mask *mask;
162  KMP_CPU_ALLOC_ON_STACK(mask);
163  KMP_CPU_ZERO(mask);
164  KMP_CPU_SET(which, mask);
165  __kmp_set_system_affinity(mask, TRUE);
166  KMP_CPU_FREE_FROM_STACK(mask);
167  }
168  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
169  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
170  KMPAffinity::Mask *allocate_mask_array(int num) override {
171  return new Mask[num];
172  }
173  void deallocate_mask_array(KMPAffinity::Mask *array) override {
174  Mask *hwloc_array = static_cast<Mask *>(array);
175  delete[] hwloc_array;
176  }
177  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
178  int index) override {
179  Mask *hwloc_array = static_cast<Mask *>(array);
180  return &(hwloc_array[index]);
181  }
182  api_type get_api_type() const override { return HWLOC; }
183 };
184 #endif /* KMP_USE_HWLOC */
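/* The Mask iteration protocol (begin()/next()/end()) is intentionally loose so
   that both back ends fit it: the hwloc mask above uses end() == -1 because
   hwloc_bitmap_next() returns -1 past the last set bit, while the native masks
   below report the total number of bits. An illustrative caller-side sketch
   (the helper name is hypothetical), assuming only the abstract
   KMPAffinity::Mask interface and <cstdio>:

     static void __kmp_print_mask_bits(const KMPAffinity::Mask *m) {
       for (int i = m->begin(); i != m->end(); i = m->next(i))
         printf(" %d", i); // i is an OS processor number that is set in m
     }

   The loop terminates for either implementation because next() eventually
   returns the same sentinel value that end() reports. */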
185 
186 #if KMP_OS_LINUX || KMP_OS_FREEBSD
187 #if KMP_OS_LINUX
188 /* On some of the older OS's that we build on, these constants aren't present
189    in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
190    all systems of the same arch where they are defined, and they cannot
191    change; they are carved in stone forever. */
192 #include <sys/syscall.h>
193 #if KMP_ARCH_X86 || KMP_ARCH_ARM
194 #ifndef __NR_sched_setaffinity
195 #define __NR_sched_setaffinity 241
196 #elif __NR_sched_setaffinity != 241
197 #error Wrong code for setaffinity system call.
198 #endif /* __NR_sched_setaffinity */
199 #ifndef __NR_sched_getaffinity
200 #define __NR_sched_getaffinity 242
201 #elif __NR_sched_getaffinity != 242
202 #error Wrong code for getaffinity system call.
203 #endif /* __NR_sched_getaffinity */
204 #elif KMP_ARCH_AARCH64
205 #ifndef __NR_sched_setaffinity
206 #define __NR_sched_setaffinity 122
207 #elif __NR_sched_setaffinity != 122
208 #error Wrong code for setaffinity system call.
209 #endif /* __NR_sched_setaffinity */
210 #ifndef __NR_sched_getaffinity
211 #define __NR_sched_getaffinity 123
212 #elif __NR_sched_getaffinity != 123
213 #error Wrong code for getaffinity system call.
214 #endif /* __NR_sched_getaffinity */
215 #elif KMP_ARCH_RISCV64
216 #ifndef __NR_sched_setaffinity
217 #define __NR_sched_setaffinity 122
218 #elif __NR_sched_setaffinity != 122
219 #error Wrong code for setaffinity system call.
220 #endif /* __NR_sched_setaffinity */
221 #ifndef __NR_sched_getaffinity
222 #define __NR_sched_getaffinity 123
223 #elif __NR_sched_getaffinity != 123
224 #error Wrong code for getaffinity system call.
225 #endif /* __NR_sched_getaffinity */
226 #elif KMP_ARCH_X86_64
227 #ifndef __NR_sched_setaffinity
228 #define __NR_sched_setaffinity 203
229 #elif __NR_sched_setaffinity != 203
230 #error Wrong code for setaffinity system call.
231 #endif /* __NR_sched_setaffinity */
232 #ifndef __NR_sched_getaffinity
233 #define __NR_sched_getaffinity 204
234 #elif __NR_sched_getaffinity != 204
235 #error Wrong code for getaffinity system call.
236 #endif /* __NR_sched_getaffinity */
237 #elif KMP_ARCH_PPC64
238 #ifndef __NR_sched_setaffinity
239 #define __NR_sched_setaffinity 222
240 #elif __NR_sched_setaffinity != 222
241 #error Wrong code for setaffinity system call.
242 #endif /* __NR_sched_setaffinity */
243 #ifndef __NR_sched_getaffinity
244 #define __NR_sched_getaffinity 223
245 #elif __NR_sched_getaffinity != 223
246 #error Wrong code for getaffinity system call.
247 #endif /* __NR_sched_getaffinity */
248 #elif KMP_ARCH_MIPS
249 #ifndef __NR_sched_setaffinity
250 #define __NR_sched_setaffinity 4239
251 #elif __NR_sched_setaffinity != 4239
252 #error Wrong code for setaffinity system call.
253 #endif /* __NR_sched_setaffinity */
254 #ifndef __NR_sched_getaffinity
255 #define __NR_sched_getaffinity 4240
256 #elif __NR_sched_getaffinity != 4240
257 #error Wrong code for getaffinity system call.
258 #endif /* __NR_sched_getaffinity */
259 #elif KMP_ARCH_MIPS64
260 #ifndef __NR_sched_setaffinity
261 #define __NR_sched_setaffinity 5195
262 #elif __NR_sched_setaffinity != 5195
263 #error Wrong code for setaffinity system call.
264 #endif /* __NR_sched_setaffinity */
265 #ifndef __NR_sched_getaffinity
266 #define __NR_sched_getaffinity 5196
267 #elif __NR_sched_getaffinity != 5196
268 #error Wrong code for getaffinity system call.
269 #endif /* __NR_sched_getaffinity */
270 #elif KMP_ARCH_LOONGARCH64
271 #ifndef __NR_sched_setaffinity
272 #define __NR_sched_setaffinity 122
273 #elif __NR_sched_setaffinity != 122
274 #error Wrong code for setaffinity system call.
275 #endif /* __NR_sched_setaffinity */
276 #ifndef __NR_sched_getaffinity
277 #define __NR_sched_getaffinity 123
278 #elif __NR_sched_getaffinity != 123
279 #error Wrong code for getaffinity system call.
280 #endif /* __NR_sched_getaffinity */
292 #else
293 #error Unknown or unsupported architecture
294 #endif /* KMP_ARCH_* */
295 #elif KMP_OS_FREEBSD
296 #include <pthread.h>
297 #include <pthread_np.h>
298 #endif
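/* The Linux Mask below drives affinity through the raw syscalls whose numbers
   are pinned above, passing 0 as the pid argument ("the calling thread") and
   __kmp_affin_mask_size bytes of mask storage. A minimal stand-alone sketch of
   the same call, with a hypothetical fixed-size buffer and error handling
   reduced to errno:

     unsigned long buf[16] = {0}; // room for 1024 processor bits
     long r = syscall(__NR_sched_getaffinity, 0, sizeof(buf), buf);
     if (r < 0) {
       // errno identifies the failure, exactly as get_system_affinity()
       // below reports it
     }

   sched_setaffinity is invoked the same way once the buffer has been filled. */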
299 class KMPNativeAffinity : public KMPAffinity {
300  class Mask : public KMPAffinity::Mask {
301  typedef unsigned long mask_t;
302  typedef decltype(__kmp_affin_mask_size) mask_size_type;
303  static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
304  static const mask_t ONE = 1;
305  mask_size_type get_num_mask_types() const {
306  return __kmp_affin_mask_size / sizeof(mask_t);
307  }
308 
309  public:
310  mask_t *mask;
311  Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
312  ~Mask() {
313  if (mask)
314  __kmp_free(mask);
315  }
316  void set(int i) override {
317  mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
318  }
319  bool is_set(int i) const override {
320  return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
321  }
322  void clear(int i) override {
323  mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
324  }
325  void zero() override {
326  mask_size_type e = get_num_mask_types();
327  for (mask_size_type i = 0; i < e; ++i)
328  mask[i] = (mask_t)0;
329  }
330  void copy(const KMPAffinity::Mask *src) override {
331  const Mask *convert = static_cast<const Mask *>(src);
332  mask_size_type e = get_num_mask_types();
333  for (mask_size_type i = 0; i < e; ++i)
334  mask[i] = convert->mask[i];
335  }
336  void bitwise_and(const KMPAffinity::Mask *rhs) override {
337  const Mask *convert = static_cast<const Mask *>(rhs);
338  mask_size_type e = get_num_mask_types();
339  for (mask_size_type i = 0; i < e; ++i)
340  mask[i] &= convert->mask[i];
341  }
342  void bitwise_or(const KMPAffinity::Mask *rhs) override {
343  const Mask *convert = static_cast<const Mask *>(rhs);
344  mask_size_type e = get_num_mask_types();
345  for (mask_size_type i = 0; i < e; ++i)
346  mask[i] |= convert->mask[i];
347  }
348  void bitwise_not() override {
349  mask_size_type e = get_num_mask_types();
350  for (mask_size_type i = 0; i < e; ++i)
351  mask[i] = ~(mask[i]);
352  }
353  int begin() const override {
354  int retval = 0;
355  while (retval < end() && !is_set(retval))
356  ++retval;
357  return retval;
358  }
359  int end() const override {
360  int e;
361  __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
362  return e;
363  }
364  int next(int previous) const override {
365  int retval = previous + 1;
366  while (retval < end() && !is_set(retval))
367  ++retval;
368  return retval;
369  }
370  int get_system_affinity(bool abort_on_error) override {
371  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
372  "Illegal get affinity operation when not capable");
373 #if KMP_OS_LINUX
374  long retval =
375  syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
376 #elif KMP_OS_FREEBSD
377  int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
378  reinterpret_cast<cpuset_t *>(mask));
379  int retval = (r == 0 ? 0 : -1);
380 #endif
381  if (retval >= 0) {
382  return 0;
383  }
384  int error = errno;
385  if (abort_on_error) {
386  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
387  }
388  return error;
389  }
390  int set_system_affinity(bool abort_on_error) const override {
391  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
392  "Illegal set affinity operation when not capable");
393 #if KMP_OS_LINUX
394  long retval =
395  syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
396 #elif KMP_OS_FREEBSD
397  int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
398  reinterpret_cast<cpuset_t *>(mask));
399  int retval = (r == 0 ? 0 : -1);
400 #endif
401  if (retval >= 0) {
402  return 0;
403  }
404  int error = errno;
405  if (abort_on_error) {
406  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
407  }
408  return error;
409  }
410  };
411  void determine_capable(const char *env_var) override {
412  __kmp_affinity_determine_capable(env_var);
413  }
414  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
415  KMPAffinity::Mask *allocate_mask() override {
416  KMPNativeAffinity::Mask *retval = new Mask();
417  return retval;
418  }
419  void deallocate_mask(KMPAffinity::Mask *m) override {
420  KMPNativeAffinity::Mask *native_mask =
421  static_cast<KMPNativeAffinity::Mask *>(m);
422  delete native_mask;
423  }
424  KMPAffinity::Mask *allocate_mask_array(int num) override {
425  return new Mask[num];
426  }
427  void deallocate_mask_array(KMPAffinity::Mask *array) override {
428  Mask *linux_array = static_cast<Mask *>(array);
429  delete[] linux_array;
430  }
431  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
432  int index) override {
433  Mask *linux_array = static_cast<Mask *>(array);
434  return &(linux_array[index]);
435  }
436  api_type get_api_type() const override { return NATIVE_OS; }
437 };
438 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
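/* The native Mask above is a flat bit vector of unsigned long words,
   __kmp_affin_mask_size bytes in total, with OS processor i stored in word
   i / BITS_PER_MASK_T at bit i % BITS_PER_MASK_T. A worked example with
   64-bit mask_t: processor 70 maps to mask[70 / 64] == mask[1], bit
   70 % 64 == 6, so

     set(70);    // executes mask[1] |= (ONE << 6)
     is_set(70); // tests that same bit

   which is why begin()/next() simply scan bit indices with is_set(). */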
439 
440 #if KMP_OS_WINDOWS
441 class KMPNativeAffinity : public KMPAffinity {
442  class Mask : public KMPAffinity::Mask {
443  typedef ULONG_PTR mask_t;
444  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
445  mask_t *mask;
446 
447  public:
448  Mask() {
449  mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
450  }
451  ~Mask() {
452  if (mask)
453  __kmp_free(mask);
454  }
455  void set(int i) override {
456  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
457  }
458  bool is_set(int i) const override {
459  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
460  }
461  void clear(int i) override {
462  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
463  }
464  void zero() override {
465  for (int i = 0; i < __kmp_num_proc_groups; ++i)
466  mask[i] = 0;
467  }
468  void copy(const KMPAffinity::Mask *src) override {
469  const Mask *convert = static_cast<const Mask *>(src);
470  for (int i = 0; i < __kmp_num_proc_groups; ++i)
471  mask[i] = convert->mask[i];
472  }
473  void bitwise_and(const KMPAffinity::Mask *rhs) override {
474  const Mask *convert = static_cast<const Mask *>(rhs);
475  for (int i = 0; i < __kmp_num_proc_groups; ++i)
476  mask[i] &= convert->mask[i];
477  }
478  void bitwise_or(const KMPAffinity::Mask *rhs) override {
479  const Mask *convert = static_cast<const Mask *>(rhs);
480  for (int i = 0; i < __kmp_num_proc_groups; ++i)
481  mask[i] |= convert->mask[i];
482  }
483  void bitwise_not() override {
484  for (int i = 0; i < __kmp_num_proc_groups; ++i)
485  mask[i] = ~(mask[i]);
486  }
487  int begin() const override {
488  int retval = 0;
489  while (retval < end() && !is_set(retval))
490  ++retval;
491  return retval;
492  }
493  int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
494  int next(int previous) const override {
495  int retval = previous + 1;
496  while (retval < end() && !is_set(retval))
497  ++retval;
498  return retval;
499  }
500  int set_process_affinity(bool abort_on_error) const override {
501  if (__kmp_num_proc_groups <= 1) {
502  if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
503  DWORD error = GetLastError();
504  if (abort_on_error) {
505  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
506  __kmp_msg_null);
507  }
508  return error;
509  }
510  }
511  return 0;
512  }
513  int set_system_affinity(bool abort_on_error) const override {
514  if (__kmp_num_proc_groups > 1) {
515  // Check for a valid mask.
516  GROUP_AFFINITY ga;
517  int group = get_proc_group();
518  if (group < 0) {
519  if (abort_on_error) {
520  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
521  }
522  return -1;
523  }
524  // Transform the bit vector into a GROUP_AFFINITY struct
525  // and make the system call to set affinity.
526  ga.Group = group;
527  ga.Mask = mask[group];
528  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
529 
530  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
531  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
532  DWORD error = GetLastError();
533  if (abort_on_error) {
534  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
535  __kmp_msg_null);
536  }
537  return error;
538  }
539  } else {
540  if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
541  DWORD error = GetLastError();
542  if (abort_on_error) {
543  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
544  __kmp_msg_null);
545  }
546  return error;
547  }
548  }
549  return 0;
550  }
551  int get_system_affinity(bool abort_on_error) override {
552  if (__kmp_num_proc_groups > 1) {
553  this->zero();
554  GROUP_AFFINITY ga;
555  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
556  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
557  DWORD error = GetLastError();
558  if (abort_on_error) {
559  __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
560  KMP_ERR(error), __kmp_msg_null);
561  }
562  return error;
563  }
564  if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
565  (ga.Mask == 0)) {
566  return -1;
567  }
568  mask[ga.Group] = ga.Mask;
569  } else {
570  mask_t newMask, sysMask, retval;
571  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
572  DWORD error = GetLastError();
573  if (abort_on_error) {
574  __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
575  KMP_ERR(error), __kmp_msg_null);
576  }
577  return error;
578  }
579  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
580  if (!retval) {
581  DWORD error = GetLastError();
582  if (abort_on_error) {
583  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
584  KMP_ERR(error), __kmp_msg_null);
585  }
586  return error;
587  }
588  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
589  if (!newMask) {
590  DWORD error = GetLastError();
591  if (abort_on_error) {
592  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
593  KMP_ERR(error), __kmp_msg_null);
594  }
595  }
596  *mask = retval;
597  }
598  return 0;
599  }
600  int get_proc_group() const override {
601  int group = -1;
602  if (__kmp_num_proc_groups == 1) {
603  return 1;
604  }
605  for (int i = 0; i < __kmp_num_proc_groups; i++) {
606  if (mask[i] == 0)
607  continue;
608  if (group >= 0)
609  return -1;
610  group = i;
611  }
612  return group;
613  }
614  };
615  void determine_capable(const char *env_var) override {
616  __kmp_affinity_determine_capable(env_var);
617  }
618  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
619  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
620  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
621  KMPAffinity::Mask *allocate_mask_array(int num) override {
622  return new Mask[num];
623  }
624  void deallocate_mask_array(KMPAffinity::Mask *array) override {
625  Mask *windows_array = static_cast<Mask *>(array);
626  delete[] windows_array;
627  }
628  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
629  int index) override {
630  Mask *windows_array = static_cast<Mask *>(array);
631  return &(windows_array[index]);
632  }
633  api_type get_api_type() const override { return NATIVE_OS; }
634 };
635 #endif /* KMP_OS_WINDOWS */
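/* On Windows the mask holds one ULONG_PTR word per processor group, so with a
   64-bit ULONG_PTR logical processor 70 is group 1, bit 6, and end() is
   __kmp_num_proc_groups * 64. get_proc_group() returns 1 on single-group
   machines, the index of the only non-zero word otherwise, and -1 when the
   mask spans more than one group, which is why set_system_affinity() rejects
   such a mask before it builds the GROUP_AFFINITY structure. */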
636 #endif /* KMP_AFFINITY_SUPPORTED */
637 
638 // Describe an attribute for a level in the machine topology
639 struct kmp_hw_attr_t {
640  int core_type : 8;
641  int core_eff : 8;
642  unsigned valid : 1;
643  unsigned reserved : 15;
644 
645  static const int UNKNOWN_CORE_EFF = -1;
646 
647  kmp_hw_attr_t()
648  : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
649  valid(0), reserved(0) {}
650  void set_core_type(kmp_hw_core_type_t type) {
651  valid = 1;
652  core_type = type;
653  }
654  void set_core_eff(int eff) {
655  valid = 1;
656  core_eff = eff;
657  }
658  kmp_hw_core_type_t get_core_type() const {
659  return (kmp_hw_core_type_t)core_type;
660  }
661  int get_core_eff() const { return core_eff; }
662  bool is_core_type_valid() const {
663  return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
664  }
665  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
666  operator bool() const { return valid; }
667  void clear() {
668  core_type = KMP_HW_CORE_TYPE_UNKNOWN;
669  core_eff = UNKNOWN_CORE_EFF;
670  valid = 0;
671  }
672  bool contains(const kmp_hw_attr_t &other) const {
673  if (!valid && !other.valid)
674  return true;
675  if (valid && other.valid) {
676  if (other.is_core_type_valid()) {
677  if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
678  return false;
679  }
680  if (other.is_core_eff_valid()) {
681  if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
682  return false;
683  }
684  return true;
685  }
686  return false;
687  }
688  bool operator==(const kmp_hw_attr_t &rhs) const {
689  return (rhs.valid == valid && rhs.core_eff == core_eff &&
690  rhs.core_type == core_type);
691  }
692  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
693 };
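/* A sketch of how contains() behaves, assuming the x86 hybrid core-type
   enumerators (KMP_HW_CORE_TYPE_ATOM, etc.) are available in this build:

     kmp_hw_attr_t core_attr; // describes an actual core: type and efficiency
     core_attr.set_core_type(KMP_HW_CORE_TYPE_ATOM);
     core_attr.set_core_eff(0);

     kmp_hw_attr_t request; // a constraint that only names a core type
     request.set_core_type(KMP_HW_CORE_TYPE_ATOM);

     core_attr.contains(request); // true: every field the request specifies
                                  // is matched by core_attr
     request.contains(core_attr); // false: core_attr also pins an efficiency
                                  // that the request does not carry

   Two attributes with valid == 0 trivially contain each other. */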
694 
695 #if KMP_AFFINITY_SUPPORTED
696 KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
697 #endif
698 
699 class kmp_hw_thread_t {
700 public:
701  static const int UNKNOWN_ID = -1;
702  static const int MULTIPLE_ID = -2;
703  static int compare_ids(const void *a, const void *b);
704  static int compare_compact(const void *a, const void *b);
705  int ids[KMP_HW_LAST];
706  int sub_ids[KMP_HW_LAST];
707  bool leader;
708  int os_id;
709  kmp_hw_attr_t attrs;
710 
711  void print() const;
712  void clear() {
713  for (int i = 0; i < (int)KMP_HW_LAST; ++i)
714  ids[i] = UNKNOWN_ID;
715  leader = false;
716  attrs.clear();
717  }
718 };
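/* ids[] and sub_ids[] are indexed by topology level (0 .. depth-1), not by
   kmp_hw_t value; they are merely sized to KMP_HW_LAST so they never need
   reallocation. For a depth-3 topology whose levels are
   [ socket | core | thread ], one hardware thread might carry, for example,

     ids[0] == 1 // its socket
     ids[1] == 3 // its core within that socket
     ids[2] == 0 // its hardware thread within that core

   while os_id is the operating-system processor number, i.e. the value that
   ends up in an affinity mask. */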
719 
720 class kmp_topology_t {
721 
722  struct flags_t {
723  int uniform : 1;
724  int reserved : 31;
725  };
726 
727  int depth;
728 
729  // The following arrays are all 'depth' long, but they are allocated to
730  // hold up to KMP_HW_LAST objects so that layers can be added later
731  // without reallocating any of them
732 
733  // Ordered array of the types in the topology
734  kmp_hw_t *types;
735 
736  // Quick topology ratios; for non-uniform topologies each entry holds
737  // the max number of itemAs per itemB,
738  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
739  int *ratio;
740 
741  // Storage containing the absolute number of each topology layer
742  int *count;
743 
744  // The number of core efficiencies. This is only useful for hybrid
745  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
746  int num_core_efficiencies;
747  int num_core_types;
748  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
749 
750  // The hardware threads array
751  // hw_threads is num_hw_threads long
752  // Each hw_thread's ids and sub_ids are depth deep
753  int num_hw_threads;
754  kmp_hw_thread_t *hw_threads;
755 
756  // Equivalence map indexed by hardware topology type; the value is the
757  // equivalent type present in the types[] array. If the value is
758  // KMP_HW_UNKNOWN, then there is no known equivalence for that topology
759  // type
760  kmp_hw_t equivalent[KMP_HW_LAST];
761 
762  // Flags describing the topology
763  flags_t flags;
764 
765  // Compact value used during sort_compact()
766  int compact;
767 
768  // Insert a new topology layer after allocation
769  void _insert_layer(kmp_hw_t type, const int *ids);
770 
771 #if KMP_GROUP_AFFINITY
772  // Insert topology information about Windows Processor groups
773  void _insert_windows_proc_groups();
774 #endif
775 
776  // Count each item & get the num x's per y
777  // e.g., get the number of cores and the number of threads per core
778  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
779  void _gather_enumeration_information();
780 
781  // Remove layers that don't add information to the topology.
782  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
783  void _remove_radix1_layers();
784 
785  // Find out if the topology is uniform
786  void _discover_uniformity();
787 
788  // Set all the sub_ids for each hardware thread
789  void _set_sub_ids();
790 
791  // Set global affinity variables describing the number of threads per
792  // core, the number of packages, the number of cores per package, and
793  // the number of cores.
794  void _set_globals();
795 
796  // Set the last level cache equivalent type
797  void _set_last_level_cache();
798 
799  // Return the number of cores with a particular attribute, 'attr'.
800  // If 'find_all' is true, then find all cores on the machine, otherwise find
801  // all cores per the layer 'above'
802  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
803  bool find_all = false) const;
804 
805 public:
806  // Force use of allocate()/deallocate()
807  kmp_topology_t() = delete;
808  kmp_topology_t(const kmp_topology_t &t) = delete;
809  kmp_topology_t(kmp_topology_t &&t) = delete;
810  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
811  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
812 
813  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
814  static void deallocate(kmp_topology_t *);
815 
816  // Functions used in create_map() routines
817  kmp_hw_thread_t &at(int index) {
818  KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
819  return hw_threads[index];
820  }
821  const kmp_hw_thread_t &at(int index) const {
822  KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
823  return hw_threads[index];
824  }
825  int get_num_hw_threads() const { return num_hw_threads; }
826  void sort_ids() {
827  qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
828  kmp_hw_thread_t::compare_ids);
829  }
830  // Check if the hardware ids are unique; return true if they are,
831  // false otherwise
832  bool check_ids() const;
833 
834  // Function to call after the create_map() routine
835  void canonicalize();
836  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
837 
838 // Functions used after canonicalize() called
839 
840 #if KMP_AFFINITY_SUPPORTED
841  // Set the granularity for affinity settings
842  void set_granularity(kmp_affinity_t &stgs) const;
843 #endif
844  bool filter_hw_subset();
845  bool is_close(int hwt1, int hwt2, int level) const;
846  bool is_uniform() const { return flags.uniform; }
847  // Return the topology type equivalent to 'type';
848  // returns KMP_HW_UNKNOWN when there is no equivalent type in the topology
849  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
850  // Set type1 = type2
851  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
852  KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
853  KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
854  kmp_hw_t real_type2 = equivalent[type2];
855  if (real_type2 == KMP_HW_UNKNOWN)
856  real_type2 = type2;
857  equivalent[type1] = real_type2;
858  // This loop is required since any of the types may have been set to
859  // be equivalent to type1. They all must be checked and reset to type2.
860  KMP_FOREACH_HW_TYPE(type) {
861  if (equivalent[type] == type1) {
862  equivalent[type] = real_type2;
863  }
864  }
865  }
866  // Calculate the number of objects at level1 per object at level2
867  // (e.g., the number of threads per core)
868  int calculate_ratio(int level1, int level2) const {
869  KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
870  KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
871  int r = 1;
872  for (int level = level1; level > level2; --level)
873  r *= ratio[level];
874  return r;
875  }
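  /* Worked example: with ratio[] = { 4 packages, 6 cores/package,
     2 threads/core } and levels 0=package, 1=core, 2=thread,

       calculate_ratio(2, 1) == 2   // threads per core
       calculate_ratio(2, 0) == 12  // threads per package (2 * 6)

     i.e. the product of ratio[] over every level deeper than level2, down to
     and including level1. */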
876  int get_ratio(int level) const {
877  KMP_DEBUG_ASSERT(level >= 0 && level < depth);
878  return ratio[level];
879  }
880  int get_depth() const { return depth; };
881  kmp_hw_t get_type(int level) const {
882  KMP_DEBUG_ASSERT(level >= 0 && level < depth);
883  return types[level];
884  }
885  int get_level(kmp_hw_t type) const {
886  KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
887  int eq_type = equivalent[type];
888  if (eq_type == KMP_HW_UNKNOWN)
889  return -1;
890  for (int i = 0; i < depth; ++i)
891  if (types[i] == eq_type)
892  return i;
893  return -1;
894  }
895  int get_count(int level) const {
896  KMP_DEBUG_ASSERT(level >= 0 && level < depth);
897  return count[level];
898  }
899  // Return the total number of cores with attribute 'attr'
900  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
901  return _get_ncores_with_attr(attr, -1, true);
902  }
903  // Return the number of cores with attribute
904  // 'attr' per topology level 'above'
905  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
906  return _get_ncores_with_attr(attr, above, false);
907  }
908 
909 #if KMP_AFFINITY_SUPPORTED
910  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
911  void sort_compact(kmp_affinity_t &affinity) {
912  compact = affinity.compact;
913  qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
914  kmp_hw_thread_t::compare_compact);
915  }
916 #endif
917  void print(const char *env_var = "KMP_AFFINITY") const;
918  void dump() const;
919 };
920 extern kmp_topology_t *__kmp_topology;
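/* A sketch of typical read-only use of the global topology once it has been
   built and canonicalized elsewhere in the runtime:

     if (__kmp_topology) {
       int core_level = __kmp_topology->get_level(KMP_HW_CORE);
       if (core_level >= 0) {
         int ncores = __kmp_topology->get_count(core_level); // total cores
         kmp_hw_t t = __kmp_topology->get_type(core_level);  // canonical type
                                                             // at that level
       }
     }

   get_level() resolves equivalent types first and returns -1 for any type that
   has no equivalent in this particular topology. */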
921 
922 class kmp_hw_subset_t {
923  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
924 
925 public:
926  // Describe a machine topology item in KMP_HW_SUBSET
927  struct item_t {
928  kmp_hw_t type;
929  int num_attrs;
930  int num[MAX_ATTRS];
931  int offset[MAX_ATTRS];
932  kmp_hw_attr_t attr[MAX_ATTRS];
933  };
934  // Put parentheses around max to avoid accidental use of the Windows max macro.
935  const static int USE_ALL = (std::numeric_limits<int>::max)();
936 
937 private:
938  int depth;
939  int capacity;
940  item_t *items;
941  kmp_uint64 set;
942  bool absolute;
943  // The set must be able to handle up to KMP_HW_LAST number of layers
944  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
945  // Comparator used to sort the KMP_HW_SUBSET items into topology order.
946  // All unknown topology types will be at the beginning of the subset.
947  static int hw_subset_compare(const void *i1, const void *i2) {
948  kmp_hw_t type1 = ((const item_t *)i1)->type;
949  kmp_hw_t type2 = ((const item_t *)i2)->type;
950  int level1 = __kmp_topology->get_level(type1);
951  int level2 = __kmp_topology->get_level(type2);
952  return level1 - level2;
953  }
954 
955 public:
956  // Force use of allocate()/deallocate()
957  kmp_hw_subset_t() = delete;
958  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
959  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
960  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
961  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
962 
963  static kmp_hw_subset_t *allocate() {
964  int initial_capacity = 5;
965  kmp_hw_subset_t *retval =
966  (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
967  retval->depth = 0;
968  retval->capacity = initial_capacity;
969  retval->set = 0ull;
970  retval->absolute = false;
971  retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
972  return retval;
973  }
974  static void deallocate(kmp_hw_subset_t *subset) {
975  __kmp_free(subset->items);
976  __kmp_free(subset);
977  }
978  void set_absolute() { absolute = true; }
979  bool is_absolute() const { return absolute; }
980  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
981  for (int i = 0; i < depth; ++i) {
982  // Found an existing item for this layer type
983  // Add the num, offset, and attr to this item
984  if (items[i].type == type) {
985  int idx = items[i].num_attrs++;
986  if ((size_t)idx >= MAX_ATTRS)
987  return;
988  items[i].num[idx] = num;
989  items[i].offset[idx] = offset;
990  items[i].attr[idx] = attr;
991  return;
992  }
993  }
994  if (depth == capacity - 1) {
995  capacity *= 2;
996  item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
997  for (int i = 0; i < depth; ++i)
998  new_items[i] = items[i];
999  __kmp_free(items);
1000  items = new_items;
1001  }
1002  items[depth].num_attrs = 1;
1003  items[depth].type = type;
1004  items[depth].num[0] = num;
1005  items[depth].offset[0] = offset;
1006  items[depth].attr[0] = attr;
1007  depth++;
1008  set |= (1ull << type);
1009  }
1010  int get_depth() const { return depth; }
1011  const item_t &at(int index) const {
1012  KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1013  return items[index];
1014  }
1015  item_t &at(int index) {
1016  KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1017  return items[index];
1018  }
1019  void remove(int index) {
1020  KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1021  set &= ~(1ull << items[index].type);
1022  for (int j = index + 1; j < depth; ++j) {
1023  items[j - 1] = items[j];
1024  }
1025  depth--;
1026  }
1027  void sort() {
1028  KMP_DEBUG_ASSERT(__kmp_topology);
1029  qsort(items, depth, sizeof(item_t), hw_subset_compare);
1030  }
1031  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
1032  void dump() const {
1033  printf("**********************\n");
1034  printf("*** kmp_hw_subset: ***\n");
1035  printf("* depth: %d\n", depth);
1036  printf("* items:\n");
1037  for (int i = 0; i < depth; ++i) {
1038  printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
1039  for (int j = 0; j < items[i].num_attrs; ++j) {
1040  printf(" num: %d, offset: %d, attr: ", items[i].num[j],
1041  items[i].offset[j]);
1042  if (!items[i].attr[j]) {
1043  printf(" (none)\n");
1044  } else {
1045  printf(
1046  " core_type = %s, core_eff = %d\n",
1047  __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
1048  items[i].attr[j].get_core_eff());
1049  }
1050  }
1051  }
1052  printf("* set: 0x%llx\n", set);
1053  printf("* absolute: %d\n", absolute);
1054  printf("**********************\n");
1055  }
1056 };
1057 extern kmp_hw_subset_t *__kmp_hw_subset;
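/* The KMP_HW_SUBSET parser elsewhere in the runtime fills this structure with
   push_back(). Conceptually, a value such as "2s,4c,2t" (2 sockets, 4 cores
   per socket, 2 threads per core) would become three items:

     kmp_hw_attr_t none; // no core-type/efficiency constraint
     __kmp_hw_subset->push_back(2, KMP_HW_SOCKET, 0, none);
     __kmp_hw_subset->push_back(4, KMP_HW_CORE, 0, none);
     __kmp_hw_subset->push_back(2, KMP_HW_THREAD, 0, none);
     __kmp_hw_subset->sort(); // requires __kmp_topology to exist

   after which kmp_topology_t::filter_hw_subset() prunes the hardware threads
   accordingly. */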
1058 
1059 /* A structure for holding machine-specific hierarchy info to be computed once
1060  at init. This structure represents a mapping of threads to the actual machine
1061  hierarchy, or to our best guess at what the hierarchy might be, for the
1062  purpose of performing an efficient barrier. In the worst case, when there is
1063  no machine hierarchy information, it produces a tree suitable for a barrier,
1064  similar to the tree used in the hyper barrier. */
1065 class hierarchy_info {
1066 public:
1067  /* Good default values for number of leaves and branching factor, given no
1068  affinity information. Behaves a bit like hyper barrier. */
1069  static const kmp_uint32 maxLeaves = 4;
1070  static const kmp_uint32 minBranch = 4;
1076  kmp_uint32 maxLevels; // number of levels allocated in the arrays below
1077 
1082  kmp_uint32 depth; // number of hierarchy levels actually in use
1083  kmp_uint32 base_num_threads;
1084  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
1085  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
1086  // 2=initialization in progress
1087  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
1088 
1093  kmp_uint32 *numPerLevel; // numPerLevel[i]: level-i nodes per parent (level 0 = leaves)
1094  kmp_uint32 *skipPerLevel; // skipPerLevel[i]: leaves spanned by one level-i node
1095 
1096  void deriveLevels() {
1097  int hier_depth = __kmp_topology->get_depth();
1098  for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1099  numPerLevel[level] = __kmp_topology->get_ratio(i);
1100  }
1101  }
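  /* Example: if the topology reports 4 packages x 6 cores/package x
     2 threads/core (get_depth() == 3, ratios {4, 6, 2}), deriveLevels() fills
     the leaf level first:

       numPerLevel[0] == 2 // threads per core
       numPerLevel[1] == 6 // cores per package
       numPerLevel[2] == 4 // packages

     init() below then derives skipPerLevel and may rebalance overly wide
     levels. */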
1102 
1103  hierarchy_info()
1104  : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
1105 
1106  void fini() {
1107  if (!uninitialized && numPerLevel) {
1108  __kmp_free(numPerLevel);
1109  numPerLevel = NULL;
1110  uninitialized = not_initialized;
1111  }
1112  }
1113 
1114  void init(int num_addrs) {
1115  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
1116  &uninitialized, not_initialized, initializing);
1117  if (bool_result == 0) { // Wait for initialization
1118  while (TCR_1(uninitialized) != initialized)
1119  KMP_CPU_PAUSE();
1120  return;
1121  }
1122  KMP_DEBUG_ASSERT(bool_result == 1);
1123 
1124  /* Explicitly initialize the data fields here to prevent use of dirty values
1125  observed when the static library is re-initialized multiple times (e.g.,
1126  when a non-OpenMP thread repeatedly launches/joins a thread that uses
1127  OpenMP). */
1128  depth = 1;
1129  resizing = 0;
1130  maxLevels = 7;
1131  numPerLevel =
1132  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1133  skipPerLevel = &(numPerLevel[maxLevels]);
1134  for (kmp_uint32 i = 0; i < maxLevels;
1135  ++i) { // init numPerLevel[*] to 1 item per level
1136  numPerLevel[i] = 1;
1137  skipPerLevel[i] = 1;
1138  }
1139 
1140  // Derive the per-level counts from the machine topology if one is available
1141  if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1142  deriveLevels();
1143  } else {
1144  numPerLevel[0] = maxLeaves;
1145  numPerLevel[1] = num_addrs / maxLeaves;
1146  if (num_addrs % maxLeaves)
1147  numPerLevel[1]++;
1148  }
1149 
1150  base_num_threads = num_addrs;
1151  for (int i = maxLevels - 1; i >= 0;
1152  --i) // count non-empty levels to get depth
1153  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
1154  depth++;
1155 
1156  kmp_uint32 branch = minBranch;
1157  if (numPerLevel[0] == 1)
1158  branch = num_addrs / maxLeaves;
1159  if (branch < minBranch)
1160  branch = minBranch;
1161  for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
1162  while (numPerLevel[d] > branch ||
1163  (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
1164  if (numPerLevel[d] & 1)
1165  numPerLevel[d]++;
1166  numPerLevel[d] = numPerLevel[d] >> 1;
1167  if (numPerLevel[d + 1] == 1)
1168  depth++;
1169  numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
1170  }
1171  if (numPerLevel[0] == 1) {
1172  branch = branch >> 1;
1173  if (branch < 4)
1174  branch = minBranch;
1175  }
1176  }
1177 
1178  for (kmp_uint32 i = 1; i < depth; ++i)
1179  skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
1180  // Fill in hierarchy in the case of oversubscription
1181  for (kmp_uint32 i = depth; i < maxLevels; ++i)
1182  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1183 
1184  uninitialized = initialized; // One writer
1185  }
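  /* Example of the skipPerLevel recurrence computed in init(): with
     numPerLevel = {4, 4, 2, 1, ...} it produces

       skipPerLevel = {1, 4, 16, 32, 64, 128, ...}

     so skipPerLevel[i] is the number of leaf threads spanned by one node at
     level i, with the levels beyond the machine's depth doubling so that an
     oversubscribed thread count still maps onto the tree. */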
1186 
1187  // Resize the hierarchy if nproc changes to something larger than before
1188  void resize(kmp_uint32 nproc) {
1189  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1190  while (bool_result == 0) { // someone else is trying to resize
1191  KMP_CPU_PAUSE();
1192  if (nproc <= base_num_threads) // happy with other thread's resize
1193  return;
1194  else // try to resize
1195  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1196  }
1197  KMP_DEBUG_ASSERT(bool_result != 0);
1198  if (nproc <= base_num_threads)
1199  return; // happy with other thread's resize
1200 
1201  // Calculate new maxLevels
1202  kmp_uint32 old_sz = skipPerLevel[depth - 1];
1203  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
1204  // First see if old maxLevels is enough to contain new size
1205  for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
1206  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1207  numPerLevel[i - 1] *= 2;
1208  old_sz *= 2;
1209  depth++;
1210  }
1211  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
1212  while (nproc > old_sz) {
1213  old_sz *= 2;
1214  incs++;
1215  depth++;
1216  }
1217  maxLevels += incs;
1218 
1219  // Resize arrays
1220  kmp_uint32 *old_numPerLevel = numPerLevel;
1221  kmp_uint32 *old_skipPerLevel = skipPerLevel;
1222  numPerLevel = skipPerLevel = NULL;
1223  numPerLevel =
1224  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1225  skipPerLevel = &(numPerLevel[maxLevels]);
1226 
1227  // Copy old elements from old arrays
1228  for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1229  // copy the previous per-level values
1230  numPerLevel[i] = old_numPerLevel[i];
1231  skipPerLevel[i] = old_skipPerLevel[i];
1232  }
1233 
1234  // Init new elements in arrays to 1
1235  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1236  // init numPerLevel[*] to 1 item per level
1237  numPerLevel[i] = 1;
1238  skipPerLevel[i] = 1;
1239  }
1240 
1241  // Free old arrays
1242  __kmp_free(old_numPerLevel);
1243  }
1244 
1245  // Fill in oversubscription levels of hierarchy
1246  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
1247  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1248 
1249  base_num_threads = nproc;
1250  resizing = 0; // One writer
1251  }
1252 };
1253 #endif // KMP_AFFINITY_H