LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
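
// Illustrative sketch (not part of the original header): every Mask
// implementation in this file exposes the same iteration protocol. begin()
// returns the first set bit, next() returns the one after it, and the loop
// stops once next() reaches end() (-1 for the hwloc mask, the total bit count
// for the native masks). The helper name and the printf output are
// assumptions made only for demonstration.
static inline void __kmp_example_print_mask_bits(const KMPAffinity::Mask *m) {
  for (int i = m->begin(); i != m->end(); i = m->next(i))
    printf("proc %d is in the mask\n", i); // visit each set bit exactly once
}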

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h>, #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
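
// Illustrative sketch (not part of the original header): pinning the calling
// thread to a single processor through the abstract KMPAffinity interface,
// independent of which backend (hwloc or native OS) is active. The function
// name and its error handling are assumptions for demonstration only; the
// runtime itself goes through bind_thread() and the KMP_CPU_* macros instead.
static inline int __kmp_example_pin_self(KMPAffinity *affinity, int proc) {
  KMPAffinity::Mask *mask = affinity->allocate_mask();
  mask->zero(); // start from an empty mask
  mask->set(proc); // allow only the requested processor
  int rc = mask->set_system_affinity(/*abort_on_error=*/FALSE);
  affinity->deallocate_mask(mask);
  return rc; // 0 on success, an errno-style value otherwise
}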

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
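
// Illustrative sketch (not part of the original header): how isClose()
// relates to operator==. The depth, label values, and helper name below are
// assumptions chosen only to demonstrate the semantics: ignoring the last
// `level` labels makes two addresses that differ only at the innermost level
// compare as "close".
static inline bool __kmp_example_addresses_are_close() {
  Address a(3), b(3); // depth 3, e.g. package / core / hw-thread labels
  a.labels[0] = 0; a.labels[1] = 2; a.labels[2] = 0;
  b.labels[0] = 0; b.labels[1] = 2; b.labels[2] = 1;
  // Not equal as full addresses, but close once the last level is ignored.
  return a != b && !a.isClose(b, 0) && a.isClose(b, 1); // evaluates to true
}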

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual machine
   hierarchy, or to our best guess at what the hierarchy might be, for the
   purpose of performing an efficient barrier. In the worst case, when there is
   no machine hierarchy information, it produces a tree suitable for a barrier,
   similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Number of levels the numPerLevel/skipPerLevel arrays can hold; may grow
  // when the hierarchy is resized for oversubscription.
  kmp_uint32 maxLevels;

  // Number of levels actually populated in the hierarchy.
  kmp_uint32 depth;
  // Number of threads the hierarchy was built (or last resized) for.
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // numPerLevel[i] is the fan-out at level i of the tree (level 0 is the
  // leaves); skipPerLevel[i] is the number of threads spanned by one node at
  // level i.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent use of
       dirty values observed when the static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins threads
       that use OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
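
// Illustrative sketch (not part of the original header): the shape init()
// produces when no topology map is available. The thread count of 16 and the
// helper name are assumptions chosen for the worked example; the derived
// values follow directly from the code above.
static inline void __kmp_example_default_hierarchy() {
  hierarchy_info hier;
  hier.init(NULL, 16); // no Address map, so fall back to maxLeaves-wide nodes
  // With 16 threads and maxLeaves == 4, init() derives:
  //   numPerLevel  = {4, 4, 1, 1, 1, 1, 1}        -> depth == 3
  //   skipPerLevel = {1, 4, 16, 32, 64, 128, 256}
  // Levels beyond depth are pre-expanded so the barrier can absorb
  // oversubscription without an immediate resize().
  hier.fini();
}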
#endif // KMP_AFFINITY_H