| /* |
| * kmp_barrier.h |
| */ |
| |
| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef KMP_BARRIER_H |
| #define KMP_BARRIER_H |
| |
| #include "kmp.h" |
| #include "kmp_i18n.h" |
| |
| #if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC |
| #include <xmmintrin.h> |
| #define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment) |
| #define KMP_ALIGNED_FREE(ptr) _mm_free(ptr) |
| #elif KMP_HAVE_ALIGNED_ALLOC |
| #define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size) |
| #define KMP_ALIGNED_FREE(ptr) free(ptr) |
| #elif KMP_HAVE_POSIX_MEMALIGN |
// Aligned allocation built on posix_memalign().
//
// size      - number of bytes requested.
// alignment - required alignment, forwarded unchanged to posix_memalign().
//
// Returns the allocated pointer on success, or nullptr when posix_memalign()
// reports an error. Memory obtained here is released with free().
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // Start from a known value: posix_memalign() is not required to store
  // anything through its first argument when it fails, so the pointer must
  // never be read (let alone freed) on the error path.
  void *ptr = nullptr;
  if (posix_memalign(&ptr, alignment, size) != 0)
    return nullptr;
  return ptr;
}
| #define KMP_ALIGNED_FREE(ptr) free(ptr) |
| #elif KMP_HAVE__ALIGNED_MALLOC |
| #include <malloc.h> |
| #define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment) |
| #define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr) |
| #else |
| #define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size) |
| #define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr) |
| #endif |
| |
| // Use four cache lines: MLC tends to prefetch the next or previous cache line |
| // creating a possible fake conflict between cores, so this is the only way to |
| // guarantee that no such prefetch can happen. |
| #ifndef KMP_FOURLINE_ALIGN_CACHE |
| #define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE) |
| #endif |
| |
| #define KMP_OPTIMIZE_FOR_REDUCTIONS 0 |
| |
| class distributedBarrier { |
| struct flags_s { |
| kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed; |
| }; |
| |
| struct go_s { |
| std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go; |
| }; |
| |
| struct iter_s { |
| kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter; |
| }; |
| |
| struct sleep_s { |
| std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep; |
| }; |
| |
| void init(size_t nthr); |
| void resize(size_t nthr); |
| void computeGo(size_t n); |
| void computeVarsForN(size_t n); |
| |
| public: |
| enum { |
| MAX_ITERS = 3, |
| MAX_GOS = 8, |
| IDEAL_GOS = 4, |
| IDEAL_CONTENTION = 16, |
| }; |
| |
| flags_s *flags[MAX_ITERS]; |
| go_s *go; |
| iter_s *iter; |
| sleep_s *sleep; |
| |
| size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier |
| size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure |
| // number of go signals each requiring one write per iteration |
| size_t KMP_ALIGN_CACHE num_gos; |
| // number of groups of gos |
| size_t KMP_ALIGN_CACHE num_groups; |
| // threads per go signal |
| size_t KMP_ALIGN_CACHE threads_per_go; |
| bool KMP_ALIGN_CACHE fix_threads_per_go; |
| // threads per group |
| size_t KMP_ALIGN_CACHE threads_per_group; |
| // number of go signals in a group |
| size_t KMP_ALIGN_CACHE gos_per_group; |
| void *team_icvs; |
| |
| distributedBarrier() = delete; |
| ~distributedBarrier() = delete; |
| |
| // Used instead of constructor to create aligned data |
| static distributedBarrier *allocate(int nThreads) { |
| distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE( |
| sizeof(distributedBarrier), 4 * CACHE_LINE); |
| if (!d) { |
| KMP_FATAL(MemoryAllocFailed); |
| } |
| d->num_threads = 0; |
| d->max_threads = 0; |
| for (int i = 0; i < MAX_ITERS; ++i) |
| d->flags[i] = NULL; |
| d->go = NULL; |
| d->iter = NULL; |
| d->sleep = NULL; |
| d->team_icvs = NULL; |
| d->fix_threads_per_go = false; |
| // calculate gos and groups ONCE on base size |
| d->computeGo(nThreads); |
| d->init(nThreads); |
| return d; |
| } |
| |
| static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); } |
| |
| void update_num_threads(size_t nthr) { init(nthr); } |
| |
| bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); } |
| size_t get_num_threads() { return num_threads; } |
| kmp_uint64 go_release(); |
| void go_reset(); |
| }; |
| |
| #endif // KMP_BARRIER_H |