[OpenMP][libomp] Allow users to specify KMP_HW_SUBSET in any order
Remove the restriction forcing users to specify the KMP_HW_SUBSET value in
topology order. This patch sorts the user's KMP_HW_SUBSET value before
trying to apply it. For example, 1s,4c,2t is equivalent to 2t,1s,4c.
Differential Revision: https://reviews.llvm.org/D112027
GitOrigin-RevId: a0afb9d0fc2b6b1a0e89c9cd8f0bd1d839e740ce
diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp
index f82767a..1bec347 100644
--- a/runtime/src/kmp_affinity.cpp
+++ b/runtime/src/kmp_affinity.cpp
@@ -739,6 +739,9 @@
if (!__kmp_hw_subset)
return false;
+ // First, sort the KMP_HW_SUBSET items by the machine topology
+ __kmp_hw_subset->sort();
+
// Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
int hw_subset_depth = __kmp_hw_subset->get_depth();
kmp_hw_t specified[KMP_HW_LAST];
@@ -770,23 +773,6 @@
}
specified[equivalent_type] = type;
- // Check to see if layers are in order
- if (i + 1 < hw_subset_depth) {
- kmp_hw_t next_type = get_equivalent_type(__kmp_hw_subset->at(i + 1).type);
- if (next_type == KMP_HW_UNKNOWN) {
- KMP_WARNING(
- AffHWSubsetNotExistGeneric,
- __kmp_hw_get_catalog_string(__kmp_hw_subset->at(i + 1).type));
- return false;
- }
- int next_topology_level = get_level(next_type);
- if (level > next_topology_level) {
- KMP_WARNING(AffHWSubsetOutOfOrder, __kmp_hw_get_catalog_string(type),
- __kmp_hw_get_catalog_string(next_type));
- return false;
- }
- }
-
// Check to see if each layer's num & offset parameters are valid
max_count = get_ratio(level);
if (max_count < 0 || num + offset > max_count) {
diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h
index 2a8ac77..5b1569b 100644
--- a/runtime/src/kmp_affinity.h
+++ b/runtime/src/kmp_affinity.h
@@ -806,6 +806,7 @@
void print(const char *env_var = "KMP_AFFINITY") const;
void dump() const;
};
+extern kmp_topology_t *__kmp_topology;
class kmp_hw_subset_t {
public:
@@ -823,6 +824,15 @@
bool absolute;
// The set must be able to handle up to KMP_HW_LAST number of layers
KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
+ // qsort() comparator: orders KMP_HW_SUBSET items by their topology level.
+ // Unknown topology types end up at the beginning of the subset.
+ static int hw_subset_compare(const void *i1, const void *i2) {
+ kmp_hw_t type1 = ((const item_t *)i1)->type;
+ kmp_hw_t type2 = ((const item_t *)i2)->type;
+ int level1 = __kmp_topology->get_level(type1); // __kmp_topology must be set
+ int level2 = __kmp_topology->get_level(type2);
+ return level1 - level2; // <0, 0 or >0: ascending by level
+ }
public:
// Force use of allocate()/deallocate()
@@ -881,6 +891,10 @@
}
depth--;
}
+ void sort() { // Reorder the subset items in place by topology level
+ KMP_DEBUG_ASSERT(__kmp_topology); // hw_subset_compare dereferences it
+ qsort(items, depth, sizeof(item_t), hw_subset_compare);
+ }
bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
void dump() const {
printf("**********************\n");
@@ -896,8 +910,6 @@
printf("**********************\n");
}
};
-
-extern kmp_topology_t *__kmp_topology;
extern kmp_hw_subset_t *__kmp_hw_subset;
/* A structure for holding machine-specific hierarchy info to be computed once
diff --git a/runtime/test/affinity/kmp-hw-subset.c b/runtime/test/affinity/kmp-hw-subset.c
index d5b306e..606fcdf 100644
--- a/runtime/test/affinity/kmp-hw-subset.c
+++ b/runtime/test/affinity/kmp-hw-subset.c
@@ -2,6 +2,7 @@
// RUN: env OMP_PLACES=threads %libomp-run
// RUN: env OMP_PLACES=cores %libomp-run
// RUN: env OMP_PLACES=sockets %libomp-run
+// RUN: env OMP_PLACES=cores RUN_OUT_OF_ORDER=1 %libomp-run
// REQUIRES: linux
#include <stdio.h>
@@ -103,8 +104,13 @@
if (nsockets > 1)
nsockets /= 2;
- snprintf(buf, sizeof(buf), "%ds,%dc,%dt", nsockets, ncores_per_socket,
- nthreads_per_core);
+ if (getenv("RUN_OUT_OF_ORDER")) {
+ snprintf(buf, sizeof(buf), "%dt,%ds,%dc", nthreads_per_core, nsockets,
+ ncores_per_socket);
+ } else {
+ snprintf(buf, sizeof(buf), "%ds,%dc,%dt", nsockets, ncores_per_socket,
+ nthreads_per_core);
+ }
setenv("KMP_HW_SUBSET", buf, 1);
openmp_places = topology_alloc_openmp_places();