[OpenMP][libomp] Allow users to specify KMP_HW_SUBSET in any order

Remove the restriction that forced users to specify the KMP_HW_SUBSET
value in topology order. This patch sorts the user's KMP_HW_SUBSET items
by topology level before trying to apply them. For example, 2t,1s,4c is
now equivalent to 1s,4c,2t.

Differential Revision: https://reviews.llvm.org/D112027

GitOrigin-RevId: a0afb9d0fc2b6b1a0e89c9cd8f0bd1d839e740ce
diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp
index f82767a..1bec347 100644
--- a/runtime/src/kmp_affinity.cpp
+++ b/runtime/src/kmp_affinity.cpp
@@ -739,6 +739,9 @@
   if (!__kmp_hw_subset)
     return false;
 
+  // First, sort the KMP_HW_SUBSET items by the machine topology
+  __kmp_hw_subset->sort();
+
   // Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
   int hw_subset_depth = __kmp_hw_subset->get_depth();
   kmp_hw_t specified[KMP_HW_LAST];
@@ -770,23 +773,6 @@
     }
     specified[equivalent_type] = type;
 
-    // Check to see if layers are in order
-    if (i + 1 < hw_subset_depth) {
-      kmp_hw_t next_type = get_equivalent_type(__kmp_hw_subset->at(i + 1).type);
-      if (next_type == KMP_HW_UNKNOWN) {
-        KMP_WARNING(
-            AffHWSubsetNotExistGeneric,
-            __kmp_hw_get_catalog_string(__kmp_hw_subset->at(i + 1).type));
-        return false;
-      }
-      int next_topology_level = get_level(next_type);
-      if (level > next_topology_level) {
-        KMP_WARNING(AffHWSubsetOutOfOrder, __kmp_hw_get_catalog_string(type),
-                    __kmp_hw_get_catalog_string(next_type));
-        return false;
-      }
-    }
-
     // Check to see if each layer's num & offset parameters are valid
     max_count = get_ratio(level);
     if (max_count < 0 || num + offset > max_count) {
diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h
index 2a8ac77..5b1569b 100644
--- a/runtime/src/kmp_affinity.h
+++ b/runtime/src/kmp_affinity.h
@@ -806,6 +806,7 @@
   void print(const char *env_var = "KMP_AFFINITY") const;
   void dump() const;
 };
+extern kmp_topology_t *__kmp_topology;
 
 class kmp_hw_subset_t {
 public:
@@ -823,6 +824,15 @@
   bool absolute;
   // The set must be able to handle up to KMP_HW_LAST number of layers
   KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
+  // qsort() comparator ordering KMP_HW_SUBSET items by topology level.
+  // Unknown topology types are expected to land at the start of the subset.
+  static int hw_subset_compare(const void *i1, const void *i2) {
+    kmp_hw_t type1 = ((const item_t *)i1)->type;
+    kmp_hw_t type2 = ((const item_t *)i2)->type;
+    int level1 = __kmp_topology->get_level(type1);
+    int level2 = __kmp_topology->get_level(type2);
+    return level1 - level2;
+  }
 
 public:
   // Force use of allocate()/deallocate()
@@ -881,6 +891,10 @@
     }
     depth--;
   }
+  void sort() {
+    KMP_DEBUG_ASSERT(__kmp_topology); // dereferenced by hw_subset_compare
+    qsort(items, depth, sizeof(item_t), hw_subset_compare);
+  }
   bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
   void dump() const {
     printf("**********************\n");
@@ -896,8 +910,6 @@
     printf("**********************\n");
   }
 };
-
-extern kmp_topology_t *__kmp_topology;
 extern kmp_hw_subset_t *__kmp_hw_subset;
 
 /* A structure for holding machine-specific hierarchy info to be computed once
diff --git a/runtime/test/affinity/kmp-hw-subset.c b/runtime/test/affinity/kmp-hw-subset.c
index d5b306e..606fcdf 100644
--- a/runtime/test/affinity/kmp-hw-subset.c
+++ b/runtime/test/affinity/kmp-hw-subset.c
@@ -2,6 +2,7 @@
 // RUN: env OMP_PLACES=threads %libomp-run
 // RUN: env OMP_PLACES=cores %libomp-run
 // RUN: env OMP_PLACES=sockets %libomp-run
+// RUN: env OMP_PLACES=cores RUN_OUT_OF_ORDER=1 %libomp-run
 // REQUIRES: linux
 
 #include <stdio.h>
@@ -103,8 +104,13 @@
   if (nsockets > 1)
     nsockets /= 2;
 
-  snprintf(buf, sizeof(buf), "%ds,%dc,%dt", nsockets, ncores_per_socket,
-           nthreads_per_core);
+  if (getenv("RUN_OUT_OF_ORDER")) {
+    snprintf(buf, sizeof(buf), "%dt,%ds,%dc", nthreads_per_core, nsockets,
+             ncores_per_socket);
+  } else {
+    snprintf(buf, sizeof(buf), "%ds,%dc,%dt", nsockets, ncores_per_socket,
+             nthreads_per_core);
+  }
   setenv("KMP_HW_SUBSET", buf, 1);
 
   openmp_places = topology_alloc_openmp_places();