[OpenMP][libomp] Enable HWLOC topology detection of multiple CPU kinds

Teach the HWLOC topology method how to detect Atom and Core
types so hybrid CPUs are properly detected and represented when using
the HWLOC topology method.

Differential Revision: https://reviews.llvm.org/D112270

GitOrigin-RevId: a733b18bdbe28778cf34705447ee7691b2064b6e
diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp
index a46a7bb..84086f3 100644
--- a/runtime/src/kmp_affinity.cpp
+++ b/runtime/src/kmp_affinity.cpp
@@ -384,7 +384,6 @@
 void kmp_topology_t::_gather_enumeration_information() {
   int previous_id[KMP_HW_LAST];
   int max[KMP_HW_LAST];
-  int previous_core_id = kmp_hw_thread_t::UNKNOWN_ID;
 
   for (int i = 0; i < depth; ++i) {
     previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
@@ -398,6 +397,7 @@
       core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
     }
   }
+  int core_level = get_level(KMP_HW_CORE);
   for (int i = 0; i < num_hw_threads; ++i) {
     kmp_hw_thread_t &hw_thread = hw_threads[i];
     for (int layer = 0; layer < depth; ++layer) {
@@ -413,21 +413,15 @@
             ratio[l] = max[l];
           max[l] = 1;
         }
+        // Figure out the number of each core type for hybrid CPUs
+        if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level)
+          _increment_core_type(hw_thread.core_type);
         break;
       }
     }
     for (int layer = 0; layer < depth; ++layer) {
       previous_id[layer] = hw_thread.ids[layer];
     }
-    // Figure out the number of each core type for hybrid CPUs
-    if (__kmp_is_hybrid_cpu()) {
-      int core_level = get_level(KMP_HW_CORE);
-      if (core_level != -1) {
-        if (hw_thread.ids[core_level] != previous_core_id)
-          _increment_core_type(hw_thread.core_type);
-        previous_core_id = hw_thread.ids[core_level];
-      }
-    }
   }
   for (int layer = 0; layer < depth; ++layer) {
     if (max[layer] > ratio[layer])
@@ -1360,6 +1354,45 @@
     return true;
   }
 
+  // Handle multiple types of cores if they exist on the system
+  int nr_cpu_kinds = hwloc_cpukinds_get_nr(tp, 0);
+
+  typedef struct kmp_hwloc_cpukinds_info_t {
+    int efficiency;
+    kmp_hw_core_type_t core_type;
+    hwloc_bitmap_t mask;
+  } kmp_hwloc_cpukinds_info_t;
+  kmp_hwloc_cpukinds_info_t *cpukinds = nullptr;
+
+  if (nr_cpu_kinds > 0) {
+    unsigned nr_infos;
+    struct hwloc_info_s *infos;
+    cpukinds = (kmp_hwloc_cpukinds_info_t *)__kmp_allocate(
+        sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds);
+    for (unsigned idx = 0; idx < (unsigned)nr_cpu_kinds; ++idx) {
+      cpukinds[idx].efficiency = -1;
+      cpukinds[idx].core_type = KMP_HW_CORE_TYPE_UNKNOWN;
+      cpukinds[idx].mask = hwloc_bitmap_alloc();
+      if (hwloc_cpukinds_get_info(tp, idx, cpukinds[idx].mask,
+                                  &cpukinds[idx].efficiency, &nr_infos, &infos,
+                                  0) == 0) {
+        for (unsigned i = 0; i < nr_infos; ++i) {
+          if (__kmp_str_match("CoreType", 8, infos[i].name)) {
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+            if (__kmp_str_match("IntelAtom", 9, infos[i].value)) {
+              cpukinds[idx].core_type = KMP_HW_CORE_TYPE_ATOM;
+              break;
+            } else if (__kmp_str_match("IntelCore", 9, infos[i].value)) {
+              cpukinds[idx].core_type = KMP_HW_CORE_TYPE_CORE;
+              break;
+            }
+#endif
+          }
+        }
+      }
+    }
+  }
+
   root = hwloc_get_root_obj(tp);
 
   // Figure out the depth and types in the topology
@@ -1419,6 +1452,18 @@
       hw_thread.clear();
       hw_thread.ids[index] = pu->logical_index;
       hw_thread.os_id = pu->os_index;
+      // If multiple core types, then set that attribute for the hardware thread
+      if (cpukinds) {
+        int cpukind_index = -1;
+        for (int i = 0; i < nr_cpu_kinds; ++i) {
+          if (hwloc_bitmap_isset(cpukinds[i].mask, hw_thread.os_id)) {
+            cpukind_index = i;
+            break;
+          }
+        }
+        if (cpukind_index >= 0)
+          hw_thread.core_type = cpukinds[cpukind_index].core_type;
+      }
       index--;
     }
     obj = pu;
@@ -1462,6 +1507,13 @@
     if (included)
       hw_thread_index++;
   }
+
+  // Free the core types information
+  if (cpukinds) {
+    for (int idx = 0; idx < nr_cpu_kinds; ++idx)
+      hwloc_bitmap_free(cpukinds[idx].mask);
+    __kmp_free(cpukinds);
+  }
   __kmp_topology->sort_ids();
   return true;
 }