[AMDGPU] Move LDS lowering related utility functions to a separate utils file.

Move some utility functions which are used within LDS lowering pass to a separate utils
file so that other LDS related passes can make use of them when required.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D100526

GitOrigin-RevId: 82787eb2285dc03e7dbc635ddb2bc22f871d9b45
diff --git a/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index fda044a..ce451a6 100644
--- a/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -28,6 +28,7 @@
 
 #include "AMDGPU.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDGPULDSUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -49,95 +50,6 @@
 
 class AMDGPULowerModuleLDS : public ModulePass {
 
-  static bool isKernelCC(Function *Func) {
-    return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
-  }
-
-  static Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
-    return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
-                                         GV->getValueType());
-  }
-
-  static bool
-  userRequiresLowering(const SmallPtrSetImpl<GlobalValue *> &UsedList,
-                       User *InitialUser) {
-    // Any LDS variable can be lowered by moving into the created struct
-    // Each variable so lowered is allocated in every kernel, so variables
-    // whose users are all known to be safe to lower without the transform
-    // are left unchanged.
-    SmallPtrSet<User *, 8> Visited;
-    SmallVector<User *, 16> Stack;
-    Stack.push_back(InitialUser);
-
-    while (!Stack.empty()) {
-      User *V = Stack.pop_back_val();
-      Visited.insert(V);
-
-      if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
-        if (UsedList.contains(G)) {
-          continue;
-        }
-      }
-
-      if (auto *I = dyn_cast<Instruction>(V)) {
-        if (isKernelCC(I->getFunction())) {
-          continue;
-        }
-      }
-
-      if (auto *E = dyn_cast<ConstantExpr>(V)) {
-        for (Value::user_iterator EU = E->user_begin(); EU != E->user_end();
-             ++EU) {
-          if (Visited.insert(*EU).second) {
-            Stack.push_back(*EU);
-          }
-        }
-        continue;
-      }
-
-      // Unknown user, conservatively lower the variable
-      return true;
-    }
-
-    return false;
-  }
-
-  static std::vector<GlobalVariable *>
-  findVariablesToLower(Module &M,
-                       const SmallPtrSetImpl<GlobalValue *> &UsedList) {
-    std::vector<llvm::GlobalVariable *> LocalVars;
-    for (auto &GV : M.globals()) {
-      if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
-        continue;
-      }
-      if (!GV.hasInitializer()) {
-        // addrspace(3) without initializer implies cuda/hip extern __shared__
-        // the semantics for such a variable appears to be that all extern
-        // __shared__ variables alias one another, in which case this transform
-        // is not required
-        continue;
-      }
-      if (!isa<UndefValue>(GV.getInitializer())) {
-        // Initializers are unimplemented for local address space.
-        // Leave such variables in place for consistent error reporting.
-        continue;
-      }
-      if (GV.isConstant()) {
-        // A constant undef variable can't be written to, and any load is
-        // undef, so it should be eliminated by the optimizer. It could be
-        // dropped by the back end if not. This pass skips over it.
-        continue;
-      }
-      if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) {
-            return userRequiresLowering(UsedList, U);
-          })) {
-        continue;
-      }
-      LocalVars.push_back(&GV);
-    }
-    return LocalVars;
-  }
-
   static void removeFromUsedList(Module &M, StringRef Name,
                                  SmallPtrSetImpl<Constant *> &ToRemove) {
     GlobalVariable *GV = M.getGlobalVariable(Name);
@@ -217,20 +129,6 @@
                        "");
   }
 
-  static SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
-    SmallPtrSet<GlobalValue *, 32> UsedList;
-
-    SmallVector<GlobalValue *, 32> TmpVec;
-    collectUsedGlobalVariables(M, TmpVec, true);
-    UsedList.insert(TmpVec.begin(), TmpVec.end());
-
-    TmpVec.clear();
-    collectUsedGlobalVariables(M, TmpVec, false);
-    UsedList.insert(TmpVec.begin(), TmpVec.end());
-
-    return UsedList;
-  }
-
 public:
   static char ID;
 
@@ -241,11 +139,11 @@
   bool runOnModule(Module &M) override {
     LLVMContext &Ctx = M.getContext();
     const DataLayout &DL = M.getDataLayout();
-    SmallPtrSet<GlobalValue *, 32> UsedList = getUsedList(M);
+    SmallPtrSet<GlobalValue *, 32> UsedList = AMDGPU::getUsedList(M);
 
     // Find variables to move into new struct instance
     std::vector<GlobalVariable *> FoundLocalVars =
-        findVariablesToLower(M, UsedList);
+        AMDGPU::findVariablesToLower(M, UsedList);
 
     if (FoundLocalVars.empty()) {
       // No variables to rewrite, no changes made.
@@ -257,8 +155,8 @@
     llvm::stable_sort(
         FoundLocalVars,
         [&](const GlobalVariable *LHS, const GlobalVariable *RHS) -> bool {
-          Align ALHS = getAlign(DL, LHS);
-          Align ARHS = getAlign(DL, RHS);
+          Align ALHS = AMDGPU::getAlign(DL, LHS);
+          Align ARHS = AMDGPU::getAlign(DL, RHS);
           if (ALHS != ARHS) {
             return ALHS > ARHS;
           }
@@ -280,7 +178,7 @@
       uint64_t CurrentOffset = 0;
       for (size_t I = 0; I < FoundLocalVars.size(); I++) {
         GlobalVariable *FGV = FoundLocalVars[I];
-        Align DataAlign = getAlign(DL, FGV);
+        Align DataAlign = AMDGPU::getAlign(DL, FGV);
 
         uint64_t DataAlignV = DataAlign.value();
         if (uint64_t Rem = CurrentOffset % DataAlignV) {
@@ -312,7 +210,8 @@
     StructType *LDSTy = StructType::create(
         Ctx, LocalVarTypes, llvm::StringRef("llvm.amdgcn.module.lds.t"));
 
-    Align MaxAlign = getAlign(DL, LocalVars[0]); // was sorted on alignment
+    Align MaxAlign =
+        AMDGPU::getAlign(DL, LocalVars[0]); // was sorted on alignment
     Constant *InstanceAddress = Constant::getIntegerValue(
         PointerType::get(LDSTy, AMDGPUAS::LOCAL_ADDRESS), APInt(32, 0));
 
@@ -350,7 +249,7 @@
       SmallPtrSet<Function *, 32> Kernels;
       for (auto &I : M.functions()) {
         Function *Func = &I;
-        if (isKernelCC(Func) && !Kernels.contains(Func)) {
+        if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
           markUsedByKernel(Builder, Func, SGV);
           Kernels.insert(Func);
         }
diff --git a/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
new file mode 100644
index 0000000..e5cee6f
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -0,0 +1,127 @@
+//===- AMDGPULDSUtils.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDGPU LDS related helper utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPULDSUtils.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+namespace AMDGPU {
+
+bool isKernelCC(Function *Func) {
+  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
+}
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
+  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
+                                       GV->getValueType());
+}
+
+bool userRequiresLowering(const SmallPtrSetImpl<GlobalValue *> &UsedList,
+                          User *InitialUser) {
+  // Any LDS variable can be lowered by moving into the created struct
+  // Each variable so lowered is allocated in every kernel, so variables
+  // whose users are all known to be safe to lower without the transform
+  // are left unchanged.
+  SmallPtrSet<User *, 8> Visited;
+  SmallVector<User *, 16> Stack;
+  Stack.push_back(InitialUser);
+
+  while (!Stack.empty()) {
+    User *V = Stack.pop_back_val();
+    Visited.insert(V);
+
+    if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
+      if (UsedList.contains(G)) {
+        continue;
+      }
+    }
+
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      if (isKernelCC(I->getFunction())) {
+        continue;
+      }
+    }
+
+    if (auto *E = dyn_cast<ConstantExpr>(V)) {
+      for (Value::user_iterator EU = E->user_begin(); EU != E->user_end();
+           ++EU) {
+        if (Visited.insert(*EU).second) {
+          Stack.push_back(*EU);
+        }
+      }
+      continue;
+    }
+
+    // Unknown user, conservatively lower the variable
+    return true;
+  }
+
+  return false;
+}
+
+std::vector<GlobalVariable *>
+findVariablesToLower(Module &M,
+                     const SmallPtrSetImpl<GlobalValue *> &UsedList) {
+  std::vector<llvm::GlobalVariable *> LocalVars;
+  for (auto &GV : M.globals()) {
+    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
+      continue;
+    }
+    if (!GV.hasInitializer()) {
+      // addrspace(3) without initializer implies cuda/hip extern __shared__
+      // the semantics for such a variable appears to be that all extern
+      // __shared__ variables alias one another, in which case this transform
+      // is not required
+      continue;
+    }
+    if (!isa<UndefValue>(GV.getInitializer())) {
+      // Initializers are unimplemented for local address space.
+      // Leave such variables in place for consistent error reporting.
+      continue;
+    }
+    if (GV.isConstant()) {
+      // A constant undef variable can't be written to, and any load is
+      // undef, so it should be eliminated by the optimizer. It could be
+      // dropped by the back end if not. This pass skips over it.
+      continue;
+    }
+    if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) {
+          return userRequiresLowering(UsedList, U);
+        })) {
+      continue;
+    }
+    LocalVars.push_back(&GV);
+  }
+  return LocalVars;
+}
+
+SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
+  SmallPtrSet<GlobalValue *, 32> UsedList;
+
+  SmallVector<GlobalValue *, 32> TmpVec;
+  collectUsedGlobalVariables(M, TmpVec, true);
+  UsedList.insert(TmpVec.begin(), TmpVec.end());
+
+  TmpVec.clear();
+  collectUsedGlobalVariables(M, TmpVec, false);
+  UsedList.insert(TmpVec.begin(), TmpVec.end());
+
+  return UsedList;
+}
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
new file mode 100644
index 0000000..f2c781a
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -0,0 +1,38 @@
+//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDGPU LDS related helper utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
+
+#include "AMDGPU.h"
+
+namespace llvm {
+
+namespace AMDGPU {
+
+bool isKernelCC(Function *Func);
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
+
+bool userRequiresLowering(const SmallPtrSetImpl<GlobalValue *> &UsedList,
+                          User *InitialUser);
+
+std::vector<GlobalVariable *>
+findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList);
+
+SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M);
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
diff --git a/lib/Target/AMDGPU/Utils/CMakeLists.txt b/lib/Target/AMDGPU/Utils/CMakeLists.txt
index fdb6e2e..6350ff4 100644
--- a/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -1,8 +1,9 @@
 add_llvm_component_library(LLVMAMDGPUUtils
-  AMDGPUBaseInfo.cpp
-  AMDKernelCodeTUtils.cpp
   AMDGPUAsmUtils.cpp
+  AMDGPUBaseInfo.cpp
+  AMDGPULDSUtils.cpp
   AMDGPUPALMetadata.cpp
+  AMDKernelCodeTUtils.cpp
 
   LINK_COMPONENTS
   Core