GlobalISel: Have load lowering handle some unaligned accesses

The code for splitting an unaligned access into 2 pieces is essentially
the same as the code for splitting a non-power-of-2 scalar load. It
would be better to pick an optimal memory access size and use it
directly, but splitting in half is what the DAG does.
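
For concreteness, a minimal standalone sketch of the size selection
described above (pickSplitSizes is an illustrative name, not a helper
in the tree):

  #include "llvm/Support/MathExtras.h"
  #include <cstdint>
  #include <utility>

  // Illustrative helper, not part of the patch: choose the two piece sizes
  // (in bits) for a scalar load that has to be split, either because its
  // size is not a power of 2 or because it is under-aligned.
  static std::pair<uint64_t, uint64_t> pickSplitSizes(uint64_t MemSizeInBits) {
    if (!llvm::isPowerOf2_64(MemSizeInBits)) {
      // e.g. 24 bits -> {16, 8}: the largest power-of-2 piece plus the rest.
      uint64_t Large = llvm::PowerOf2Floor(MemSizeInBits);
      return {Large, MemSizeInBits - Large};
    }
    // e.g. an under-aligned 32-bit access -> {16, 16}: split in half,
    // matching what the DAG lowering does.
    return {MemSizeInBits / 2, MemSizeInBits / 2};
  }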

As-is, this fixes the handling of some unaligned sextloads/zextloads for
AMDGPU. In the future this will help drop the ugly abuse of
narrowScalar to handle splitting unaligned accesses.
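
For illustration only (not part of this change), a hedged sketch of the
kind of rule a target could use once this lowering is in place, marking
under-aligned power-of-2 loads as lower instead of sending them through
narrowScalar; the LegalityQuery field names are assumptions based on the
API at the time of this patch:

  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  #include "llvm/CodeGen/TargetOpcodes.h"
  #include "llvm/Support/MathExtras.h"

  using namespace llvm;

  // Hypothetical target hook; the rule and field names below (lowerIf,
  // MMODescrs, MemoryTy, AlignInBits) are assumed from the current API.
  static void configureLoadRules(LegalizerInfo &LI) {
    const LLT S32 = LLT::scalar(32);
    const LLT P0 = LLT::pointer(0, 64);

    LI.getActionDefinitionsBuilder(TargetOpcode::G_LOAD)
        .lowerIf([](const LegalityQuery &Query) {
          // Power-of-2 memory size but insufficient alignment: let the
          // LegalizerHelper load lowering split the access in half.
          const auto &MemDesc = Query.MMODescrs[0];
          uint64_t MemSize = MemDesc.MemoryTy.getSizeInBits();
          return isPowerOf2_64(MemSize) && MemDesc.AlignInBits < MemSize;
        })
        .legalFor({{S32, P0}}); // sufficiently aligned loads fall through here
    // A real target would also finalize and verify its rule set.
  }
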
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c1e0d25..4dddb98 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2928,16 +2928,15 @@
     return Legalized;
   }
 
-  // This load needs splitting into power of 2 sized loads.
   if (DstTy.isVector())
     return UnableToLegalize;
-  if (isPowerOf2_32(MemSizeInBits))
-    return UnableToLegalize; // Don't know what we're being asked to do.
 
   // Big endian lowering not implemented.
   if (MIRBuilder.getDataLayout().isBigEndian())
     return UnableToLegalize;
 
+  // This load needs splitting into power of 2 sized loads.
+  //
   // Our strategy here is to generate anyextending loads for the smaller
   // types up to next power-2 result type, and then combine the two larger
   // result values together, before truncating back down to the non-pow-2
@@ -2950,8 +2949,21 @@
   // v1 = i24 trunc v5
   // By doing this we generate the correct truncate which should get
   // combined away as an artifact with a matching extend.
-  uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
-  uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+  uint64_t LargeSplitSize, SmallSplitSize;
+
+  if (!isPowerOf2_32(MemSizeInBits)) {
+    LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+    SmallSplitSize = MemSizeInBits - LargeSplitSize;
+  } else {
+    // Assume we're being asked to decompose an unaligned load.
+    // TODO: If this requires multiple splits, handle them all at once.
+    auto &Ctx = MF.getFunction().getContext();
+    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+      return UnableToLegalize;
+
+    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+  }
 
   MachineMemOperand *LargeMMO =
       MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -2976,9 +2988,16 @@
 
   if (AnyExtTy == DstTy)
     MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
-  else {
+  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
     auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
     MIRBuilder.buildTrunc(DstReg, {Or});
+  } else {
+    assert(DstTy.isPointer() && "expected pointer");
+    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+
+    // FIXME: We currently consider this to be illegal for non-integral address
+    // spaces, but we still need a way to reinterpret the bits.
+    MIRBuilder.buildIntToPtr(DstReg, Or);
   }
 
   LoadMI.eraseFromParent();