GlobalISel: Handle lowering non-power-of-2 extloads
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b31cf3f..354cecf 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2849,60 +2849,62 @@
return Legalized;
}
- if (DstTy.getSizeInBits() != MMO.getSizeInBits())
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(MemSizeInBits))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Big endian lowering not implemented.
+ if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(DstTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
- uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
- &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(
+ TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- auto LargeLoad = MIRBuilder.buildLoadInstr(
- TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ auto SmallLoad = MIRBuilder.buildLoadInstr(
+ MI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO);
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
- auto SmallLoad = MIRBuilder.buildLoad(AnyExtTy, SmallPtr,
- *SmallMMO);
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+ if (AnyExtTy == DstTy)
+ MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
+ else {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
- MI.eraseFromParent();
- return Legalized;
}
- return UnableToLegalize;
+ MI.eraseFromParent();
+ return Legalized;
}
LegalizerHelper::LegalizeResult