[AMDGPU][GlobalISel] Legalization of G_ROTL and G_ROTR
Add implementation for the legalization of G_ROTL and G_ROTR machine
instructions. They are very similar to funnel shift instructions; the only
difference is that funnel shifts have three operands, whereas rotate
instructions have two: the first is the register being rotated and the second
is the rotate amount. G_ROTL/G_ROTR are legalized by simply lowering them
into funnel shift instructions when those funnel shifts are legal.
Patch by: Mateja Marjanovic
Differential Revision: https://reviews.llvm.org/D105347
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 315c199..ca7fe92 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4434,6 +4434,8 @@
case G_FMAXIMUM:
case G_FSHL:
case G_FSHR:
+ case G_ROTL:
+ case G_ROTR:
case G_FREEZE:
case G_SADDSAT:
case G_SSUBSAT:
@@ -6079,6 +6081,27 @@
isPowerOf2_32(EltSizeInBits))
return lowerRotateWithReverseRotate(MI);
+ // If a funnel shift is supported, use it.
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ bool IsFShLegal = false;
+ if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+ LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+ auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+ Register R3) {
+ MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+ MI.eraseFromParent();
+ return Legalized;
+ };
+ // Use the same-direction funnel shift if legal; otherwise try the reverse one.
+ if (IsFShLegal) {
+ return buildFunnelShift(FShOpc, Dst, Src, Amt);
+ } else if (isPowerOf2_32(EltSizeInBits)) {
+ Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+ return buildFunnelShift(RevFsh, Dst, Src, Amt);
+ }
+ }
+
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;