[GISel] Legalize bitreverse with types smaller than 8 bits (#92998)

This patch adds support for lowering `bitreverse` with types smaller
than 8 bits. It also fixes an existing assertion failure in
`llvm::APInt::getSplat`: https://godbolt.org/z/7crs8xrcG

The lowering logic is copied from SDAG:

https://github.com/llvm/llvm-project/blob/2034f2fc8729bd4645ef7caa3c5c6efa284d2d3f/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp#L9384-L9398
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d8b0f52..9208b09 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7977,27 +7977,51 @@
 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
   auto [Dst, Src] = MI.getFirst2Regs();
   const LLT Ty = MRI.getType(Src);
-  unsigned Size = Ty.getSizeInBits();
+  unsigned Size = Ty.getScalarSizeInBits();
 
-  MachineInstrBuilder BSWAP =
-      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
+  if (Size >= 8) {
+    MachineInstrBuilder BSWAP =
+        MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
 
-  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
-  //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
-  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
-  MachineInstrBuilder Swap4 =
-      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
+    // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+    //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+    // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+    MachineInstrBuilder Swap4 =
+        SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
 
-  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
-  //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
-  // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
-  MachineInstrBuilder Swap2 =
-      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
+    // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+    //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
+    // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
+    MachineInstrBuilder Swap2 =
+        SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
 
-  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
-  //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
-  // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
-  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+    // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
+    // 6|7
+    //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
+    // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
+    SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+  } else {
+    // Expand bitreverse for types smaller than 8 bits.
+    MachineInstrBuilder Tmp;
+    for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
+      MachineInstrBuilder Tmp2;
+      if (I < J) {
+        auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
+        Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
+      } else {
+        auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
+        Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
+      }
+
+      auto Mask = MIRBuilder.buildConstant(Ty, 1U << J);
+      Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
+      if (I == 0)
+        Tmp = Tmp2;
+      else
+        Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
+    }
+    MIRBuilder.buildCopy(Dst, Tmp);
+  }
 
   MI.eraseFromParent();
   return Legalized;