[GlobalISel] Handle non-multiples of the base type in narrowScalarAddSub

When narrowing G_ADD and G_SUB, handle types that aren't a multiple of
the type we're narrowing to. This allows us to handle types like s96
on 64 bit targets.

Note that the test here has a couple of dead instructions because of
the way the setup legalizes. I wasn't able to come up with a way to
write this test that avoids that easily.

Differential Revision: https://reviews.llvm.org/D97811
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index da76f0e..5478cb1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4885,17 +4885,6 @@
   if (DstType.isVector())
     return UnableToLegalize;
 
-  uint64_t SizeOp0 = DstType.getSizeInBits();
-  uint64_t NarrowSize = NarrowTy.getSizeInBits();
-
-  // FIXME: add support for when SizeOp0 isn't an exact multiple of
-  // NarrowSize.
-  if (SizeOp0 % NarrowSize != 0)
-    return UnableToLegalize;
-
-  // Expand in terms of carry-setting/consuming G_<Op>E instructions.
-  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
-
   unsigned Opcode = MI.getOpcode();
   unsigned OpO, OpE, OpF;
   switch (Opcode) {
@@ -4929,28 +4918,37 @@
   unsigned NumDefs = MI.getNumExplicitDefs();
   Register Src1 = MI.getOperand(NumDefs).getReg();
   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
-  Register CarryDst;
+  Register CarryDst, CarryIn;
   if (NumDefs == 2)
     CarryDst = MI.getOperand(1).getReg();
-  Register CarryIn;
   if (MI.getNumOperands() == NumDefs + 3)
     CarryIn = MI.getOperand(NumDefs + 2).getReg();
 
-  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
-  extractParts(Src1, NarrowTy, NumParts, Src1Regs);
-  extractParts(Src2, NarrowTy, NumParts, Src2Regs);
+  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT LeftoverTy, DummyTy;
+  SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
+  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
+  extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
 
-  for (int i = 0; i < NumParts; ++i) {
-    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+  int NarrowParts = Src1Regs.size();
+  for (int I = 0, E = Src1Left.size(); I != E; ++I) {
+    Src1Regs.push_back(Src1Left[I]);
+    Src2Regs.push_back(Src2Left[I]);
+  }
+  DstRegs.reserve(Src1Regs.size());
+
+  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
+    Register DstReg =
+        MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
     // Forward the final carry-out to the destination register
-    if (i == NumParts - 1 && CarryDst)
+    if (i == e - 1 && CarryDst)
       CarryOut = CarryDst;
 
     if (!CarryIn) {
       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
                             {Src1Regs[i], Src2Regs[i]});
-    } else if (i == NumParts - 1) {
+    } else if (i == e - 1) {
       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
                             {Src1Regs[i], Src2Regs[i], CarryIn});
     } else {
@@ -4961,7 +4959,10 @@
     DstRegs.push_back(DstReg);
     CarryIn = CarryOut;
   }
-  MIRBuilder.buildMerge(DstReg, DstRegs);
+  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
+              makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
+              makeArrayRef(DstRegs).drop_front(NarrowParts));
+
   MI.eraseFromParent();
   return Legalized;
 }