GlobalISel: Implement fewerElementsVector for G_TRUNC

Extend fewerElementsVectorBasic to handle operands whose element types
differ, as long as they all have the same number of elements.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b5d55f..9b9985e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2492,6 +2492,9 @@
   return Legalized;
 }
 
+// Handles operands with different element types, provided they all have the
+// same number of elements. The instruction may therefore have multiple type
+// indexes; NarrowTy is expected to have the result's element type.
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
@@ -2507,16 +2510,22 @@
   SmallVector<Register, 8> ExtractedRegs[3];
   SmallVector<Register, 8> Parts;
 
+  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+
   // Break down all the sources into NarrowTy pieces we can operate on. This may
   // involve creating merges to a wider type, padded with undef.
   for (int I = 0; I != NumOps; ++I) {
     Register SrcReg =  MI.getOperand(I + 1).getReg();
     LLT SrcTy = MRI.getType(SrcReg);
-    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, NarrowTy, SrcReg);
+
+    // Each operand may keep its own element type; only the number of elements
+    // per piece is taken from NarrowTy.
+    LLT OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
+    LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
 
     // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
-    buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ExtractedRegs[I],
-                        TargetOpcode::G_ANYEXT);
+    buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy,
+                        ExtractedRegs[I], TargetOpcode::G_ANYEXT);
   }
 
   SmallVector<Register, 8> ResultRegs;
@@ -2525,7 +2534,10 @@
   SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
 
   int NumParts = ExtractedRegs[0].size();
-  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+  const LLT DstTy = MRI.getType(DstReg);
+  const unsigned DstSize = DstTy.getSizeInBits();
+  LLT DstLCMTy = getLCMType(DstTy, NarrowTy);
+
   const unsigned NarrowSize = NarrowTy.getSizeInBits();
 
   // We widened the source registers to satisfy merge/unmerge size
@@ -2548,9 +2560,7 @@
     ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0));
 
   // Extract the possibly padded result to the original result register.
-  LLT DstTy = MRI.getType(DstReg);
-  LLT LCMTy = getLCMType(DstTy, NarrowTy);
-  buildWidenedRemergeToDst(DstReg, LCMTy, ResultRegs);
+  buildWidenedRemergeToDst(DstReg, DstLCMTy, ResultRegs);
 
   MI.eraseFromParent();
   return Legalized;
@@ -3123,6 +3133,7 @@
   switch (MI.getOpcode()) {
   case G_IMPLICIT_DEF:
     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
+  case G_TRUNC:
   case G_AND:
   case G_OR:
   case G_XOR: