GlobalISel: Use multiple returns for intrinsic structs

This is consistent with what SelectionDAG does and is much easier to
work with than the extract sequence with an artificial wide register.

For the AMDGPU control flow intrinsics, this was producing an s128 for
the i64, i1 tuple return. Any legalization that should apply to a real
s128 value would badly obscure the direct values that need to be seen.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356147 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 34b66d6..3bbd6ec 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -791,7 +791,7 @@
   /// \pre setBasicBlock or setMI must have been called.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
-  MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res,
+  MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<unsigned> Res,
                                      bool HasSideEffects);
 
   /// Build and insert \p Res = G_FPTRUNC \p Op
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7db5407..8528636 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1138,8 +1138,8 @@
       ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
   }
 
-  bool IsSplitType = valueIsSplit(CI);
   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
+    bool IsSplitType = valueIsSplit(CI);
     unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
                                      getLLTForType(*CI.getType(), *DL))
                                : getOrCreateVReg(CI);
@@ -1163,16 +1163,12 @@
   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;
 
-  unsigned Res = 0;
-  if (!CI.getType()->isVoidTy()) {
-    if (IsSplitType)
-      Res =
-          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
-    else
-      Res = getOrCreateVReg(CI);
-  }
+  ArrayRef<unsigned> ResultRegs;
+  if (!CI.getType()->isVoidTy())
+    ResultRegs = getOrCreateVRegs(CI);
+
   MachineInstrBuilder MIB =
-      MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+      MIRBuilder.buildIntrinsic(ID, ResultRegs, !CI.doesNotAccessMemory());
 
   for (auto &Arg : CI.arg_operands()) {
     // Some intrinsics take metadata parameters. Reject them.
@@ -1181,9 +1177,6 @@
     MIB.addUse(packRegs(*Arg, MIRBuilder));
   }
 
-  if (IsSplitType)
-    unpackRegs(CI, Res, MIRBuilder);
-
   // Add a MachineMemOperand if it is a target mem intrinsic.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 30fe2b0..81d26e6 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -609,13 +609,13 @@
 }
 
 MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
-                                                     unsigned Res,
+                                                     ArrayRef<unsigned> ResultRegs,
                                                      bool HasSideEffects) {
   auto MIB =
       buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
                                 : TargetOpcode::G_INTRINSIC);
-  if (Res)
-    MIB.addDef(Res);
+  for (unsigned ResultReg : ResultRegs)
+    MIB.addDef(ResultReg);
   MIB.addIntrinsicID(ID);
   return MIB;
 }
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 57575ed..2e02ec0 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -2343,7 +2343,7 @@
 }
 
 ; CHECK-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32
-; CHECK: %1:_(s384) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load 48 from %ir.ptr, align 64)
+; CHECK: %1:_(<4 x s32>), %2:_(<4 x s32>), %3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load 48 from %ir.ptr, align 64)
 define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr) {
   %arst = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %ptr)
   ret void
diff --git a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
new file mode 100644
index 0000000..f450370
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -stop-after=irtranslator -o - %s | FileCheck %s
+
+declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
+
+define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
+  ; CHECK-LABEL: name: test_div_scale
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[C]](s1)
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
+  ; CHECK:   G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   G_STORE [[SEXT]](s32), [[DEF1]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   S_ENDPGM
+  %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
+  %extract0 = extractvalue { float, i1 } %call, 0
+  %extract1 = extractvalue { float, i1 } %call, 1
+  %ext = sext i1 %extract1 to i32
+  store float %extract0, float addrspace(1)* undef
+  store i32 %ext, i32 addrspace(1)* undef
+  ret void
+}