AMDGPU: Use CopyToReg for interp intrinsic lowering

The interp intrinsic lowering doesn't use the default value, so it
doesn't benefit from the copyToM0 hack that exists to help optimize it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375450 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 80eb2be..30fed3b 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5877,34 +5877,35 @@
   case Intrinsic::amdgcn_fdiv_fast:
     return lowerFDIV_FAST(Op, DAG);
   case Intrinsic::amdgcn_interp_mov: {
-    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
-    SDValue Glue = M0.getValue(1);
+    SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+                                    Op.getOperand(4), SDValue());
     return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
-                       Op.getOperand(2), Op.getOperand(3), Glue);
+                       Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
   }
   case Intrinsic::amdgcn_interp_p1: {
-    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
-    SDValue Glue = M0.getValue(1);
+    SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+                                    Op.getOperand(4), SDValue());
     return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
-                       Op.getOperand(2), Op.getOperand(3), Glue);
+                       Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
   }
   case Intrinsic::amdgcn_interp_p2: {
-    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
-    SDValue Glue = SDValue(M0.getNode(), 1);
+    SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+                                    Op.getOperand(5), SDValue());
     return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
-                       Glue);
+                       ToM0.getValue(1));
   }
   case Intrinsic::amdgcn_interp_p1_f16: {
-    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
-    SDValue Glue = M0.getValue(1);
+    SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+                                    Op.getOperand(5), SDValue());
+
     if (getSubtarget()->getLDSBankCount() == 16) {
       // 16 bank LDS
       SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
                               DAG.getConstant(2, DL, MVT::i32), // P0
                               Op.getOperand(2), // Attrchan
                               Op.getOperand(3), // Attr
-                              Glue);
+                              ToM0.getValue(1));
       SDValue Ops[] = {
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
@@ -5927,14 +5928,14 @@
         Op.getOperand(4), // high
         DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
         DAG.getTargetConstant(0, DL, MVT::i32), // $omod
-        Glue
+        ToM0.getValue(1)
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
     }
   }
   case Intrinsic::amdgcn_interp_p2_f16: {
-    SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
-    SDValue Glue = SDValue(M0.getNode(), 1);
+    SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+                                    Op.getOperand(6), SDValue());
     SDValue Ops[] = {
       Op.getOperand(2), // Src0
       Op.getOperand(3), // Attrchan
@@ -5944,7 +5945,7 @@
       DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
       Op.getOperand(5), // high
       DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
-      Glue
+      ToM0.getValue(1)
     };
     return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
   }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
index 37417a9..5d2e107 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
@@ -6,8 +6,8 @@
 define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
 ; GFX9-32BANK-LABEL: interp_f16:
 ; GFX9-32BANK:       ; %bb.0: ; %main_body
-; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
+; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 ; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v1, v0, attr2.y
 ; GFX9-32BANK-NEXT:    v_mov_b32_e32 v2, s1
@@ -20,8 +20,8 @@
 ;
 ; GFX8-32BANK-LABEL: interp_f16:
 ; GFX8-32BANK:       ; %bb.0: ; %main_body
-; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
+; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 ; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v1, v0, attr2.y
 ; GFX8-32BANK-NEXT:    v_mov_b32_e32 v2, s1
@@ -119,8 +119,8 @@
 define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
 ; GFX9-32BANK-LABEL: interp_p2_m0_setup:
 ; GFX9-32BANK:       ; %bb.0: ; %main_body
-; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-32BANK-NEXT:    s_mov_b32 m0, s2
+; GFX9-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 ; GFX9-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
 ; GFX9-32BANK-NEXT:    ;;#ASMSTART
@@ -136,8 +136,8 @@
 ;
 ; GFX8-32BANK-LABEL: interp_p2_m0_setup:
 ; GFX8-32BANK:       ; %bb.0: ; %main_body
-; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-32BANK-NEXT:    s_mov_b32 m0, s2
+; GFX8-32BANK-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8-32BANK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 ; GFX8-32BANK-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
 ; GFX8-32BANK-NEXT:    ;;#ASMSTART