LiveIntervals: Fix handleMoveUp with subreg def moving across a def

If a subregister def was moved across another subregister def and
another use, the main range was not correctly updated. The end point
of the moved interval ended too early and missed the use from theh
other lanes in the subreg def.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375300 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index 9d85fc3..2989930 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -1288,6 +1288,20 @@
           const SlotIndex SplitPos = NewIdxDef;
           OldIdxVNI = OldIdxIn->valno;
 
+          SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
+          LiveRange::iterator Prev = std::prev(OldIdxIn);
+          if (OldIdxIn != LR.begin() &&
+              SlotIndex::isEarlierInstr(NewIdx, Prev->end)) {
+            // If the segment before OldIdx read a value defined earlier than
+            // NewIdx, the moved instruction also reads and forwards that
+            // value. Extend the lifetime of the new def point.
+
+            // Extend to where the previous range started, unless there is
+            // another redef first.
+            NewDefEndPoint = std::min(OldIdxIn->start,
+                                      std::next(NewIdxOut)->start);
+          }
+
           // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
           OldIdxOut->valno->def = OldIdxIn->start;
           *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
@@ -1305,7 +1319,8 @@
             // There is no gap between NewSegment and its predecessor.
             *NewSegment = LiveRange::Segment(Next->start, SplitPos,
                                              Next->valno);
-            *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+
+            *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
             Next->valno->def = SplitPos;
           } else {
             // There is a gap between NewSegment and its predecessor
diff --git a/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
new file mode 100644
index 0000000..47c8e19d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-misched -run-pass=machine-scheduler -o - %s  | FileCheck %s
+
+---
+name:            handleMoveUp_incorrect_interval
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  scratchWaveOffsetReg: '$sgpr101'
+  frameOffsetReg:  '$sgpr101'
+  stackPtrOffsetReg: '$sgpr101'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+    workGroupIDX:    { reg: '$sgpr6' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+    workItemIDX:     { reg: '$vgpr0' }
+body:             |
+  ; CHECK-LABEL: name: handleMoveUp_incorrect_interval
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr4_sgpr5
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+  ; CHECK:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+  ; CHECK:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329
+  ; CHECK:   undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
+  ; CHECK:   [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   INLINEASM &"", 1, 851978, def dead %11
+  ; CHECK:   GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; CHECK:   [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
+  ; CHECK:   INLINEASM &"def $0 $1", 1, 851978, def %15, 851978, def %16
+  ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
+  ; CHECK:   [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+  ; CHECK:   [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
+  ; CHECK:   INLINEASM &"def $0 $1", 1, 851978, def %21, 851978, def %22
+  ; CHECK:   [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
+  ; CHECK:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec
+  ; CHECK:   [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U32_e64 64, [[V_ADD_U32_e32_]], implicit $exec
+  ; CHECK:   [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
+  ; CHECK:   [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   INLINEASM &"", 1, 851978, def dead [[V_MOV_B32_e32_2]], 851978, def dead [[V_MOV_B32_e32_3]], 851977, [[DS_READ_B64_gfx9_]].sub0, 2147483657, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193, [[V_MOV_B32_e32_3]](tied-def 5), 851977, %15, 851977, %16, 851977, [[DS_READ_B32_gfx9_1]], 851977, [[DS_READ_B32_gfx9_]], 851977, [[DS_READ_B32_gfx9_3]], 851977, [[DS_READ_B32_gfx9_2]]
+  ; CHECK:   DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
+  ; CHECK:   DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
+  ; CHECK:   DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
+  ; CHECK:   undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+  ; CHECK:   [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_ADD_U32_e32_]], [[S_MOV_B32_]], implicit $exec
+  ; CHECK:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_ADD_U32_e32_]], [[V_CMP_GT_U32_e64_]], implicit $exec
+  ; CHECK:   [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_]], [[DEF1]], implicit $exec
+  ; CHECK:   [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_CNDMASK_B32_e64_]], [[S_MOV_B32_]], implicit $exec
+  ; CHECK:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_SUB_U32_e32_]], [[DEF]].sub0, implicit $exec
+  ; CHECK:   [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_1]], [[V_MUL_LO_U32_]], implicit $exec
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+  ; CHECK:   [[DEF]].sub0:vreg_64 = V_ADD_U32_e32 [[V_SUB_U32_e32_1]], [[V_ADD_U32_e32_1]], implicit $exec
+  ; CHECK:   undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec
+  ; CHECK:   undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec
+  ; CHECK:   undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
+  ; CHECK:   %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
+  ; CHECK:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
+  ; CHECK:   INLINEASM &"", 1
+  ; CHECK:   [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
+  ; CHECK:   GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; CHECK:   %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec
+  ; CHECK:   DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+    %4:sreg_32_xm0 = S_MOV_B32 5329
+    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
+    %8:vreg_64 = IMPLICIT_DEF
+    %9:vgpr_32 = IMPLICIT_DEF
+    %10:vgpr_32 = IMPLICIT_DEF
+
+  bb.1:
+    INLINEASM &"", 1, 851978, def %11:vgpr_32
+    GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
+    INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32
+    %17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec
+    %18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec
+    %19:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
+    INLINEASM &"def $0 $1", 1, 851978, def %21:vgpr_32, 851978, def %22:vgpr_32
+    %23:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec
+    %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %5.sub1:vreg_64 = COPY %6
+    %25:vgpr_32 = V_ADD_U32_e32 1, %10, implicit $exec
+    %26:sreg_64_xexec = V_CMP_GT_U32_e64 64, %25, implicit $exec
+    %27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    INLINEASM &"", 1, 851978, def dead %24, 851978, def dead %27, 851977, %13.sub0, 2147483657, %24(tied-def 3), 2147549193, %27(tied-def 5), 851977, %15, 851977, %16, 851977, %18, 851977, %17, 851977, %23, 851977, %19
+    DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
+    DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
+    DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
+    undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %33:vgpr_32 = V_MUL_LO_U32 %25, %4, implicit $exec
+    %10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec
+    %34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec
+    %9:vgpr_32 = V_MUL_LO_U32 %10, %4, implicit $exec
+    %35:vgpr_32 = V_ADD_U32_e32 %34, %8.sub0, implicit $exec
+    %36:vgpr_32 = V_SUB_U32_e32 %9, %33, implicit $exec
+    %37:vgpr_32 = COPY %3.sub1
+    undef %8.sub0:vreg_64 = V_ADD_U32_e32 %36, %35, implicit $exec
+    %8.sub1:vreg_64 = COPY %6
+    undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 %3.sub0, %8.sub0, 0, implicit $exec
+    undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec
+    undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
+    %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
+    %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
+    INLINEASM &"", 1
+    %44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
+    GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    %31.sub0:vreg_64 = COPY %43, implicit $exec
+    DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
+    S_BRANCH %bb.1
+
+...
diff --git a/unittests/MI/LiveIntervalTest.cpp b/unittests/MI/LiveIntervalTest.cpp
index c81f13f..782df23 100644
--- a/unittests/MI/LiveIntervalTest.cpp
+++ b/unittests/MI/LiveIntervalTest.cpp
@@ -421,6 +421,46 @@
   });
 }
 
+TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDef) {
+  liveIntervalTest(R"MIR(
+    %1:vreg_64 = IMPLICIT_DEF
+
+  bb.1:
+    %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec
+    undef %1.sub0:vreg_64 = V_ADD_U32_e32 %2, %2, implicit $exec
+    %1.sub1:vreg_64 = COPY %2
+    S_NOP 0, implicit %1.sub1
+    S_BRANCH %bb.1
+
+)MIR", [](MachineFunction &MF, LiveIntervals &LIS) {
+     MachineInstr &UndefSubregDef = getMI(MF, 2, 1);
+     // The scheduler clears undef from subregister defs before moving
+     UndefSubregDef.getOperand(0).setIsUndef(false);
+     testHandleMove(MF, LIS, 3, 1, 1);
+  });
+}
+
+TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDefMulti) {
+  liveIntervalTest(R"MIR(
+    %1:vreg_96 = IMPLICIT_DEF
+
+  bb.1:
+    %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec
+    undef %1.sub0:vreg_96 = V_ADD_U32_e32 %2, %2, implicit $exec
+    %1.sub1:vreg_96 = COPY %2
+    %1.sub2:vreg_96 = COPY %2
+    S_NOP 0, implicit %1.sub1, implicit %1.sub2
+    S_BRANCH %bb.1
+
+)MIR", [](MachineFunction &MF, LiveIntervals &LIS) {
+     MachineInstr &UndefSubregDef = getMI(MF, 2, 1);
+     // The scheduler clears undef from subregister defs before moving
+     UndefSubregDef.getOperand(0).setIsUndef(false);
+     testHandleMove(MF, LIS, 4, 1, 1);
+  });
+}
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
   initLLVM();