[AVR] Fix codegen bug in 16-bit loads

Prior to this patch, the AVR::LDWRdPtr instruction was always lowered to
instructions of this pattern:

    ld  $GPR8, [PTR:XYZ]+
    ld  $GPR8, [PTR]+1

This has a problem; the [PTR] is incremented in-place once, but never
decremented.

Future uses of the same pointer will use the now clobbered value,
leading to the pointer being incorrect by an offset of one.

This patch modifies the expansion code of the LDWRdPtr pseudo
instruction so that the pointer variable is not silently clobbered in
future uses in the same live range.

Patch by Keshav Kini.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351544 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 536a547..8756174 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -583,8 +583,8 @@
   unsigned TmpReg = 0; // 0 for no temporary register
   unsigned SrcReg = MI.getOperand(1).getReg();
   bool SrcIsKill = MI.getOperand(1).isKill();
-  OpLo = AVR::LDRdPtrPi;
-  OpHi = AVR::LDRdPtr;
+  OpLo = AVR::LDRdPtr;
+  OpHi = AVR::LDDRdPtrQ;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
 
   // Use a temporary register if src and dst registers are the same.
@@ -597,8 +597,7 @@
   // Load low byte.
   auto MIBLO = buildMI(MBB, MBBI, OpLo)
     .addReg(CurDstLoReg, RegState::Define)
-    .addReg(SrcReg, RegState::Define)
-    .addReg(SrcReg);
+    .addReg(SrcReg, RegState::Define);
 
   // Push low byte onto stack if necessary.
   if (TmpReg)
@@ -607,7 +606,8 @@
   // Load high byte.
   auto MIBHI = buildMI(MBB, MBBI, OpHi)
     .addReg(CurDstHiReg, RegState::Define)
-    .addReg(SrcReg, getKillRegState(SrcIsKill));
+    .addReg(SrcReg, getKillRegState(SrcIsKill))
+    .addImm(1);
 
   if (TmpReg) {
     // Move the high byte into the final destination.
diff --git a/test/CodeGen/AVR/PR37143.ll b/test/CodeGen/AVR/PR37143.ll
new file mode 100644
index 0000000..db157ed
--- /dev/null
+++ b/test/CodeGen/AVR/PR37143.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s
+
+; CHECK: ld {{r[0-9]+}}, [[PTR:[YZ]]]
+; CHECK: ldd {{r[0-9]+}}, [[PTR]]+1
+; CHECK: st [[PTR]], {{r[0-9]+}}
+; CHECK: std [[PTR]]+1, {{r[0-9]+}}
+define void @load_store_16(i16* nocapture %ptr) local_unnamed_addr #1 {
+entry:
+  %0 = load i16, i16* %ptr, align 2
+  %add = add i16 %0, 5
+  store i16 %add, i16* %ptr, align 2
+  ret void
+}
diff --git a/test/CodeGen/AVR/atomics/load16.ll b/test/CodeGen/AVR/atomics/load16.ll
index 2b51afe..6b96edb 100644
--- a/test/CodeGen/AVR/atomics/load16.ll
+++ b/test/CodeGen/AVR/atomics/load16.ll
@@ -3,8 +3,8 @@
 ; CHECK-LABEL: atomic_load16
 ; CHECK:      in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: ld [[RR:r[0-9]+]], [[RD:(X|Y|Z)]]+
-; CHECK-NEXT: ld [[RR:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ld  [[RR:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ldd [[RR:r[0-9]+]], [[RD]]+1
 ; CHECK-NEXT: out 63, r0
 define i16 @atomic_load16(i16* %foo) {
   %val = load atomic i16, i16* %foo unordered, align 2
@@ -29,12 +29,12 @@
 ; CHECK-LABEL: atomic_load_add16
 ; CHECK:      in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+
-; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]
+; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1
 ; CHECK-NEXT: add [[RR1]], [[TMP:r[0-9]+]]
 ; CHECK-NEXT: adc [[RR2]], [[TMP:r[0-9]+]]
-; CHECK-NEXT: st [[RD1]], [[RR1]]
-; CHECK-NEXT: std [[RD1]]+1, [[A:r[0-9]+]]
+; CHECK-NEXT: st [[RD]], [[RR1]]
+; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]]
 ; CHECK-NEXT: out 63, r0
 define i16 @atomic_load_add16(i16* %foo) {
   %val = atomicrmw add i16* %foo, i16 13 seq_cst
@@ -44,12 +44,12 @@
 ; CHECK-LABEL: atomic_load_sub16
 ; CHECK:      in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+
-; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]
+; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1
 ; CHECK-NEXT: sub [[RR1]], [[TMP:r[0-9]+]]
 ; CHECK-NEXT: sbc [[RR2]], [[TMP:r[0-9]+]]
-; CHECK-NEXT: st [[RD1]], [[RR1]]
-; CHECK-NEXT: std [[RD1]]+1, [[A:r[0-9]+]]
+; CHECK-NEXT: st [[RD]], [[RR1]]
+; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]]
 ; CHECK-NEXT: out 63, r0
 define i16 @atomic_load_sub16(i16* %foo) {
   %val = atomicrmw sub i16* %foo, i16 13 seq_cst
@@ -59,12 +59,12 @@
 ; CHECK-LABEL: atomic_load_and16
 ; CHECK:      in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+
-; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]
+; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1
 ; CHECK-NEXT: and [[RR1]], [[TMP:r[0-9]+]]
 ; CHECK-NEXT: and [[RR2]], [[TMP:r[0-9]+]]
-; CHECK-NEXT: st [[RD1]], [[RR1]]
-; CHECK-NEXT: std [[RD1]]+1, [[A:r[0-9]+]]
+; CHECK-NEXT: st [[RD]], [[RR1]]
+; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]]
 ; CHECK-NEXT: out 63, r0
 define i16 @atomic_load_and16(i16* %foo) {
   %val = atomicrmw and i16* %foo, i16 13 seq_cst
@@ -74,12 +74,12 @@
 ; CHECK-LABEL: atomic_load_or16
 ; CHECK:      in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+
-; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]
+; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1
 ; CHECK-NEXT: or [[RR1]], [[TMP:r[0-9]+]]
 ; CHECK-NEXT: or [[RR2]], [[TMP:r[0-9]+]]
-; CHECK-NEXT: st [[RD1]], [[RR1]]
-; CHECK-NEXT: std [[RD1]]+1, [[A:r[0-9]+]]
+; CHECK-NEXT: st [[RD]], [[RR1]]
+; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]]
 ; CHECK-NEXT: out 63, r0
 define i16 @atomic_load_or16(i16* %foo) {
   %val = atomicrmw or i16* %foo, i16 13 seq_cst
@@ -89,12 +89,12 @@
 ; CHECK-LABEL: atomic_load_xor16
 ; CHECK:      in r0, 63
 ; CHECK-NEXT: cli
-; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+
-; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]
+; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD:(X|Y|Z)]]
+; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD]]+1
 ; CHECK-NEXT: eor [[RR1]], [[TMP:r[0-9]+]]
 ; CHECK-NEXT: eor [[RR2]], [[TMP:r[0-9]+]]
-; CHECK-NEXT: st [[RD1]], [[RR1]]
-; CHECK-NEXT: std [[RD1]]+1, [[A:r[0-9]+]]
+; CHECK-NEXT: st [[RD]], [[RR1]]
+; CHECK-NEXT: std [[RD]]+1, [[A:r[0-9]+]]
 ; CHECK-NEXT: out 63, r0
 define i16 @atomic_load_xor16(i16* %foo) {
   %val = atomicrmw xor i16* %foo, i16 13 seq_cst
diff --git a/test/CodeGen/AVR/load.ll b/test/CodeGen/AVR/load.ll
index 73568b5..f58edeb 100644
--- a/test/CodeGen/AVR/load.ll
+++ b/test/CodeGen/AVR/load.ll
@@ -9,8 +9,8 @@
 
 define i16 @load16(i16* %x) {
 ; CHECK-LABEL: load16:
-; CHECK: ld r24, {{[XYZ]}}+
-; CHECK: ld r25, {{[XYZ]}}
+; CHECK: ld r24,  [[PTR:[XYZ]]]
+; CHECK: ldd r25, [[PTR]]+1
   %1 = load i16, i16* %x
   ret i16 %1
 }
@@ -36,8 +36,8 @@
 
 define i16 @load16disp(i16* %x) {
 ; CHECK-LABEL: load16disp:
-; CHECK: ldd r24, {{[YZ]}}+62
-; CHECK: ldd r25, {{[YZ]}}+63
+; CHECK: ldd r24, [[PTR:[YZ]]]+62
+; CHECK: ldd r25, [[PTR]]+63
   %1 = getelementptr inbounds i16, i16* %x, i64 31
   %2 = load i16, i16* %1
   ret i16 %2
@@ -48,8 +48,8 @@
 ; CHECK: movw r26, r24
 ; CHECK: subi r26, 192
 ; CHECK: sbci r27, 255
-; CHECK: ld r24, {{[XYZ]}}+
-; CHECK: ld r25, {{[XYZ]}}
+; CHECK: ld r24,  [[PTR:[XYZ]]]
+; CHECK: ldd r25, [[PTR]]+1
   %1 = getelementptr inbounds i16, i16* %x, i64 32
   %2 = load i16, i16* %1
   ret i16 %2
diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir b/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir
index 4fc97f6..3c3a721 100644
--- a/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir
+++ b/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir
@@ -18,9 +18,9 @@
 
     ; CHECK-LABEL: test_ldwrdptr
 
-    ; CHECK:      ld [[SCRATCH:r[0-9]+]], Z+
+    ; CHECK:      ld [[SCRATCH:r[0-9]+]], Z
     ; CHECK-NEXT: push [[SCRATCH]]
-    ; CHECK-NEXT: ld [[SCRATCH]], Z
+    ; CHECK-NEXT: ldd [[SCRATCH]], Z+1
     ; CHECK-NEXT: mov r31, [[SCRATCH]]
     ; CHECK-NEXT: pop r30
 
diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtr.mir b/test/CodeGen/AVR/pseudo/LDWRdPtr.mir
index 5398a8b..5bd4bf2 100644
--- a/test/CodeGen/AVR/pseudo/LDWRdPtr.mir
+++ b/test/CodeGen/AVR/pseudo/LDWRdPtr.mir
@@ -17,8 +17,8 @@
 
     ; CHECK-LABEL: test_ldwrdptr
 
-    ; CHECK:      $r0, $r31r30 = LDRdPtrPi $r31r30
-    ; CHECK-NEXT:          $r1 = LDRdPtr $r31r30
+    ; CHECK:      $r0, $r31r30 = LDRdPtr
+    ; CHECK-NEXT:          $r1 = LDDRdPtrQ $r31r30, 1
 
     $r1r0 = LDWRdPtr $r31r30
 ...