[ARM] Size reduce teq to eors

Add t2TEQrr to the map of instructions that can be reduced to a T1
instruction. This is a special case: TEQ only sets the CPSR and does not
write a GPR, whereas EOR does. So we only convert when the EOR may clobber
the first operand, i.e. it is killed and the TEQ is not in an IT block.

Differential Revision: https://reviews.llvm.org/D56255


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350801 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2f510c5..7dcd1c7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -122,6 +122,7 @@
   { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,   1,   0,  2,0, 0,0,0 },
   { ARM::t2SXTB,  ARM::tSXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
   { ARM::t2SXTH,  ARM::tSXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
+  { ARM::t2TEQrr, ARM::tEOR,    0,             0,   0,   1,   0,  2,0, 0,1,0 },
   { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,   1,   0,  2,0, 0,0,0 },
   { ARM::t2UXTB,  ARM::tUXTB,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
   { ARM::t2UXTH,  ARM::tUXTH,   0,             0,   0,   1,   0,  1,0, 0,1,0 },
@@ -717,6 +718,16 @@
       return true;
     return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
   }
+  case ARM::t2TEQrr: {
+    unsigned PredReg = 0;
+    // Can only convert to eors if we're not in an IT block.
+    if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
+      break;
+    // TODO if Operand 0 is not killed but Operand 1 is, then we could write
+    // to Op1 instead.
+    if (MI->getOperand(0).isKill())
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+  }
   }
   return false;
 }
@@ -903,9 +914,24 @@
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
-  MIB.add(MI->getOperand(0));
-  if (NewMCID.hasOptionalDef())
-    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+
+  // TEQ is special in that it doesn't define a register but we're converting
+  // it into an EOR which does. So add the first operand as a def and then
+  // again as a use.
+  if (MCID.getOpcode() == ARM::t2TEQrr) {
+    MachineOperand MO = MI->getOperand(0);
+    MO.setIsKill(false);
+    MO.setIsDef(true);
+    MO.setIsDead(true);
+    MIB.add(MO);
+    if (NewMCID.hasOptionalDef())
+      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+    MIB.add(MI->getOperand(0));
+  } else {
+    MIB.add(MI->getOperand(0));
+    if (NewMCID.hasOptionalDef())
+      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
+  }
 
   // Transfer the rest of operands.
   unsigned NumOps = MCID.getNumOperands();
diff --git a/test/CodeGen/Thumb2/t2-teq-reduce.mir b/test/CodeGen/Thumb2/t2-teq-reduce.mir
new file mode 100644
index 0000000..d85d233a
--- /dev/null
+++ b/test/CodeGen/Thumb2/t2-teq-reduce.mir
@@ -0,0 +1,267 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=t2-reduce-size %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8m.main"
+
+  %list_head = type { %list_head*, %list_data* }
+  %list_data = type { i16, i16 }
+
+  define %list_head* @reg_reg_it_block(%list_head* %a, i16 zeroext %b) {
+  entry:
+    br label %while.begin
+
+  while.begin:                                      ; preds = %while.body.end, %entry
+    %list.addr.i = phi %list_head* [ %ld.5, %while.body.end ], [ %a, %entry ]
+    %info.i = getelementptr inbounds %list_head, %list_head* %list.addr.i, i32 0, i32 1
+    %ld.0 = load %list_data*, %list_data** %info.i, align 4
+    %data16.i1 = bitcast %list_data* %ld.0 to i16*
+    %ld.1 = load i16, i16* %data16.i1, align 2
+    %xor.1 = xor i16 %ld.1, %b
+    %cmp.i = icmp eq i16 %xor.1, 0
+    br i1 %cmp.i, label %exit, label %while.body.a
+
+  while.body.a:                                     ; preds = %while.begin
+    %next.i2 = bitcast %list_head* %list.addr.i to %list_head**
+    %ld.2 = load %list_head*, %list_head** %next.i2, align 4
+    %cmp.i.1 = icmp eq %list_head* %ld.2, null
+    br i1 %cmp.i.1, label %exit, label %it.block
+
+  it.block:                                         ; preds = %while.body.a
+    %info.i.1 = getelementptr inbounds %list_head, %list_head* %ld.2, i32 0, i32 1
+    %ld.3 = load %list_data*, %list_data** %info.i.1, align 4
+    %data16.i.13 = bitcast %list_data* %ld.3 to i16*
+    %ld.4 = load i16, i16* %data16.i.13, align 2
+    %xor.2 = xor i16 %ld.4, %b
+    %cmp.i.2 = icmp eq i16 %xor.2, 0
+    br i1 %cmp.i.2, label %exit, label %while.body.end
+
+  while.body.end:                                   ; preds = %it.block
+    %next.i.14 = bitcast %list_head* %ld.2 to %list_head**
+    %ld.5 = load %list_head*, %list_head** %next.i.14, align 4
+    %cmp.i.3 = icmp eq %list_head* %ld.5, null
+    br i1 %cmp.i.3, label %exit, label %while.begin
+
+  exit:                                             ; preds = %while.body.end, %it.block, %while.body.a, %while.begin
+    %res = phi %list_head* [ %list.addr.i, %while.begin ], [ %ld.2, %while.body.a ], [ %ld.2, %it.block ], [ %ld.5, %while.body.end ]
+    ret %list_head* %res
+  }
+
+  define i16 @op_not_killed(%list_head* %a, i16 zeroext %b) {
+  entry:
+    br label %while.begin
+
+  while.begin:                                      ; preds = %while.body.end, %entry
+    %list.addr.i = phi %list_head* [ %ld.5, %while.body.end ], [ %a, %entry ]
+    %info.i = getelementptr inbounds %list_head, %list_head* %list.addr.i, i32 0, i32 1
+    %ld.0 = load %list_data*, %list_data** %info.i, align 4
+    %data16.i1 = bitcast %list_data* %ld.0 to i16*
+    %ld.1 = load i16, i16* %data16.i1, align 2
+    %xor.1 = xor i16 %ld.1, %b
+    %cmp.i = icmp eq i16 %xor.1, 0
+    br i1 %cmp.i, label %exit, label %while.body.a
+
+  while.body.a:                                     ; preds = %while.begin
+    %next.i2 = bitcast %list_head* %list.addr.i to %list_head**
+    %ld.2 = load %list_head*, %list_head** %next.i2, align 4
+    %cmp.i.1 = icmp eq %list_head* %ld.2, null
+    br i1 %cmp.i.1, label %exit, label %it.block
+
+  it.block:                                         ; preds = %while.body.a
+    %info.i.1 = getelementptr inbounds %list_head, %list_head* %ld.2, i32 0, i32 1
+    %ld.3 = load %list_data*, %list_data** %info.i.1, align 4
+    %data16.i.13 = bitcast %list_data* %ld.3 to i16*
+    %ld.4 = load i16, i16* %data16.i.13, align 2
+    %xor.2 = xor i16 %ld.4, %b
+    %cmp.i.2 = icmp eq i16 %xor.2, 0
+    br i1 %cmp.i.2, label %exit, label %while.body.end
+
+  while.body.end:                                   ; preds = %it.block
+    %next.i.14 = bitcast %list_head* %ld.2 to %list_head**
+    %ld.5 = load %list_head*, %list_head** %next.i.14, align 4
+    %cmp.i.3 = icmp eq %list_head* %ld.5, null
+    br i1 %cmp.i.3, label %exit, label %while.begin
+
+  exit:                                             ; preds = %while.body.end, %it.block, %while.body.a, %while.begin
+    %res = phi i16 [ %ld.1, %while.begin ], [ %ld.1, %while.body.a ], [ %ld.4, %it.block ], [ %ld.4, %while.body.end ]
+    ret i16 %res
+  }
+
+...
+---
+name:            reg_reg_it_block
+tracksRegLiveness: true
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+body:             |
+  ; CHECK-LABEL: name: reg_reg_it_block
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   liveins: $r0, $r1
+  ; CHECK:   t2B %bb.2, 14, $noreg
+  ; CHECK: bb.1.while.body.end:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   liveins: $r0, $r1
+  ; CHECK:   renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next.i.14)
+  ; CHECK:   tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   BUNDLE implicit-def dead $itstate, implicit killed $cpsr, implicit $r0 {
+  ; CHECK:     t2IT 0, 8, implicit-def $itstate
+  ; CHECK:     tBX_RET 0, killed $cpsr, implicit $r0, implicit internal killed $itstate
+  ; CHECK:   }
+  ; CHECK: bb.2.while.begin:
+  ; CHECK:   successors: %bb.4(0x04000000), %bb.3(0x7c000000)
+  ; CHECK:   liveins: $r0, $r1
+  ; CHECK:   renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info.i)
+  ; CHECK:   renamable $r2 = tLDRHi killed renamable $r2, 0, 14, $noreg :: (load 2 from %ir.data16.i1)
+  ; CHECK:   dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14, $noreg
+  ; CHECK:   t2Bcc %bb.4, 0, killed $cpsr
+  ; CHECK: bb.3.while.body.a:
+  ; CHECK:   successors: %bb.4(0x4207fef8), %bb.1(0x3df80108)
+  ; CHECK:   liveins: $r0, $r1
+  ; CHECK:   renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next.i2)
+  ; CHECK:   tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   BUNDLE implicit-def dead $itstate, implicit-def dead $r2, implicit-def $cpsr, implicit $r0, implicit killed $cpsr, implicit $r1 {
+  ; CHECK:     t2IT 1, 30, implicit-def $itstate
+  ; CHECK:     renamable $r2 = tLDRi renamable $r0, 1, 1, $cpsr, implicit internal $itstate :: (load 4 from %ir.info.i.1)
+  ; CHECK:     renamable $r2 = tLDRHi internal killed renamable $r2, 0, 1, $cpsr, implicit internal killed $r2, implicit internal $itstate :: (load 2 from %ir.data16.i.13)
+  ; CHECK:     t2TEQrr internal killed renamable $r2, renamable $r1, 1, killed $cpsr, implicit-def $cpsr, implicit internal killed $itstate
+  ; CHECK:   }
+  ; CHECK:   t2Bcc %bb.1, 1, killed $cpsr
+  ; CHECK: bb.4.exit:
+  ; CHECK:   liveins: $r0
+  ; CHECK:   tBX_RET 14, $noreg, implicit killed $r0
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $r0, $r1
+
+    t2B %bb.1, 14, $noreg
+
+  bb.3.while.body.end:
+    successors: %bb.1(0x80000000)
+    liveins: $r0, $r1
+
+    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next.i.14)
+    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+    BUNDLE implicit-def dead $itstate, implicit killed $cpsr, implicit $r0 {
+      t2IT 0, 8, implicit-def $itstate
+      tBX_RET 0, killed $cpsr, implicit $r0, implicit internal killed $itstate
+    }
+
+  bb.1.while.begin:
+    successors: %bb.4(0x04000000), %bb.2(0x7c000000)
+    liveins: $r0, $r1
+
+    renamable $r2 = tLDRi renamable $r0, 1, 14, $noreg :: (load 4 from %ir.info.i)
+    renamable $r2 = tLDRHi killed renamable $r2, 0, 14, $noreg :: (load 2 from %ir.data16.i1)
+    dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14, $noreg
+    t2Bcc %bb.4, 0, killed $cpsr
+
+  bb.2.while.body.a:
+    successors: %bb.4(0x80000000), %bb.3(0x78200000)
+    liveins: $r0, $r1
+
+    renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (load 4 from %ir.next.i2)
+    tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr
+    BUNDLE implicit-def dead $itstate, implicit-def dead $r2, implicit-def $cpsr, implicit $r0, implicit killed $cpsr, implicit $r1 {
+      t2IT 1, 30, implicit-def $itstate
+      renamable $r2 = tLDRi renamable $r0, 1, 1, $cpsr, implicit internal $itstate :: (load 4 from %ir.info.i.1)
+      renamable $r2 = tLDRHi internal killed renamable $r2, 0, 1, $cpsr, implicit internal killed $r2, implicit internal $itstate :: (load 2 from %ir.data16.i.13)
+      t2TEQrr internal killed renamable $r2, renamable $r1, 1, killed $cpsr, implicit-def $cpsr, implicit internal killed $itstate
+    }
+    t2Bcc %bb.3, 1, killed $cpsr
+
+  bb.4.exit:
+    liveins: $r0
+
+    tBX_RET 14, $noreg, implicit killed $r0
+
+...
+---
+name:            op_not_killed
+tracksRegLiveness: true
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+body:             |
+  ; CHECK-LABEL: name: op_not_killed
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $r0, $r1
+  ; CHECK:   $r2 = tMOVr $r0, 14, $noreg
+  ; CHECK: bb.1.while.begin:
+  ; CHECK:   successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+  ; CHECK:   liveins: $r1, $r2
+  ; CHECK:   renamable $r0 = tLDRi renamable $r2, 1, 14, $noreg :: (load 4 from %ir.info.i)
+  ; CHECK:   renamable $r0 = tLDRHi killed renamable $r0, 0, 14, $noreg :: (load 2 from %ir.data16.i1)
+  ; CHECK:   t2TEQrr renamable $r0, renamable $r1, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.5, 0, killed $cpsr
+  ; CHECK: bb.2.while.body.a:
+  ; CHECK:   successors: %bb.5(0x04000000), %bb.3(0x7c000000)
+  ; CHECK:   liveins: $r0, $r1, $r2
+  ; CHECK:   renamable $r2 = tLDRi killed renamable $r2, 0, 14, $noreg :: (load 4 from %ir.next.i2)
+  ; CHECK:   tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.5, 0, killed $cpsr
+  ; CHECK: bb.3.it.block:
+  ; CHECK:   successors: %bb.5(0x04000000), %bb.4(0x7c000000)
+  ; CHECK:   liveins: $r1, $r2
+  ; CHECK:   renamable $r0 = tLDRi renamable $r2, 1, 14, $noreg :: (load 4 from %ir.info.i.1)
+  ; CHECK:   renamable $r0 = tLDRHi killed renamable $r0, 0, 14, $noreg :: (load 2 from %ir.data16.i.13)
+  ; CHECK:   t2TEQrr renamable $r0, renamable $r1, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.5, 0, killed $cpsr
+  ; CHECK: bb.4.while.body.end:
+  ; CHECK:   successors: %bb.5(0x04000000), %bb.1(0x7c000000)
+  ; CHECK:   liveins: $r0, $r1, $r2
+  ; CHECK:   renamable $r2 = tLDRi killed renamable $r2, 0, 14, $noreg :: (load 4 from %ir.next.i.14)
+  ; CHECK:   tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.1, 1, killed $cpsr
+  ; CHECK: bb.5.exit:
+  ; CHECK:   liveins: $r0
+  ; CHECK:   tBX_RET 14, $noreg, implicit $r0
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $r0, $r1
+
+    $r2 = tMOVr $r0, 14, $noreg
+
+  bb.1.while.begin:
+    successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+    liveins: $r1, $r2
+
+    renamable $r0 = t2LDRi12 renamable $r2, 4, 14, $noreg :: (load 4 from %ir.info.i)
+    renamable $r0 = t2LDRHi12 killed renamable $r0, 0, 14, $noreg :: (load 2 from %ir.data16.i1)
+    t2TEQrr renamable $r0, renamable $r1, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 0, killed $cpsr
+
+  bb.2.while.body.a:
+    successors: %bb.5(0x04000000), %bb.3(0x7c000000)
+    liveins: $r0, $r1, $r2
+
+    renamable $r2 = t2LDRi12 killed renamable $r2, 0, 14, $noreg :: (load 4 from %ir.next.i2)
+    t2CMPri renamable $r2, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 0, killed $cpsr
+
+  bb.3.it.block:
+    successors: %bb.5(0x04000000), %bb.4(0x7c000000)
+    liveins: $r1, $r2
+
+    renamable $r0 = t2LDRi12 renamable $r2, 4, 14, $noreg :: (load 4 from %ir.info.i.1)
+    renamable $r0 = t2LDRHi12 killed renamable $r0, 0, 14, $noreg :: (load 2 from %ir.data16.i.13)
+    t2TEQrr renamable $r0, renamable $r1, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 0, killed $cpsr
+
+  bb.4.while.body.end:
+    successors: %bb.5(0x04000000), %bb.1(0x7c000000)
+    liveins: $r0, $r1, $r2
+
+    renamable $r2 = t2LDRi12 killed renamable $r2, 0, 14, $noreg :: (load 4 from %ir.next.i.14)
+    t2CMPri renamable $r2, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 1, killed $cpsr
+
+  bb.5.exit:
+    liveins: $r0
+
+    tBX_RET 14, $noreg, implicit $r0
+
+...
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index 22bde87..1f37dde 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -5,7 +5,7 @@
 
 define i32 @f2(i32 %a, i32 %b) {
 ; CHECK: f2
-; CHECK: teq.w {{.*}}, r1
+; CHECK: eors {{.*}}, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp eq i32 %tmp, 0
     %ret = select i1 %tmp1, i32 42, i32 24
@@ -14,7 +14,7 @@
 
 define i32 @f4(i32 %a, i32 %b) {
 ; CHECK: f4
-; CHECK: teq.w  {{.*}}, r1
+; CHECK: eors  {{.*}}, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp eq i32 0, %tmp
     %ret = select i1 %tmp1, i32 42, i32 24