[ARM] Add qadd lowering from a sadd_sat

This lowers a sadd_sat to a qadd by treating it as legal. Also adds qsub at the
same time.

The qadd instruction sets the q flag, but we already have many cases where we
do not model this in llvm.

Differential Revision: https://reviews.llvm.org/D68976


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375411 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 84aa502..db26feb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1027,6 +1027,10 @@
     setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
     setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
   }
+  if (Subtarget->hasBaseDSP()) {
+    setOperationAction(ISD::SADDSAT, MVT::i32, Legal);
+    setOperationAction(ISD::SSUBSAT, MVT::i32, Legal);
+  }
 
   // i64 operation support.
   setOperationAction(ISD::MUL,     MVT::i64, Expand);
@@ -11090,10 +11094,7 @@
 static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const ARMSubtarget *Subtarget) {
-  if (Subtarget->isThumb()) {
-    if (!Subtarget->hasDSP())
-      return SDValue();
-  } else if (!Subtarget->hasV5TEOps())
+  if (!Subtarget->hasBaseDSP())
     return SDValue();
 
   // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 61e7f3d..43c3372 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -3755,6 +3755,10 @@
                 [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>;
 }
 
+def : ARMV5TEPat<(saddsat GPR:$a, GPR:$b),
+                 (QADD GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(ssubsat GPR:$a, GPR:$b),
+                 (QSUB GPR:$a, GPR:$b)>;
 def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
                (QADD8 rGPR:$Rm, rGPR:$Rn)>;
 def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 4fb98fb..7d26541 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2395,6 +2395,10 @@
 def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
                    (t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
 
+def : Thumb2DSPPat<(saddsat rGPR:$Rm, rGPR:$Rn),
+                   (t2QADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(ssubsat rGPR:$Rm, rGPR:$Rn),
+                   (t2QSUB rGPR:$Rm, rGPR:$Rn)>;
 def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
                    (t2QADD8 rGPR:$Rm, rGPR:$Rn)>;
 def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index af6f691..ef46034 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -672,6 +672,12 @@
   bool hasSB() const { return HasSB; }
   bool genLongCalls() const { return GenLongCalls; }
   bool genExecuteOnly() const { return GenExecuteOnly; }
+  bool hasBaseDSP() const {
+    if (isThumb())
+      return hasDSP();
+    else
+      return hasV5TEOps();
+  }
 
   bool hasFP16() const { return HasFP16; }
   bool hasD32() const { return HasD32; }
diff --git a/test/CodeGen/ARM/qdadd.ll b/test/CodeGen/ARM/qdadd.ll
index ea33c86..4973110 100644
--- a/test/CodeGen/ARM/qdadd.ll
+++ b/test/CodeGen/ARM/qdadd.ll
@@ -5,320 +5,184 @@
 ; RUN: llc < %s -mtriple=armv8a-none-eabi | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-ARM8
 
 define i32 @qdadd(i32 %x, i32 %y) nounwind {
-; CHECK-T2-LABEL: qdadd:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    .save {r7, lr}
-; CHECK-T2-NEXT:    push {r7, lr}
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    adds.w r12, r0, r0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    mov.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp r12, r0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r12
-; CHECK-T2-NEXT:    adds r0, r3, r1
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi.w lr, #1
-; CHECK-T2-NEXT:    cmp.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp r0, r3
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r0
-; CHECK-T2-NEXT:    mov r0, r2
-; CHECK-T2-NEXT:    pop {r7, pc}
+; CHECK-T2NODSP-LABEL: qdadd:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    .save {r7, lr}
+; CHECK-T2NODSP-NEXT:    push {r7, lr}
+; CHECK-T2NODSP-NEXT:    movs r3, #0
+; CHECK-T2NODSP-NEXT:    adds.w r12, r0, r0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    mov.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r12, r0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r3, r12
+; CHECK-T2NODSP-NEXT:    adds r0, r3, r1
+; CHECK-T2NODSP-NEXT:    mov.w r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi.w lr, #1
+; CHECK-T2NODSP-NEXT:    cmp.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r0, r3
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r2, r0
+; CHECK-T2NODSP-NEXT:    mov r0, r2
+; CHECK-T2NODSP-NEXT:    pop {r7, pc}
 ;
-; CHECK-ARM6-LABEL: qdadd:
-; CHECK-ARM6:       @ %bb.0:
-; CHECK-ARM6-NEXT:    .save {r11, lr}
-; CHECK-ARM6-NEXT:    push {r11, lr}
-; CHECK-ARM6-NEXT:    adds r12, r0, r0
-; CHECK-ARM6-NEXT:    mov r3, #0
-; CHECK-ARM6-NEXT:    movmi r3, #1
-; CHECK-ARM6-NEXT:    cmp r3, #0
-; CHECK-ARM6-NEXT:    mov r3, #-2147483648
-; CHECK-ARM6-NEXT:    mov lr, #0
-; CHECK-ARM6-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r12, r0
-; CHECK-ARM6-NEXT:    movvc r3, r12
-; CHECK-ARM6-NEXT:    adds r0, r3, r1
-; CHECK-ARM6-NEXT:    movmi lr, #1
-; CHECK-ARM6-NEXT:    mov r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp lr, #0
-; CHECK-ARM6-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r0, r3
-; CHECK-ARM6-NEXT:    movvc r2, r0
-; CHECK-ARM6-NEXT:    mov r0, r2
-; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-T2DSP-LABEL: qdadd:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r0
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
 ;
-; CHECK-ARM8-LABEL: qdadd:
-; CHECK-ARM8:       @ %bb.0:
-; CHECK-ARM8-NEXT:    .save {r11, lr}
-; CHECK-ARM8-NEXT:    push {r11, lr}
-; CHECK-ARM8-NEXT:    adds r12, r0, r0
-; CHECK-ARM8-NEXT:    mov r3, #0
-; CHECK-ARM8-NEXT:    movwmi r3, #1
-; CHECK-ARM8-NEXT:    cmp r3, #0
-; CHECK-ARM8-NEXT:    mov r3, #-2147483648
-; CHECK-ARM8-NEXT:    mov lr, #0
-; CHECK-ARM8-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r12, r0
-; CHECK-ARM8-NEXT:    movvc r3, r12
-; CHECK-ARM8-NEXT:    adds r0, r3, r1
-; CHECK-ARM8-NEXT:    movwmi lr, #1
-; CHECK-ARM8-NEXT:    mov r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp lr, #0
-; CHECK-ARM8-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r0, r3
-; CHECK-ARM8-NEXT:    movvc r2, r0
-; CHECK-ARM8-NEXT:    mov r0, r2
-; CHECK-ARM8-NEXT:    pop {r11, pc}
+; CHECK-ARM-LABEL: qdadd:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    qadd r0, r0, r0
+; CHECK-ARM-NEXT:    qadd r0, r0, r1
+; CHECK-ARM-NEXT:    bx lr
   %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
   %tmp = call i32 @llvm.sadd.sat.i32(i32 %z, i32 %y)
   ret i32 %tmp
 }
 
 define i32 @qdadd_c(i32 %x, i32 %y) nounwind {
-; CHECK-T2-LABEL: qdadd_c:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    .save {r7, lr}
-; CHECK-T2-NEXT:    push {r7, lr}
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    adds.w r12, r0, r0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    mov.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp r12, r0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r12
-; CHECK-T2-NEXT:    adds r0, r1, r3
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi.w lr, #1
-; CHECK-T2-NEXT:    cmp.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp r0, r1
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r0
-; CHECK-T2-NEXT:    mov r0, r2
-; CHECK-T2-NEXT:    pop {r7, pc}
+; CHECK-T2NODSP-LABEL: qdadd_c:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    .save {r7, lr}
+; CHECK-T2NODSP-NEXT:    push {r7, lr}
+; CHECK-T2NODSP-NEXT:    movs r3, #0
+; CHECK-T2NODSP-NEXT:    adds.w r12, r0, r0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    mov.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r12, r0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r3, r12
+; CHECK-T2NODSP-NEXT:    adds r0, r1, r3
+; CHECK-T2NODSP-NEXT:    mov.w r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi.w lr, #1
+; CHECK-T2NODSP-NEXT:    cmp.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r0, r1
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r2, r0
+; CHECK-T2NODSP-NEXT:    mov r0, r2
+; CHECK-T2NODSP-NEXT:    pop {r7, pc}
 ;
-; CHECK-ARM6-LABEL: qdadd_c:
-; CHECK-ARM6:       @ %bb.0:
-; CHECK-ARM6-NEXT:    .save {r11, lr}
-; CHECK-ARM6-NEXT:    push {r11, lr}
-; CHECK-ARM6-NEXT:    adds r12, r0, r0
-; CHECK-ARM6-NEXT:    mov r3, #0
-; CHECK-ARM6-NEXT:    movmi r3, #1
-; CHECK-ARM6-NEXT:    cmp r3, #0
-; CHECK-ARM6-NEXT:    mov r3, #-2147483648
-; CHECK-ARM6-NEXT:    mov lr, #0
-; CHECK-ARM6-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r12, r0
-; CHECK-ARM6-NEXT:    movvc r3, r12
-; CHECK-ARM6-NEXT:    adds r0, r1, r3
-; CHECK-ARM6-NEXT:    movmi lr, #1
-; CHECK-ARM6-NEXT:    mov r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp lr, #0
-; CHECK-ARM6-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r0, r1
-; CHECK-ARM6-NEXT:    movvc r2, r0
-; CHECK-ARM6-NEXT:    mov r0, r2
-; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-T2DSP-LABEL: qdadd_c:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r0
+; CHECK-T2DSP-NEXT:    qadd r0, r1, r0
+; CHECK-T2DSP-NEXT:    bx lr
 ;
-; CHECK-ARM8-LABEL: qdadd_c:
-; CHECK-ARM8:       @ %bb.0:
-; CHECK-ARM8-NEXT:    .save {r11, lr}
-; CHECK-ARM8-NEXT:    push {r11, lr}
-; CHECK-ARM8-NEXT:    adds r12, r0, r0
-; CHECK-ARM8-NEXT:    mov r3, #0
-; CHECK-ARM8-NEXT:    movwmi r3, #1
-; CHECK-ARM8-NEXT:    cmp r3, #0
-; CHECK-ARM8-NEXT:    mov r3, #-2147483648
-; CHECK-ARM8-NEXT:    mov lr, #0
-; CHECK-ARM8-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r12, r0
-; CHECK-ARM8-NEXT:    movvc r3, r12
-; CHECK-ARM8-NEXT:    adds r0, r1, r3
-; CHECK-ARM8-NEXT:    movwmi lr, #1
-; CHECK-ARM8-NEXT:    mov r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp lr, #0
-; CHECK-ARM8-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r0, r1
-; CHECK-ARM8-NEXT:    movvc r2, r0
-; CHECK-ARM8-NEXT:    mov r0, r2
-; CHECK-ARM8-NEXT:    pop {r11, pc}
+; CHECK-ARM-LABEL: qdadd_c:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    qadd r0, r0, r0
+; CHECK-ARM-NEXT:    qadd r0, r1, r0
+; CHECK-ARM-NEXT:    bx lr
   %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
   %tmp = call i32 @llvm.sadd.sat.i32(i32 %y, i32 %z)
   ret i32 %tmp
 }
 
 define i32 @qdsub(i32 %x, i32 %y) nounwind {
-; CHECK-T2-LABEL: qdsub:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    .save {r7, lr}
-; CHECK-T2-NEXT:    push {r7, lr}
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    adds.w r12, r0, r0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    mov.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp r12, r0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r12
-; CHECK-T2-NEXT:    subs r0, r1, r3
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi.w lr, #1
-; CHECK-T2-NEXT:    cmp.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp r1, r3
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r0
-; CHECK-T2-NEXT:    mov r0, r2
-; CHECK-T2-NEXT:    pop {r7, pc}
+; CHECK-T2NODSP-LABEL: qdsub:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    .save {r7, lr}
+; CHECK-T2NODSP-NEXT:    push {r7, lr}
+; CHECK-T2NODSP-NEXT:    movs r3, #0
+; CHECK-T2NODSP-NEXT:    adds.w r12, r0, r0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    mov.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r12, r0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r3, r12
+; CHECK-T2NODSP-NEXT:    subs r0, r1, r3
+; CHECK-T2NODSP-NEXT:    mov.w r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi.w lr, #1
+; CHECK-T2NODSP-NEXT:    cmp.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r1, r3
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r2, r0
+; CHECK-T2NODSP-NEXT:    mov r0, r2
+; CHECK-T2NODSP-NEXT:    pop {r7, pc}
 ;
-; CHECK-ARM6-LABEL: qdsub:
-; CHECK-ARM6:       @ %bb.0:
-; CHECK-ARM6-NEXT:    .save {r11, lr}
-; CHECK-ARM6-NEXT:    push {r11, lr}
-; CHECK-ARM6-NEXT:    adds r12, r0, r0
-; CHECK-ARM6-NEXT:    mov r3, #0
-; CHECK-ARM6-NEXT:    movmi r3, #1
-; CHECK-ARM6-NEXT:    cmp r3, #0
-; CHECK-ARM6-NEXT:    mov r3, #-2147483648
-; CHECK-ARM6-NEXT:    mov lr, #0
-; CHECK-ARM6-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r12, r0
-; CHECK-ARM6-NEXT:    movvc r3, r12
-; CHECK-ARM6-NEXT:    subs r0, r1, r3
-; CHECK-ARM6-NEXT:    movmi lr, #1
-; CHECK-ARM6-NEXT:    mov r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp lr, #0
-; CHECK-ARM6-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r1, r3
-; CHECK-ARM6-NEXT:    movvc r2, r0
-; CHECK-ARM6-NEXT:    mov r0, r2
-; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-T2DSP-LABEL: qdsub:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r0
+; CHECK-T2DSP-NEXT:    qsub r0, r1, r0
+; CHECK-T2DSP-NEXT:    bx lr
 ;
-; CHECK-ARM8-LABEL: qdsub:
-; CHECK-ARM8:       @ %bb.0:
-; CHECK-ARM8-NEXT:    .save {r11, lr}
-; CHECK-ARM8-NEXT:    push {r11, lr}
-; CHECK-ARM8-NEXT:    adds r12, r0, r0
-; CHECK-ARM8-NEXT:    mov r3, #0
-; CHECK-ARM8-NEXT:    movwmi r3, #1
-; CHECK-ARM8-NEXT:    cmp r3, #0
-; CHECK-ARM8-NEXT:    mov r3, #-2147483648
-; CHECK-ARM8-NEXT:    mov lr, #0
-; CHECK-ARM8-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r12, r0
-; CHECK-ARM8-NEXT:    movvc r3, r12
-; CHECK-ARM8-NEXT:    subs r0, r1, r3
-; CHECK-ARM8-NEXT:    movwmi lr, #1
-; CHECK-ARM8-NEXT:    mov r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp lr, #0
-; CHECK-ARM8-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r1, r3
-; CHECK-ARM8-NEXT:    movvc r2, r0
-; CHECK-ARM8-NEXT:    mov r0, r2
-; CHECK-ARM8-NEXT:    pop {r11, pc}
+; CHECK-ARM-LABEL: qdsub:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    qadd r0, r0, r0
+; CHECK-ARM-NEXT:    qsub r0, r1, r0
+; CHECK-ARM-NEXT:    bx lr
   %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
   %tmp = call i32 @llvm.ssub.sat.i32(i32 %y, i32 %z)
   ret i32 %tmp
 }
 
 define i32 @qdsub_c(i32 %x, i32 %y) nounwind {
-; CHECK-T2-LABEL: qdsub_c:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    .save {r7, lr}
-; CHECK-T2-NEXT:    push {r7, lr}
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    adds.w r12, r0, r0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    mov.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp r12, r0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r12
-; CHECK-T2-NEXT:    subs r0, r3, r1
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi.w lr, #1
-; CHECK-T2-NEXT:    cmp.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp r3, r1
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r0
-; CHECK-T2-NEXT:    mov r0, r2
-; CHECK-T2-NEXT:    pop {r7, pc}
+; CHECK-T2NODSP-LABEL: qdsub_c:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    .save {r7, lr}
+; CHECK-T2NODSP-NEXT:    push {r7, lr}
+; CHECK-T2NODSP-NEXT:    movs r3, #0
+; CHECK-T2NODSP-NEXT:    adds.w r12, r0, r0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    mov.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r12, r0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r3, r12
+; CHECK-T2NODSP-NEXT:    subs r0, r3, r1
+; CHECK-T2NODSP-NEXT:    mov.w r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi.w lr, #1
+; CHECK-T2NODSP-NEXT:    cmp.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r3, r1
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r2, r0
+; CHECK-T2NODSP-NEXT:    mov r0, r2
+; CHECK-T2NODSP-NEXT:    pop {r7, pc}
 ;
-; CHECK-ARM6-LABEL: qdsub_c:
-; CHECK-ARM6:       @ %bb.0:
-; CHECK-ARM6-NEXT:    .save {r11, lr}
-; CHECK-ARM6-NEXT:    push {r11, lr}
-; CHECK-ARM6-NEXT:    adds r12, r0, r0
-; CHECK-ARM6-NEXT:    mov r3, #0
-; CHECK-ARM6-NEXT:    movmi r3, #1
-; CHECK-ARM6-NEXT:    cmp r3, #0
-; CHECK-ARM6-NEXT:    mov r3, #-2147483648
-; CHECK-ARM6-NEXT:    mov lr, #0
-; CHECK-ARM6-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r12, r0
-; CHECK-ARM6-NEXT:    movvc r3, r12
-; CHECK-ARM6-NEXT:    subs r0, r3, r1
-; CHECK-ARM6-NEXT:    movmi lr, #1
-; CHECK-ARM6-NEXT:    mov r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp lr, #0
-; CHECK-ARM6-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM6-NEXT:    cmp r3, r1
-; CHECK-ARM6-NEXT:    movvc r2, r0
-; CHECK-ARM6-NEXT:    mov r0, r2
-; CHECK-ARM6-NEXT:    pop {r11, pc}
+; CHECK-T2DSP-LABEL: qdsub_c:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r0
+; CHECK-T2DSP-NEXT:    qsub r0, r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
 ;
-; CHECK-ARM8-LABEL: qdsub_c:
-; CHECK-ARM8:       @ %bb.0:
-; CHECK-ARM8-NEXT:    .save {r11, lr}
-; CHECK-ARM8-NEXT:    push {r11, lr}
-; CHECK-ARM8-NEXT:    adds r12, r0, r0
-; CHECK-ARM8-NEXT:    mov r3, #0
-; CHECK-ARM8-NEXT:    movwmi r3, #1
-; CHECK-ARM8-NEXT:    cmp r3, #0
-; CHECK-ARM8-NEXT:    mov r3, #-2147483648
-; CHECK-ARM8-NEXT:    mov lr, #0
-; CHECK-ARM8-NEXT:    mvnne r3, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r12, r0
-; CHECK-ARM8-NEXT:    movvc r3, r12
-; CHECK-ARM8-NEXT:    subs r0, r3, r1
-; CHECK-ARM8-NEXT:    movwmi lr, #1
-; CHECK-ARM8-NEXT:    mov r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp lr, #0
-; CHECK-ARM8-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM8-NEXT:    cmp r3, r1
-; CHECK-ARM8-NEXT:    movvc r2, r0
-; CHECK-ARM8-NEXT:    mov r0, r2
-; CHECK-ARM8-NEXT:    pop {r11, pc}
+; CHECK-ARM-LABEL: qdsub_c:
+; CHECK-ARM:       @ %bb.0:
+; CHECK-ARM-NEXT:    qadd r0, r0, r0
+; CHECK-ARM-NEXT:    qsub r0, r0, r1
+; CHECK-ARM-NEXT:    bx lr
   %z = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %x)
   %tmp = call i32 @llvm.ssub.sat.i32(i32 %z, i32 %y)
   ret i32 %tmp
diff --git a/test/CodeGen/ARM/sadd_sat.ll b/test/CodeGen/ARM/sadd_sat.ll
index 1fcc460..386e750 100644
--- a/test/CodeGen/ARM/sadd_sat.ll
+++ b/test/CodeGen/ARM/sadd_sat.ll
@@ -43,34 +43,49 @@
 ; CHECK-T1-NEXT:  .LCPI0_0:
 ; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; CHECK-T2-LABEL: func:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    adds r2, r0, r1
-; CHECK-T2-NEXT:    mov.w r3, #0
-; CHECK-T2-NEXT:    mov.w r1, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r1, #-2147483648
-; CHECK-T2-NEXT:    cmp r2, r0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r1, r2
-; CHECK-T2-NEXT:    mov r0, r1
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    adds r2, r0, r1
+; CHECK-T2NODSP-NEXT:    mov.w r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r1, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r1, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r2, r0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r1, r2
+; CHECK-T2NODSP-NEXT:    mov r0, r1
+; CHECK-T2NODSP-NEXT:    bx lr
 ;
-; CHECK-ARM-LABEL: func:
-; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    adds r2, r0, r1
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    movmi r3, #1
-; CHECK-ARM-NEXT:    mov r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, r0
-; CHECK-ARM-NEXT:    movvc r1, r2
-; CHECK-ARM-NEXT:    mov r0, r1
-; CHECK-ARM-NEXT:    bx lr
+; CHECK-T2DSP-LABEL: func:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
+;
+; CHECK-ARMNODPS-LABEL: func:
+; CHECK-ARMNODPS:       @ %bb.0:
+; CHECK-ARMNODPS-NEXT:    adds r2, r0, r1
+; CHECK-ARMNODPS-NEXT:    mov r3, #0
+; CHECK-ARMNODPS-NEXT:    movmi r3, #1
+; CHECK-ARMNODPS-NEXT:    mov r1, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r3, #0
+; CHECK-ARMNODPS-NEXT:    mvnne r1, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r2, r0
+; CHECK-ARMNODPS-NEXT:    movvc r1, r2
+; CHECK-ARMNODPS-NEXT:    mov r0, r1
+; CHECK-ARMNODPS-NEXT:    bx lr
+;
+; CHECK-ARMBASEDSP-LABEL: func:
+; CHECK-ARMBASEDSP:       @ %bb.0:
+; CHECK-ARMBASEDSP-NEXT:    qadd r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    bx lr
+;
+; CHECK-ARMDSP-LABEL: func:
+; CHECK-ARMDSP:       @ %bb.0:
+; CHECK-ARMDSP-NEXT:    qadd r0, r0, r1
+; CHECK-ARMDSP-NEXT:    bx lr
   %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %y)
   ret i32 %tmp
 }
@@ -271,19 +286,11 @@
 ;
 ; CHECK-ARMBASEDSP-LABEL: func16:
 ; CHECK-ARMBASEDSP:       @ %bb.0:
-; CHECK-ARMBASEDSP-NEXT:    add r0, r0, r1
-; CHECK-ARMBASEDSP-NEXT:    mov r1, #255
-; CHECK-ARMBASEDSP-NEXT:    orr r1, r1, #32512
-; CHECK-ARMBASEDSP-NEXT:    cmp r0, r1
-; CHECK-ARMBASEDSP-NEXT:    movlt r1, r0
-; CHECK-ARMBASEDSP-NEXT:    ldr r0, .LCPI2_0
-; CHECK-ARMBASEDSP-NEXT:    cmn r1, #32768
-; CHECK-ARMBASEDSP-NEXT:    movgt r0, r1
+; CHECK-ARMBASEDSP-NEXT:    lsl r0, r0, #16
+; CHECK-ARMBASEDSP-NEXT:    lsl r1, r1, #16
+; CHECK-ARMBASEDSP-NEXT:    qadd r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #16
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
-; CHECK-ARMBASEDSP-NEXT:    .p2align 2
-; CHECK-ARMBASEDSP-NEXT:  @ %bb.1:
-; CHECK-ARMBASEDSP-NEXT:  .LCPI2_0:
-; CHECK-ARMBASEDSP-NEXT:    .long 4294934528 @ 0xffff8000
 ;
 ; CHECK-ARMDSP-LABEL: func16:
 ; CHECK-ARMDSP:       @ %bb.0:
@@ -340,11 +347,10 @@
 ;
 ; CHECK-ARMBASEDSP-LABEL: func8:
 ; CHECK-ARMBASEDSP:       @ %bb.0:
-; CHECK-ARMBASEDSP-NEXT:    add r0, r0, r1
-; CHECK-ARMBASEDSP-NEXT:    cmp r0, #127
-; CHECK-ARMBASEDSP-NEXT:    movge r0, #127
-; CHECK-ARMBASEDSP-NEXT:    cmn r0, #128
-; CHECK-ARMBASEDSP-NEXT:    mvnle r0, #127
+; CHECK-ARMBASEDSP-NEXT:    lsl r0, r0, #24
+; CHECK-ARMBASEDSP-NEXT:    lsl r1, r1, #24
+; CHECK-ARMBASEDSP-NEXT:    qadd r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #24
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
 ;
 ; CHECK-ARMDSP-LABEL: func8:
@@ -374,25 +380,49 @@
 ; CHECK-T1-NEXT:  .LBB4_4:
 ; CHECK-T1-NEXT:    bx lr
 ;
-; CHECK-T2-LABEL: func3:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    add r0, r1
-; CHECK-T2-NEXT:    cmp r0, #7
-; CHECK-T2-NEXT:    it ge
-; CHECK-T2-NEXT:    movge r0, #7
-; CHECK-T2-NEXT:    cmn.w r0, #8
-; CHECK-T2-NEXT:    it le
-; CHECK-T2-NEXT:    mvnle r0, #7
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func3:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    add r0, r1
+; CHECK-T2NODSP-NEXT:    cmp r0, #7
+; CHECK-T2NODSP-NEXT:    it ge
+; CHECK-T2NODSP-NEXT:    movge r0, #7
+; CHECK-T2NODSP-NEXT:    cmn.w r0, #8
+; CHECK-T2NODSP-NEXT:    it le
+; CHECK-T2NODSP-NEXT:    mvnle r0, #7
+; CHECK-T2NODSP-NEXT:    bx lr
 ;
-; CHECK-ARM-LABEL: func3:
-; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    add r0, r0, r1
-; CHECK-ARM-NEXT:    cmp r0, #7
-; CHECK-ARM-NEXT:    movge r0, #7
-; CHECK-ARM-NEXT:    cmn r0, #8
-; CHECK-ARM-NEXT:    mvnle r0, #7
-; CHECK-ARM-NEXT:    bx lr
+; CHECK-T2DSP-LABEL: func3:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    lsls r1, r1, #28
+; CHECK-T2DSP-NEXT:    lsls r0, r0, #28
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r1
+; CHECK-T2DSP-NEXT:    asrs r0, r0, #28
+; CHECK-T2DSP-NEXT:    bx lr
+;
+; CHECK-ARMNODPS-LABEL: func3:
+; CHECK-ARMNODPS:       @ %bb.0:
+; CHECK-ARMNODPS-NEXT:    add r0, r0, r1
+; CHECK-ARMNODPS-NEXT:    cmp r0, #7
+; CHECK-ARMNODPS-NEXT:    movge r0, #7
+; CHECK-ARMNODPS-NEXT:    cmn r0, #8
+; CHECK-ARMNODPS-NEXT:    mvnle r0, #7
+; CHECK-ARMNODPS-NEXT:    bx lr
+;
+; CHECK-ARMBASEDSP-LABEL: func3:
+; CHECK-ARMBASEDSP:       @ %bb.0:
+; CHECK-ARMBASEDSP-NEXT:    lsl r0, r0, #28
+; CHECK-ARMBASEDSP-NEXT:    lsl r1, r1, #28
+; CHECK-ARMBASEDSP-NEXT:    qadd r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #28
+; CHECK-ARMBASEDSP-NEXT:    bx lr
+;
+; CHECK-ARMDSP-LABEL: func3:
+; CHECK-ARMDSP:       @ %bb.0:
+; CHECK-ARMDSP-NEXT:    lsl r0, r0, #28
+; CHECK-ARMDSP-NEXT:    lsl r1, r1, #28
+; CHECK-ARMDSP-NEXT:    qadd r0, r0, r1
+; CHECK-ARMDSP-NEXT:    asr r0, r0, #28
+; CHECK-ARMDSP-NEXT:    bx lr
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
 }
diff --git a/test/CodeGen/ARM/sadd_sat_plus.ll b/test/CodeGen/ARM/sadd_sat_plus.ll
index 370d5d9..0415068 100644
--- a/test/CodeGen/ARM/sadd_sat_plus.ll
+++ b/test/CodeGen/ARM/sadd_sat_plus.ll
@@ -42,35 +42,33 @@
 ; CHECK-T1-NEXT:  .LCPI0_0:
 ; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; CHECK-T2-LABEL: func32:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    mla r2, r1, r2, r0
-; CHECK-T2-NEXT:    movs r3, #0
-; CHECK-T2-NEXT:    mov.w r1, #-2147483648
-; CHECK-T2-NEXT:    cmp r2, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r1, #-2147483648
-; CHECK-T2-NEXT:    cmp r2, r0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r1, r2
-; CHECK-T2-NEXT:    mov r0, r1
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func32:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    mla r2, r1, r2, r0
+; CHECK-T2NODSP-NEXT:    movs r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r1, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r2, #0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r1, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r2, r0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r1, r2
+; CHECK-T2NODSP-NEXT:    mov r0, r1
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func32:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func32:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    mla r2, r1, r2, r0
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    mov r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    movwmi r3, #1
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, r0
-; CHECK-ARM-NEXT:    movvc r1, r2
-; CHECK-ARM-NEXT:    mov r0, r1
+; CHECK-ARM-NEXT:    mul r1, r1, r2
+; CHECK-ARM-NEXT:    qadd r0, r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i32 %y, %z
   %tmp = call i32 @llvm.sadd.sat.i32(i32 %x, i32 %a)
@@ -345,28 +343,35 @@
 ; CHECK-T1-NEXT:  .LBB4_4:
 ; CHECK-T1-NEXT:    bx lr
 ;
-; CHECK-T2-LABEL: func4:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r1, r1, #28
-; CHECK-T2-NEXT:    add.w r0, r0, r1, asr #28
-; CHECK-T2-NEXT:    cmp r0, #7
-; CHECK-T2-NEXT:    it ge
-; CHECK-T2-NEXT:    movge r0, #7
-; CHECK-T2-NEXT:    cmn.w r0, #8
-; CHECK-T2-NEXT:    it le
-; CHECK-T2-NEXT:    mvnle r0, #7
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func4:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    lsls r1, r1, #28
+; CHECK-T2NODSP-NEXT:    add.w r0, r0, r1, asr #28
+; CHECK-T2NODSP-NEXT:    cmp r0, #7
+; CHECK-T2NODSP-NEXT:    it ge
+; CHECK-T2NODSP-NEXT:    movge r0, #7
+; CHECK-T2NODSP-NEXT:    cmn.w r0, #8
+; CHECK-T2NODSP-NEXT:    it le
+; CHECK-T2NODSP-NEXT:    mvnle r0, #7
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func4:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    lsls r0, r0, #28
+; CHECK-T2DSP-NEXT:    lsls r1, r1, #28
+; CHECK-T2DSP-NEXT:    qadd r0, r0, r1
+; CHECK-T2DSP-NEXT:    asrs r0, r0, #28
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func4:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
+; CHECK-ARM-NEXT:    lsl r0, r0, #28
 ; CHECK-ARM-NEXT:    lsl r1, r1, #28
-; CHECK-ARM-NEXT:    add r0, r0, r1, asr #28
-; CHECK-ARM-NEXT:    cmp r0, #7
-; CHECK-ARM-NEXT:    movge r0, #7
-; CHECK-ARM-NEXT:    cmn r0, #8
-; CHECK-ARM-NEXT:    mvnle r0, #7
+; CHECK-ARM-NEXT:    qadd r0, r0, r1
+; CHECK-ARM-NEXT:    asr r0, r0, #28
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %a)
diff --git a/test/CodeGen/ARM/ssub_sat.ll b/test/CodeGen/ARM/ssub_sat.ll
index 0c9e125..9c3c5ba 100644
--- a/test/CodeGen/ARM/ssub_sat.ll
+++ b/test/CodeGen/ARM/ssub_sat.ll
@@ -46,34 +46,49 @@
 ; CHECK-T1-NEXT:  .LCPI0_0:
 ; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; CHECK-T2-LABEL: func:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    subs.w r12, r0, r1
-; CHECK-T2-NEXT:    mov.w r3, #0
-; CHECK-T2-NEXT:    mov.w r2, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r3, #1
-; CHECK-T2-NEXT:    cmp r3, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r2, #-2147483648
-; CHECK-T2-NEXT:    cmp r0, r1
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r2, r12
-; CHECK-T2-NEXT:    mov r0, r2
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    subs.w r12, r0, r1
+; CHECK-T2NODSP-NEXT:    mov.w r3, #0
+; CHECK-T2NODSP-NEXT:    mov.w r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r3, #1
+; CHECK-T2NODSP-NEXT:    cmp r3, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r2, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r0, r1
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r2, r12
+; CHECK-T2NODSP-NEXT:    mov r0, r2
+; CHECK-T2NODSP-NEXT:    bx lr
 ;
-; CHECK-ARM-LABEL: func:
-; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    subs r12, r0, r1
-; CHECK-ARM-NEXT:    mov r3, #0
-; CHECK-ARM-NEXT:    movmi r3, #1
-; CHECK-ARM-NEXT:    mov r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    mvnne r2, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r1
-; CHECK-ARM-NEXT:    movvc r2, r12
-; CHECK-ARM-NEXT:    mov r0, r2
-; CHECK-ARM-NEXT:    bx lr
+; CHECK-T2DSP-LABEL: func:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    qsub r0, r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
+;
+; CHECK-ARMNODPS-LABEL: func:
+; CHECK-ARMNODPS:       @ %bb.0:
+; CHECK-ARMNODPS-NEXT:    subs r12, r0, r1
+; CHECK-ARMNODPS-NEXT:    mov r3, #0
+; CHECK-ARMNODPS-NEXT:    movmi r3, #1
+; CHECK-ARMNODPS-NEXT:    mov r2, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r3, #0
+; CHECK-ARMNODPS-NEXT:    mvnne r2, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r0, r1
+; CHECK-ARMNODPS-NEXT:    movvc r2, r12
+; CHECK-ARMNODPS-NEXT:    mov r0, r2
+; CHECK-ARMNODPS-NEXT:    bx lr
+;
+; CHECK-ARMBASEDSP-LABEL: func:
+; CHECK-ARMBASEDSP:       @ %bb.0:
+; CHECK-ARMBASEDSP-NEXT:    qsub r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    bx lr
+;
+; CHECK-ARMDSP-LABEL: func:
+; CHECK-ARMDSP:       @ %bb.0:
+; CHECK-ARMDSP-NEXT:    qsub r0, r0, r1
+; CHECK-ARMDSP-NEXT:    bx lr
   %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %y)
   ret i32 %tmp
 }
@@ -273,19 +288,11 @@
 ;
 ; CHECK-ARMBASEDSP-LABEL: func16:
 ; CHECK-ARMBASEDSP:       @ %bb.0:
-; CHECK-ARMBASEDSP-NEXT:    sub r0, r0, r1
-; CHECK-ARMBASEDSP-NEXT:    mov r1, #255
-; CHECK-ARMBASEDSP-NEXT:    orr r1, r1, #32512
-; CHECK-ARMBASEDSP-NEXT:    cmp r0, r1
-; CHECK-ARMBASEDSP-NEXT:    movlt r1, r0
-; CHECK-ARMBASEDSP-NEXT:    ldr r0, .LCPI2_0
-; CHECK-ARMBASEDSP-NEXT:    cmn r1, #32768
-; CHECK-ARMBASEDSP-NEXT:    movgt r0, r1
+; CHECK-ARMBASEDSP-NEXT:    lsl r0, r0, #16
+; CHECK-ARMBASEDSP-NEXT:    lsl r1, r1, #16
+; CHECK-ARMBASEDSP-NEXT:    qsub r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #16
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
-; CHECK-ARMBASEDSP-NEXT:    .p2align 2
-; CHECK-ARMBASEDSP-NEXT:  @ %bb.1:
-; CHECK-ARMBASEDSP-NEXT:  .LCPI2_0:
-; CHECK-ARMBASEDSP-NEXT:    .long 4294934528 @ 0xffff8000
 ;
 ; CHECK-ARMDSP-LABEL: func16:
 ; CHECK-ARMDSP:       @ %bb.0:
@@ -342,11 +349,10 @@
 ;
 ; CHECK-ARMBASEDSP-LABEL: func8:
 ; CHECK-ARMBASEDSP:       @ %bb.0:
-; CHECK-ARMBASEDSP-NEXT:    sub r0, r0, r1
-; CHECK-ARMBASEDSP-NEXT:    cmp r0, #127
-; CHECK-ARMBASEDSP-NEXT:    movge r0, #127
-; CHECK-ARMBASEDSP-NEXT:    cmn r0, #128
-; CHECK-ARMBASEDSP-NEXT:    mvnle r0, #127
+; CHECK-ARMBASEDSP-NEXT:    lsl r0, r0, #24
+; CHECK-ARMBASEDSP-NEXT:    lsl r1, r1, #24
+; CHECK-ARMBASEDSP-NEXT:    qsub r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #24
 ; CHECK-ARMBASEDSP-NEXT:    bx lr
 ;
 ; CHECK-ARMDSP-LABEL: func8:
@@ -376,25 +382,49 @@
 ; CHECK-T1-NEXT:  .LBB4_4:
 ; CHECK-T1-NEXT:    bx lr
 ;
-; CHECK-T2-LABEL: func3:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    subs r0, r0, r1
-; CHECK-T2-NEXT:    cmp r0, #7
-; CHECK-T2-NEXT:    it ge
-; CHECK-T2-NEXT:    movge r0, #7
-; CHECK-T2-NEXT:    cmn.w r0, #8
-; CHECK-T2-NEXT:    it le
-; CHECK-T2-NEXT:    mvnle r0, #7
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func3:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    subs r0, r0, r1
+; CHECK-T2NODSP-NEXT:    cmp r0, #7
+; CHECK-T2NODSP-NEXT:    it ge
+; CHECK-T2NODSP-NEXT:    movge r0, #7
+; CHECK-T2NODSP-NEXT:    cmn.w r0, #8
+; CHECK-T2NODSP-NEXT:    it le
+; CHECK-T2NODSP-NEXT:    mvnle r0, #7
+; CHECK-T2NODSP-NEXT:    bx lr
 ;
-; CHECK-ARM-LABEL: func3:
-; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    sub r0, r0, r1
-; CHECK-ARM-NEXT:    cmp r0, #7
-; CHECK-ARM-NEXT:    movge r0, #7
-; CHECK-ARM-NEXT:    cmn r0, #8
-; CHECK-ARM-NEXT:    mvnle r0, #7
-; CHECK-ARM-NEXT:    bx lr
+; CHECK-T2DSP-LABEL: func3:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    lsls r1, r1, #28
+; CHECK-T2DSP-NEXT:    lsls r0, r0, #28
+; CHECK-T2DSP-NEXT:    qsub r0, r0, r1
+; CHECK-T2DSP-NEXT:    asrs r0, r0, #28
+; CHECK-T2DSP-NEXT:    bx lr
+;
+; CHECK-ARMNODPS-LABEL: func3:
+; CHECK-ARMNODPS:       @ %bb.0:
+; CHECK-ARMNODPS-NEXT:    sub r0, r0, r1
+; CHECK-ARMNODPS-NEXT:    cmp r0, #7
+; CHECK-ARMNODPS-NEXT:    movge r0, #7
+; CHECK-ARMNODPS-NEXT:    cmn r0, #8
+; CHECK-ARMNODPS-NEXT:    mvnle r0, #7
+; CHECK-ARMNODPS-NEXT:    bx lr
+;
+; CHECK-ARMBASEDSP-LABEL: func3:
+; CHECK-ARMBASEDSP:       @ %bb.0:
+; CHECK-ARMBASEDSP-NEXT:    lsl r0, r0, #28
+; CHECK-ARMBASEDSP-NEXT:    lsl r1, r1, #28
+; CHECK-ARMBASEDSP-NEXT:    qsub r0, r0, r1
+; CHECK-ARMBASEDSP-NEXT:    asr r0, r0, #28
+; CHECK-ARMBASEDSP-NEXT:    bx lr
+;
+; CHECK-ARMDSP-LABEL: func3:
+; CHECK-ARMDSP:       @ %bb.0:
+; CHECK-ARMDSP-NEXT:    lsl r0, r0, #28
+; CHECK-ARMDSP-NEXT:    lsl r1, r1, #28
+; CHECK-ARMDSP-NEXT:    qsub r0, r0, r1
+; CHECK-ARMDSP-NEXT:    asr r0, r0, #28
+; CHECK-ARMDSP-NEXT:    bx lr
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y)
   ret i4 %tmp
 }
@@ -501,112 +531,148 @@
 ; CHECK-T1-NEXT:  .LCPI5_0:
 ; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; CHECK-T2-LABEL: vec:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-T2-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-T2-NEXT:    .pad #4
-; CHECK-T2-NEXT:    sub sp, #4
-; CHECK-T2-NEXT:    ldr r4, [sp, #24]
-; CHECK-T2-NEXT:    mov lr, r0
-; CHECK-T2-NEXT:    ldr r7, [sp, #28]
-; CHECK-T2-NEXT:    movs r5, #0
-; CHECK-T2-NEXT:    subs r6, r0, r4
-; CHECK-T2-NEXT:    mov.w r0, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r0, #1
-; CHECK-T2-NEXT:    cmp r0, #0
-; CHECK-T2-NEXT:    mov.w r0, #-2147483648
-; CHECK-T2-NEXT:    mov.w r12, #-2147483648
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r0, #-2147483648
-; CHECK-T2-NEXT:    cmp lr, r4
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r0, r6
-; CHECK-T2-NEXT:    subs r6, r1, r7
-; CHECK-T2-NEXT:    mov.w r4, #0
-; CHECK-T2-NEXT:    mov.w lr, #-2147483648
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r4, #1
-; CHECK-T2-NEXT:    cmp r4, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne lr, #-2147483648
-; CHECK-T2-NEXT:    cmp r1, r7
-; CHECK-T2-NEXT:    ldr r1, [sp, #32]
-; CHECK-T2-NEXT:    mov.w r4, #0
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc lr, r6
-; CHECK-T2-NEXT:    subs r6, r2, r1
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r4, #1
-; CHECK-T2-NEXT:    cmp r4, #0
-; CHECK-T2-NEXT:    mov.w r4, #-2147483648
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r4, #-2147483648
-; CHECK-T2-NEXT:    cmp r2, r1
-; CHECK-T2-NEXT:    ldr r1, [sp, #36]
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r4, r6
-; CHECK-T2-NEXT:    subs r2, r3, r1
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi r5, #1
-; CHECK-T2-NEXT:    cmp r5, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r12, #-2147483648
-; CHECK-T2-NEXT:    cmp r3, r1
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r12, r2
-; CHECK-T2-NEXT:    mov r1, lr
-; CHECK-T2-NEXT:    mov r2, r4
-; CHECK-T2-NEXT:    mov r3, r12
-; CHECK-T2-NEXT:    add sp, #4
-; CHECK-T2-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-T2NODSP-LABEL: vec:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-T2NODSP-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-T2NODSP-NEXT:    .pad #4
+; CHECK-T2NODSP-NEXT:    sub sp, #4
+; CHECK-T2NODSP-NEXT:    ldr r4, [sp, #24]
+; CHECK-T2NODSP-NEXT:    mov lr, r0
+; CHECK-T2NODSP-NEXT:    ldr r7, [sp, #28]
+; CHECK-T2NODSP-NEXT:    movs r5, #0
+; CHECK-T2NODSP-NEXT:    subs r6, r0, r4
+; CHECK-T2NODSP-NEXT:    mov.w r0, #0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r0, #1
+; CHECK-T2NODSP-NEXT:    cmp r0, #0
+; CHECK-T2NODSP-NEXT:    mov.w r0, #-2147483648
+; CHECK-T2NODSP-NEXT:    mov.w r12, #-2147483648
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r0, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp lr, r4
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r0, r6
+; CHECK-T2NODSP-NEXT:    subs r6, r1, r7
+; CHECK-T2NODSP-NEXT:    mov.w r4, #0
+; CHECK-T2NODSP-NEXT:    mov.w lr, #-2147483648
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r4, #1
+; CHECK-T2NODSP-NEXT:    cmp r4, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne lr, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r1, r7
+; CHECK-T2NODSP-NEXT:    ldr r1, [sp, #32]
+; CHECK-T2NODSP-NEXT:    mov.w r4, #0
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc lr, r6
+; CHECK-T2NODSP-NEXT:    subs r6, r2, r1
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r4, #1
+; CHECK-T2NODSP-NEXT:    cmp r4, #0
+; CHECK-T2NODSP-NEXT:    mov.w r4, #-2147483648
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r4, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r2, r1
+; CHECK-T2NODSP-NEXT:    ldr r1, [sp, #36]
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r4, r6
+; CHECK-T2NODSP-NEXT:    subs r2, r3, r1
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi r5, #1
+; CHECK-T2NODSP-NEXT:    cmp r5, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r12, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r3, r1
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r12, r2
+; CHECK-T2NODSP-NEXT:    mov r1, lr
+; CHECK-T2NODSP-NEXT:    mov r2, r4
+; CHECK-T2NODSP-NEXT:    mov r3, r12
+; CHECK-T2NODSP-NEXT:    add sp, #4
+; CHECK-T2NODSP-NEXT:    pop {r4, r5, r6, r7, pc}
 ;
-; CHECK-ARM-LABEL: vec:
-; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; CHECK-ARM-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; CHECK-ARM-NEXT:    ldr r4, [sp, #24]
-; CHECK-ARM-NEXT:    mov lr, r0
-; CHECK-ARM-NEXT:    ldr r7, [sp, #28]
-; CHECK-ARM-NEXT:    mov r5, #0
-; CHECK-ARM-NEXT:    subs r6, r0, r4
-; CHECK-ARM-NEXT:    mov r0, #0
-; CHECK-ARM-NEXT:    movmi r0, #1
-; CHECK-ARM-NEXT:    cmp r0, #0
-; CHECK-ARM-NEXT:    mov r0, #-2147483648
-; CHECK-ARM-NEXT:    mov r12, #-2147483648
-; CHECK-ARM-NEXT:    mvnne r0, #-2147483648
-; CHECK-ARM-NEXT:    cmp lr, r4
-; CHECK-ARM-NEXT:    movvc r0, r6
-; CHECK-ARM-NEXT:    subs r6, r1, r7
-; CHECK-ARM-NEXT:    mov r4, #0
-; CHECK-ARM-NEXT:    mov lr, #-2147483648
-; CHECK-ARM-NEXT:    movmi r4, #1
-; CHECK-ARM-NEXT:    cmp r4, #0
-; CHECK-ARM-NEXT:    mvnne lr, #-2147483648
-; CHECK-ARM-NEXT:    cmp r1, r7
-; CHECK-ARM-NEXT:    ldr r1, [sp, #32]
-; CHECK-ARM-NEXT:    movvc lr, r6
-; CHECK-ARM-NEXT:    mov r4, #0
-; CHECK-ARM-NEXT:    subs r6, r2, r1
-; CHECK-ARM-NEXT:    movmi r4, #1
-; CHECK-ARM-NEXT:    cmp r4, #0
-; CHECK-ARM-NEXT:    mov r4, #-2147483648
-; CHECK-ARM-NEXT:    mvnne r4, #-2147483648
-; CHECK-ARM-NEXT:    cmp r2, r1
-; CHECK-ARM-NEXT:    ldr r1, [sp, #36]
-; CHECK-ARM-NEXT:    movvc r4, r6
-; CHECK-ARM-NEXT:    subs r2, r3, r1
-; CHECK-ARM-NEXT:    movmi r5, #1
-; CHECK-ARM-NEXT:    cmp r5, #0
-; CHECK-ARM-NEXT:    mvnne r12, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, r1
-; CHECK-ARM-NEXT:    movvc r12, r2
-; CHECK-ARM-NEXT:    mov r1, lr
-; CHECK-ARM-NEXT:    mov r2, r4
-; CHECK-ARM-NEXT:    mov r3, r12
-; CHECK-ARM-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+; CHECK-T2DSP-LABEL: vec:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    ldr.w r12, [sp]
+; CHECK-T2DSP-NEXT:    qsub r0, r0, r12
+; CHECK-T2DSP-NEXT:    ldr.w r12, [sp, #4]
+; CHECK-T2DSP-NEXT:    qsub r1, r1, r12
+; CHECK-T2DSP-NEXT:    ldr.w r12, [sp, #8]
+; CHECK-T2DSP-NEXT:    qsub r2, r2, r12
+; CHECK-T2DSP-NEXT:    ldr.w r12, [sp, #12]
+; CHECK-T2DSP-NEXT:    qsub r3, r3, r12
+; CHECK-T2DSP-NEXT:    bx lr
+;
+; CHECK-ARMNODPS-LABEL: vec:
+; CHECK-ARMNODPS:       @ %bb.0:
+; CHECK-ARMNODPS-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-ARMNODPS-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; CHECK-ARMNODPS-NEXT:    ldr r4, [sp, #24]
+; CHECK-ARMNODPS-NEXT:    mov lr, r0
+; CHECK-ARMNODPS-NEXT:    ldr r7, [sp, #28]
+; CHECK-ARMNODPS-NEXT:    mov r5, #0
+; CHECK-ARMNODPS-NEXT:    subs r6, r0, r4
+; CHECK-ARMNODPS-NEXT:    mov r0, #0
+; CHECK-ARMNODPS-NEXT:    movmi r0, #1
+; CHECK-ARMNODPS-NEXT:    cmp r0, #0
+; CHECK-ARMNODPS-NEXT:    mov r0, #-2147483648
+; CHECK-ARMNODPS-NEXT:    mov r12, #-2147483648
+; CHECK-ARMNODPS-NEXT:    mvnne r0, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp lr, r4
+; CHECK-ARMNODPS-NEXT:    movvc r0, r6
+; CHECK-ARMNODPS-NEXT:    subs r6, r1, r7
+; CHECK-ARMNODPS-NEXT:    mov r4, #0
+; CHECK-ARMNODPS-NEXT:    mov lr, #-2147483648
+; CHECK-ARMNODPS-NEXT:    movmi r4, #1
+; CHECK-ARMNODPS-NEXT:    cmp r4, #0
+; CHECK-ARMNODPS-NEXT:    mvnne lr, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r1, r7
+; CHECK-ARMNODPS-NEXT:    ldr r1, [sp, #32]
+; CHECK-ARMNODPS-NEXT:    movvc lr, r6
+; CHECK-ARMNODPS-NEXT:    mov r4, #0
+; CHECK-ARMNODPS-NEXT:    subs r6, r2, r1
+; CHECK-ARMNODPS-NEXT:    movmi r4, #1
+; CHECK-ARMNODPS-NEXT:    cmp r4, #0
+; CHECK-ARMNODPS-NEXT:    mov r4, #-2147483648
+; CHECK-ARMNODPS-NEXT:    mvnne r4, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r2, r1
+; CHECK-ARMNODPS-NEXT:    ldr r1, [sp, #36]
+; CHECK-ARMNODPS-NEXT:    movvc r4, r6
+; CHECK-ARMNODPS-NEXT:    subs r2, r3, r1
+; CHECK-ARMNODPS-NEXT:    movmi r5, #1
+; CHECK-ARMNODPS-NEXT:    cmp r5, #0
+; CHECK-ARMNODPS-NEXT:    mvnne r12, #-2147483648
+; CHECK-ARMNODPS-NEXT:    cmp r3, r1
+; CHECK-ARMNODPS-NEXT:    movvc r12, r2
+; CHECK-ARMNODPS-NEXT:    mov r1, lr
+; CHECK-ARMNODPS-NEXT:    mov r2, r4
+; CHECK-ARMNODPS-NEXT:    mov r3, r12
+; CHECK-ARMNODPS-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-ARMBASEDSP-LABEL: vec:
+; CHECK-ARMBASEDSP:       @ %bb.0:
+; CHECK-ARMBASEDSP-NEXT:    ldr r12, [sp]
+; CHECK-ARMBASEDSP-NEXT:    qsub r0, r0, r12
+; CHECK-ARMBASEDSP-NEXT:    ldr r12, [sp, #4]
+; CHECK-ARMBASEDSP-NEXT:    qsub r1, r1, r12
+; CHECK-ARMBASEDSP-NEXT:    ldr r12, [sp, #8]
+; CHECK-ARMBASEDSP-NEXT:    qsub r2, r2, r12
+; CHECK-ARMBASEDSP-NEXT:    ldr r12, [sp, #12]
+; CHECK-ARMBASEDSP-NEXT:    qsub r3, r3, r12
+; CHECK-ARMBASEDSP-NEXT:    bx lr
+;
+; CHECK-ARMDSP-LABEL: vec:
+; CHECK-ARMDSP:       @ %bb.0:
+; CHECK-ARMDSP-NEXT:    ldr r12, [sp]
+; CHECK-ARMDSP-NEXT:    qsub r0, r0, r12
+; CHECK-ARMDSP-NEXT:    ldr r12, [sp, #4]
+; CHECK-ARMDSP-NEXT:    qsub r1, r1, r12
+; CHECK-ARMDSP-NEXT:    ldr r12, [sp, #8]
+; CHECK-ARMDSP-NEXT:    qsub r2, r2, r12
+; CHECK-ARMDSP-NEXT:    ldr r12, [sp, #12]
+; CHECK-ARMDSP-NEXT:    qsub r3, r3, r12
+; CHECK-ARMDSP-NEXT:    bx lr
   %tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %tmp
 }
diff --git a/test/CodeGen/ARM/ssub_sat_plus.ll b/test/CodeGen/ARM/ssub_sat_plus.ll
index 02a0de2..147c0a0 100644
--- a/test/CodeGen/ARM/ssub_sat_plus.ll
+++ b/test/CodeGen/ARM/ssub_sat_plus.ll
@@ -44,39 +44,36 @@
 ; CHECK-T1-NEXT:  .LCPI0_0:
 ; CHECK-T1-NEXT:    .long 2147483647 @ 0x7fffffff
 ;
-; CHECK-T2-LABEL: func32:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    .save {r7, lr}
-; CHECK-T2-NEXT:    push {r7, lr}
-; CHECK-T2-NEXT:    mls r12, r1, r2, r0
-; CHECK-T2-NEXT:    mov.w lr, #0
-; CHECK-T2-NEXT:    mov.w r3, #-2147483648
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    cmp.w r12, #0
-; CHECK-T2-NEXT:    it mi
-; CHECK-T2-NEXT:    movmi.w lr, #1
-; CHECK-T2-NEXT:    cmp.w lr, #0
-; CHECK-T2-NEXT:    it ne
-; CHECK-T2-NEXT:    mvnne r3, #-2147483648
-; CHECK-T2-NEXT:    cmp r0, r1
-; CHECK-T2-NEXT:    it vc
-; CHECK-T2-NEXT:    movvc r3, r12
-; CHECK-T2-NEXT:    mov r0, r3
-; CHECK-T2-NEXT:    pop {r7, pc}
+; CHECK-T2NODSP-LABEL: func32:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    .save {r7, lr}
+; CHECK-T2NODSP-NEXT:    push {r7, lr}
+; CHECK-T2NODSP-NEXT:    mls r12, r1, r2, r0
+; CHECK-T2NODSP-NEXT:    mov.w lr, #0
+; CHECK-T2NODSP-NEXT:    mov.w r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    cmp.w r12, #0
+; CHECK-T2NODSP-NEXT:    it mi
+; CHECK-T2NODSP-NEXT:    movmi.w lr, #1
+; CHECK-T2NODSP-NEXT:    cmp.w lr, #0
+; CHECK-T2NODSP-NEXT:    it ne
+; CHECK-T2NODSP-NEXT:    mvnne r3, #-2147483648
+; CHECK-T2NODSP-NEXT:    cmp r0, r1
+; CHECK-T2NODSP-NEXT:    it vc
+; CHECK-T2NODSP-NEXT:    movvc r3, r12
+; CHECK-T2NODSP-NEXT:    mov r0, r3
+; CHECK-T2NODSP-NEXT:    pop {r7, pc}
+;
+; CHECK-T2DSP-LABEL: func32:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    qsub r0, r0, r1
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func32:
 ; CHECK-ARM:       @ %bb.0:
-; CHECK-ARM-NEXT:    mls r3, r1, r2, r0
-; CHECK-ARM-NEXT:    mul r12, r1, r2
-; CHECK-ARM-NEXT:    mov r2, #0
-; CHECK-ARM-NEXT:    mov r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r3, #0
-; CHECK-ARM-NEXT:    movwmi r2, #1
-; CHECK-ARM-NEXT:    cmp r2, #0
-; CHECK-ARM-NEXT:    mvnne r1, #-2147483648
-; CHECK-ARM-NEXT:    cmp r0, r12
-; CHECK-ARM-NEXT:    movvc r1, r3
-; CHECK-ARM-NEXT:    mov r0, r1
+; CHECK-ARM-NEXT:    mul r1, r1, r2
+; CHECK-ARM-NEXT:    qsub r0, r0, r1
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i32 %y, %z
   %tmp = call i32 @llvm.ssub.sat.i32(i32 %x, i32 %a)
@@ -351,28 +348,35 @@
 ; CHECK-T1-NEXT:  .LBB4_4:
 ; CHECK-T1-NEXT:    bx lr
 ;
-; CHECK-T2-LABEL: func4:
-; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    muls r1, r2, r1
-; CHECK-T2-NEXT:    lsls r1, r1, #28
-; CHECK-T2-NEXT:    sub.w r0, r0, r1, asr #28
-; CHECK-T2-NEXT:    cmp r0, #7
-; CHECK-T2-NEXT:    it ge
-; CHECK-T2-NEXT:    movge r0, #7
-; CHECK-T2-NEXT:    cmn.w r0, #8
-; CHECK-T2-NEXT:    it le
-; CHECK-T2-NEXT:    mvnle r0, #7
-; CHECK-T2-NEXT:    bx lr
+; CHECK-T2NODSP-LABEL: func4:
+; CHECK-T2NODSP:       @ %bb.0:
+; CHECK-T2NODSP-NEXT:    muls r1, r2, r1
+; CHECK-T2NODSP-NEXT:    lsls r1, r1, #28
+; CHECK-T2NODSP-NEXT:    sub.w r0, r0, r1, asr #28
+; CHECK-T2NODSP-NEXT:    cmp r0, #7
+; CHECK-T2NODSP-NEXT:    it ge
+; CHECK-T2NODSP-NEXT:    movge r0, #7
+; CHECK-T2NODSP-NEXT:    cmn.w r0, #8
+; CHECK-T2NODSP-NEXT:    it le
+; CHECK-T2NODSP-NEXT:    mvnle r0, #7
+; CHECK-T2NODSP-NEXT:    bx lr
+;
+; CHECK-T2DSP-LABEL: func4:
+; CHECK-T2DSP:       @ %bb.0:
+; CHECK-T2DSP-NEXT:    muls r1, r2, r1
+; CHECK-T2DSP-NEXT:    lsls r0, r0, #28
+; CHECK-T2DSP-NEXT:    lsls r1, r1, #28
+; CHECK-T2DSP-NEXT:    qsub r0, r0, r1
+; CHECK-T2DSP-NEXT:    asrs r0, r0, #28
+; CHECK-T2DSP-NEXT:    bx lr
 ;
 ; CHECK-ARM-LABEL: func4:
 ; CHECK-ARM:       @ %bb.0:
 ; CHECK-ARM-NEXT:    smulbb r1, r1, r2
+; CHECK-ARM-NEXT:    lsl r0, r0, #28
 ; CHECK-ARM-NEXT:    lsl r1, r1, #28
-; CHECK-ARM-NEXT:    sub r0, r0, r1, asr #28
-; CHECK-ARM-NEXT:    cmp r0, #7
-; CHECK-ARM-NEXT:    movge r0, #7
-; CHECK-ARM-NEXT:    cmn r0, #8
-; CHECK-ARM-NEXT:    mvnle r0, #7
+; CHECK-ARM-NEXT:    qsub r0, r0, r1
+; CHECK-ARM-NEXT:    asr r0, r0, #28
 ; CHECK-ARM-NEXT:    bx lr
   %a = mul i4 %y, %z
   %tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %a)