[ARM][MVE] Add MVE clmul tests. NFC (#192403)

Taken from the equivalent AArch64 fixed-width tests.
diff --git a/llvm/test/CodeGen/Thumb2/mve-clmul.ll b/llvm/test/CodeGen/Thumb2/mve-clmul.ll
new file mode 100644
index 0000000..587f7ac
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-clmul.ll
@@ -0,0 +1,18621 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
+
+define i8 @clmul_i8(i8 %x, i8 %y) { ; Scalar i8 llvm.clmul (carry-less multiply). Expected code: r1 bits 0-6 are isolated with and, the remaining bits with bic #127; each piece is multiplied by r0 and the products are XORed together.
+; CHECK-LABEL: clmul_i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    and r2, r1, #2
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    bic r1, r1, #127
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    bx lr
+  %a = call i8 @llvm.clmul.i8(i8 %x, i8 %y) ; intrinsic under test
+  ret i8 %a
+}
+
+define i16 @clmul_i16(i16 %x, i16 %y) { ; Scalar i16 llvm.clmul. Expected code uses one and/muls/eors step per r1 bit for bits 0-14, then clears those bits (movw #32767 + bics) for the final multiply by r0.
+; CHECK-LABEL: clmul_i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    and r2, r1, #2
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #128
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #256
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #512
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #1024
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #2048
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4096
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8192
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16384
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    movw r3, #32767
+; CHECK-NEXT:    bics r1, r3
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    bx lr
+  %a = call i16 @llvm.clmul.i16(i16 %x, i16 %y) ; intrinsic under test
+  ret i16 %a
+}
+
+define i32 @clmul_i32(i32 %x, i32 %y) { ; Scalar i32 llvm.clmul. Expected code: one and/muls/eors step per bit of r1 (masks #1 through #-2147483648), each product taken against r0 and XOR-accumulated.
+; CHECK-LABEL: clmul_i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    and r2, r1, #2
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #128
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #256
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #512
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #1024
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #2048
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4096
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8192
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16384
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #32768
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #65536
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #131072
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #262144
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #524288
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #1048576
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #2097152
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4194304
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8388608
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16777216
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #33554432
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #67108864
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #134217728
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #268435456
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #536870912
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #1073741824
+; CHECK-NEXT:    and r1, r1, #-2147483648
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    bx lr
+  %a = call i32 @llvm.clmul.i32(i32 %x, i32 %y) ; intrinsic under test
+  ret i32 %a
+}
+
+define i64 @clmul_i64(i64 %x, i64 %y) { ; Scalar i64 llvm.clmul. Expected code works on 32-bit registers: most single-bit masks of r2 are spilled to a 96-byte stack area and reloaded later; r1 collects masked products of r1*r2 and r0*r3 plus an rbit/multiply/rbit/lsr #1 term built from r0 and r2; r0 collects masked products of r0*r2.
+; CHECK-LABEL: clmul_i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #96
+; CHECK-NEXT:    sub sp, #96
+; CHECK-NEXT:    and r7, r2, #2
+; CHECK-NEXT:    and r6, r2, #1
+; CHECK-NEXT:    str r7, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    and r11, r2, #16777216
+; CHECK-NEXT:    str r6, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    and r10, r2, #33554432
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    and r9, r2, #67108864
+; CHECK-NEXT:    and r8, r2, #134217728
+; CHECK-NEXT:    mul r5, r1, r9
+; CHECK-NEXT:    and lr, r2, #1073741824
+; CHECK-NEXT:    and r12, r2, #-2147483648
+; CHECK-NEXT:    mul r4, r1, r8
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8
+; CHECK-NEXT:    str r6, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32
+; CHECK-NEXT:    str r6, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #64
+; CHECK-NEXT:    str r6, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #128
+; CHECK-NEXT:    str r6, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #256
+; CHECK-NEXT:    str r6, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #512
+; CHECK-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1024
+; CHECK-NEXT:    str r6, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2048
+; CHECK-NEXT:    str r6, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4096
+; CHECK-NEXT:    str r6, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8192
+; CHECK-NEXT:    str r6, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16384
+; CHECK-NEXT:    str r6, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32768
+; CHECK-NEXT:    str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #65536
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #131072
+; CHECK-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #262144
+; CHECK-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #524288
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1048576
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2097152
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4194304
+; CHECK-NEXT:    str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8388608
+; CHECK-NEXT:    str r6, [sp] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r1, r11
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r1, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #268435456
+; CHECK-NEXT:    mul r7, r1, r5
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r2, #536870912
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    mul r6, r1, r4
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r1, lr
+; CHECK-NEXT:    mul r1, r1, r12
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    and r6, r3, #2
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #4
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #8
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #16
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #32
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #64
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #128
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #256
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #512
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1024
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #2048
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #4096
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #8192
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #16384
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #32768
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #65536
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #131072
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #262144
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #524288
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1048576
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #2097152
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #4194304
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #8388608
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #16777216
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #33554432
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #67108864
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #134217728
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #268435456
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #536870912
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1073741824
+; CHECK-NEXT:    and r3, r3, #-2147483648
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #1
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    and r6, r2, #2
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    rbit r3, r0
+; CHECK-NEXT:    muls r6, r3, r6
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #4
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #8
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #16
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #32
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #64
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #128
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #256
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #512
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #1024
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #2048
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #4096
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #8192
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #16384
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #32768
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #65536
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #131072
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #262144
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #524288
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #1048576
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #2097152
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #4194304
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #8388608
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #16777216
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #33554432
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #67108864
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #134217728
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #268435456
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #536870912
+; CHECK-NEXT:    and r2, r2, #1073741824
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    muls r2, r3, r2
+; CHECK-NEXT:    ldr r3, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    eor.w r1, r1, r2, lsr #1
+; CHECK-NEXT:    ldr r2, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r11
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r10
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r9
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r8
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r5
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r4
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, lr
+; CHECK-NEXT:    mul r0, r0, r12
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    add sp, #96
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call i64 @llvm.clmul.i64(i64 %x, i64 %y) ; intrinsic under test
+  ret i64 %a
+}
+
+define i16 @clmul_i16_zext(i8 %x, i8 %y) { ; llvm.clmul.i16 on operands zero-extended from i8. Expected code zero-extends r0 with uxtb and only walks r1 bits 0-7 (masks #1 through #128).
+; CHECK-LABEL: clmul_i16_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    and r2, r1, #2
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    uxtb r0, r0
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    and r1, r1, #128
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    bx lr
+  %zextx = zext i8 %x to i16 ; both inputs are widened before the multiply
+  %zexty = zext i8 %y to i16
+  %a = call i16 @llvm.clmul.i16(i16 %zextx, i16 %zexty) ; intrinsic under test, at the wider type
+  ret i16 %a
+}
+
+define i32 @clmul_i32_zext(i16 %x, i16 %y) { ; llvm.clmul.i32 on operands zero-extended from i16. Expected code zero-extends r0 with uxth and only walks r1 bits 0-15 (masks #1 through #32768).
+; CHECK-LABEL: clmul_i32_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    and r2, r1, #2
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    uxth r0, r0
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #128
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #256
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #512
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #1024
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #2048
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #4096
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #8192
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    and r3, r1, #16384
+; CHECK-NEXT:    and r1, r1, #32768
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    bx lr
+  %zextx = zext i16 %x to i32 ; both inputs are widened before the multiply
+  %zexty = zext i16 %y to i32
+  %a = call i32 @llvm.clmul.i32(i32 %zextx, i32 %zexty) ; intrinsic under test, at the wider type
+  ret i32 %a
+}
+
+define i64 @clmul_i64_zext(i32 %x, i32 %y) { ; llvm.clmul.i64 on operands zero-extended from i32. Expected code: r0 is built from 32 single-bit masked multiplies (r2 holds the original r0); r1 is built from the bit-reversed operands (rbit), 31 masked multiplies, then rbit and lsrs #1.
+; CHECK-LABEL: clmul_i64_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    and r0, r1, #2
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    muls r0, r2, r0
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #128
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #256
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #512
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #1024
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #2048
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #4096
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #8192
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #16384
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #32768
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #65536
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #131072
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #262144
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #524288
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #1048576
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #2097152
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #4194304
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #8388608
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #16777216
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #33554432
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #67108864
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #134217728
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #268435456
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #536870912
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #1073741824
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #-2147483648
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    and r3, r1, #2
+; CHECK-NEXT:    mul r12, r2, r3
+; CHECK-NEXT:    and r3, r1, #1
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #4
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #8
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #16
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #32
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #64
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #128
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #256
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #512
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #1024
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #2048
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #4096
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #8192
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #16384
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #32768
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #65536
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #131072
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #262144
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #524288
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #1048576
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #2097152
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #4194304
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #8388608
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #16777216
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #33554432
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #67108864
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #134217728
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #268435456
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eor.w r12, r12, r3
+; CHECK-NEXT:    and r3, r1, #536870912
+; CHECK-NEXT:    and r1, r1, #1073741824
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    muls r1, r2, r1
+; CHECK-NEXT:    eor.w r3, r3, r12
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    lsrs r1, r1, #1
+; CHECK-NEXT:    bx lr
+  %zextx = zext i32 %x to i64 ; both inputs are widened before the multiply
+  %zexty = zext i32 %y to i64
+  %a = call i64 @llvm.clmul.i64(i64 %zextx, i64 %zexty) ; intrinsic under test, at the wider type
+  ret i64 %a
+}
+
+define i128 @clmul_i128_zext(i64 %x, i64 %y) { ; llvm.clmul.i128 on two i64 values zero-extended to i128; the assertions below are autogenerated (see the NOTE at the top of the file), so regenerate them rather than hand-editing
+; CHECK-LABEL: clmul_i128_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #480
+; CHECK-NEXT:    sub sp, #480
+; CHECK-NEXT:    mov r12, r0
+; CHECK-NEXT:    and r0, r2, #2
+; CHECK-NEXT:    and r7, r2, #1
+; CHECK-NEXT:    str r0, [sp, #476] @ 4-byte Spill
+; CHECK-NEXT:    str r7, [sp, #472] @ 4-byte Spill
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    and r6, r3, #1
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    str r6, [sp, #344] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #4
+; CHECK-NEXT:    str r7, [sp, #468] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #8
+; CHECK-NEXT:    str r7, [sp, #464] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #16
+; CHECK-NEXT:    str r7, [sp, #460] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #32
+; CHECK-NEXT:    str r7, [sp, #456] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #64
+; CHECK-NEXT:    str r7, [sp, #452] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #128
+; CHECK-NEXT:    str r7, [sp, #448] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #256
+; CHECK-NEXT:    str r7, [sp, #444] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #512
+; CHECK-NEXT:    str r7, [sp, #440] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #1024
+; CHECK-NEXT:    str r7, [sp, #436] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #2048
+; CHECK-NEXT:    str r7, [sp, #432] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #4096
+; CHECK-NEXT:    str r7, [sp, #428] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #8192
+; CHECK-NEXT:    str r7, [sp, #424] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #16384
+; CHECK-NEXT:    str r7, [sp, #420] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #32768
+; CHECK-NEXT:    str r7, [sp, #416] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #65536
+; CHECK-NEXT:    str r7, [sp, #412] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #131072
+; CHECK-NEXT:    str r7, [sp, #408] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #262144
+; CHECK-NEXT:    str r7, [sp, #404] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #524288
+; CHECK-NEXT:    str r7, [sp, #400] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #1048576
+; CHECK-NEXT:    str r7, [sp, #396] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #2097152
+; CHECK-NEXT:    str r7, [sp, #392] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #4194304
+; CHECK-NEXT:    str r7, [sp, #388] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #8388608
+; CHECK-NEXT:    str r7, [sp, #384] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #16777216
+; CHECK-NEXT:    str r7, [sp, #380] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #33554432
+; CHECK-NEXT:    str r7, [sp, #376] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #67108864
+; CHECK-NEXT:    str r7, [sp, #372] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #134217728
+; CHECK-NEXT:    str r7, [sp, #368] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #268435456
+; CHECK-NEXT:    str r7, [sp, #364] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #536870912
+; CHECK-NEXT:    str r7, [sp, #360] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #1073741824
+; CHECK-NEXT:    str r7, [sp, #356] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #-2147483648
+; CHECK-NEXT:    str r7, [sp, #352] @ 4-byte Spill
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    and r4, r2, #2
+; CHECK-NEXT:    and r5, r2, #1
+; CHECK-NEXT:    str r4, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    and r9, r2, #67108864
+; CHECK-NEXT:    str r5, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    and r8, r2, #134217728
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r3, #2
+; CHECK-NEXT:    str r7, [sp, #348] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r12, r7
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4
+; CHECK-NEXT:    str r6, [sp, #340] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8
+; CHECK-NEXT:    str r6, [sp, #336] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16
+; CHECK-NEXT:    str r6, [sp, #332] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32
+; CHECK-NEXT:    str r6, [sp, #328] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #64
+; CHECK-NEXT:    str r6, [sp, #324] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #128
+; CHECK-NEXT:    str r6, [sp, #320] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #256
+; CHECK-NEXT:    str r6, [sp, #316] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #512
+; CHECK-NEXT:    str r6, [sp, #312] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1024
+; CHECK-NEXT:    str r6, [sp, #308] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2048
+; CHECK-NEXT:    str r6, [sp, #304] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4096
+; CHECK-NEXT:    str r6, [sp, #300] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8192
+; CHECK-NEXT:    str r6, [sp, #296] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16384
+; CHECK-NEXT:    str r6, [sp, #292] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32768
+; CHECK-NEXT:    str r6, [sp, #288] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #65536
+; CHECK-NEXT:    str r6, [sp, #284] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #131072
+; CHECK-NEXT:    str r6, [sp, #280] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #262144
+; CHECK-NEXT:    str r6, [sp, #276] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #524288
+; CHECK-NEXT:    str r6, [sp, #272] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1048576
+; CHECK-NEXT:    str r6, [sp, #268] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2097152
+; CHECK-NEXT:    str r6, [sp, #264] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4194304
+; CHECK-NEXT:    str r6, [sp, #260] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8388608
+; CHECK-NEXT:    str r6, [sp, #256] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16777216
+; CHECK-NEXT:    str r6, [sp, #252] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #33554432
+; CHECK-NEXT:    str r6, [sp, #248] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #67108864
+; CHECK-NEXT:    str r6, [sp, #244] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #134217728
+; CHECK-NEXT:    str r6, [sp, #240] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #268435456
+; CHECK-NEXT:    str r6, [sp, #236] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #536870912
+; CHECK-NEXT:    str r6, [sp, #232] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1073741824
+; CHECK-NEXT:    str r6, [sp, #228] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #-2147483648
+; CHECK-NEXT:    rbit r3, r3
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #268435456
+; CHECK-NEXT:    eor.w lr, r7, r0
+; CHECK-NEXT:    rbit r0, r12
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #4
+; CHECK-NEXT:    str r5, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #8
+; CHECK-NEXT:    str r5, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #16
+; CHECK-NEXT:    str r5, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #32
+; CHECK-NEXT:    str r5, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #64
+; CHECK-NEXT:    str r5, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #128
+; CHECK-NEXT:    str r5, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #256
+; CHECK-NEXT:    str r5, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #512
+; CHECK-NEXT:    str r5, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #1024
+; CHECK-NEXT:    str r5, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #2048
+; CHECK-NEXT:    str r5, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #4096
+; CHECK-NEXT:    str r5, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #8192
+; CHECK-NEXT:    str r5, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #16384
+; CHECK-NEXT:    str r5, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #32768
+; CHECK-NEXT:    str r5, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #65536
+; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #131072
+; CHECK-NEXT:    str r5, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #262144
+; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #524288
+; CHECK-NEXT:    str r5, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #1048576
+; CHECK-NEXT:    str r5, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #2097152
+; CHECK-NEXT:    str r5, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #4194304
+; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #8388608
+; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #16777216
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #33554432
+; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r9
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r8
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #536870912
+; CHECK-NEXT:    mul r11, r0, r5
+; CHECK-NEXT:    eor.w r4, r4, r11
+; CHECK-NEXT:    and r11, r2, #1073741824
+; CHECK-NEXT:    and r2, r2, #-2147483648
+; CHECK-NEXT:    mul r10, r0, r11
+; CHECK-NEXT:    eor.w r4, r4, r10
+; CHECK-NEXT:    and r10, r3, #1073741824
+; CHECK-NEXT:    rbit r4, r4
+; CHECK-NEXT:    eor.w r7, lr, r4, lsr #1
+; CHECK-NEXT:    str r7, [sp, #224] @ 4-byte Spill
+; CHECK-NEXT:    and r7, r3, #2
+; CHECK-NEXT:    str r7, [sp, #220] @ 4-byte Spill
+; CHECK-NEXT:    and lr, r3, #-2147483648
+; CHECK-NEXT:    mul r4, r0, r7
+; CHECK-NEXT:    and r7, r3, #1
+; CHECK-NEXT:    str r7, [sp, #216] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #4
+; CHECK-NEXT:    str r4, [sp, #212] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #8
+; CHECK-NEXT:    str r4, [sp, #208] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #16
+; CHECK-NEXT:    str r4, [sp, #204] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #32
+; CHECK-NEXT:    str r4, [sp, #200] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #64
+; CHECK-NEXT:    str r4, [sp, #196] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #128
+; CHECK-NEXT:    str r4, [sp, #192] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #256
+; CHECK-NEXT:    str r4, [sp, #188] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #512
+; CHECK-NEXT:    str r4, [sp, #184] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #1024
+; CHECK-NEXT:    str r4, [sp, #180] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #2048
+; CHECK-NEXT:    str r4, [sp, #176] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #4096
+; CHECK-NEXT:    str r4, [sp, #172] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #8192
+; CHECK-NEXT:    str r4, [sp, #168] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #16384
+; CHECK-NEXT:    str r4, [sp, #164] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #32768
+; CHECK-NEXT:    str r4, [sp, #160] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #65536
+; CHECK-NEXT:    str r4, [sp, #156] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #131072
+; CHECK-NEXT:    str r4, [sp, #152] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #262144
+; CHECK-NEXT:    str r4, [sp, #148] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #524288
+; CHECK-NEXT:    str r4, [sp, #144] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #1048576
+; CHECK-NEXT:    str r4, [sp, #140] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #2097152
+; CHECK-NEXT:    str r4, [sp, #136] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #4194304
+; CHECK-NEXT:    str r4, [sp, #132] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #8388608
+; CHECK-NEXT:    str r4, [sp, #128] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #16777216
+; CHECK-NEXT:    str r4, [sp, #124] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #33554432
+; CHECK-NEXT:    str r4, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #67108864
+; CHECK-NEXT:    str r4, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #134217728
+; CHECK-NEXT:    str r4, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #268435456
+; CHECK-NEXT:    str r4, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #536870912
+; CHECK-NEXT:    str r4, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    mul r4, r0, r10
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    eor.w r3, r4, r0
+; CHECK-NEXT:    ldr r4, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    rbit r0, r1
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r0, r9
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r0, r8
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    eors r4, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r11
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    ldr r4, [sp, #344] @ 4-byte Reload
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #348] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #340] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #336] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #332] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #328] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #324] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #320] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #316] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #312] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #308] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #304] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #300] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #296] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #292] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #288] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #284] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #280] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #276] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #272] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #268] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #264] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #260] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #256] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #252] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #248] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #244] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #240] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #236] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #232] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #228] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r4, r1
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #216] @ 4-byte Reload
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eor.w r1, r2, r1, lsr #1
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    rbit r2, r1
+; CHECK-NEXT:    ldr r1, [sp, #220] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r0, r1
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #212] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #208] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #204] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #200] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #196] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #192] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #188] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #184] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #180] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #176] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #172] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #168] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #164] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #160] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #156] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #152] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #148] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #144] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #140] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #136] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #132] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #128] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #124] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r10
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    rbit r3, r0
+; CHECK-NEXT:    ldr r0, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r2, r3, #1
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    mul r0, r12, r0
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #380] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #376] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #372] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #368] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #364] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #360] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #356] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #352] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #224] @ 4-byte Reload
+; CHECK-NEXT:    add sp, #480
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %zextx = zext i64 %x to i128 ; widen %x; the upper 64 bits of the i128 operand are zero
+  %zexty = zext i64 %y to i128 ; same widening for %y
+  %a = call i128 @llvm.clmul.i128(i128 %zextx, i128 %zexty) ; i128 clmul of the two widened operands
+  ret i128 %a
+}
+
+; Carry-less multiply of two <16 x i8> vectors via @llvm.clmul.v16i8.
+; The expected code loads one operand from the stack into q0 and assembles the
+; other in q1 from r0-r3. For each single-bit mask 0x1..0x80 it ANDs the mask
+; with q0, multiplies the result by q1 (vmul.i8) and XORs the product into the
+; running value in q2; the final XOR lands in q0, which is moved to r0-r3.
+; The CHECK lines are autogenerated (see the NOTE on the first line of the file).
+define <16 x i8> @clmul_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: clmul_v16i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i8 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i8 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q2, q1, q2
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i8 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x80
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i8 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a = call <16 x i8> @llvm.clmul.v16i8(<16 x i8> %x, <16 x i8> %y)
+  ret <16 x i8> %a
+}
+
+; Carry-less multiply of two <8 x i8> vectors via @llvm.clmul.v8i8.
+; Note that the expected code operates on 16-bit lanes (vmov.i16 / vmul.i16)
+; while still using only the eight single-bit masks 0x1..0x80. One operand is
+; loaded from the stack into q0, the other is assembled in q1 from r0-r3; each
+; masked copy of q0 is multiplied by q1 and XORed into q2, and the final value
+; in q0 is moved to r0-r3.
+; The CHECK lines are autogenerated (see the NOTE on the first line of the file).
+define <8 x i8> @clmul_v8i8(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: clmul_v8i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i16 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q2, q1, q2
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i16 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a = call <8 x i8> @llvm.clmul.v8i8(<8 x i8> %x, <8 x i8> %y)
+  ret <8 x i8> %a
+}
+
+; Carry-less multiply of two <8 x i16> vectors via @llvm.clmul.v8i16.
+; The expected code loads one operand from the stack into q0 and assembles the
+; other in q1 from r0-r3. For each of the sixteen single-bit masks
+; 0x1..0x8000 it ANDs the mask with q0, multiplies the result by q1 (vmul.i16)
+; and XORs the product into q2; the final XOR lands in q0, which is moved to
+; r0-r3.
+; The CHECK lines are autogenerated (see the NOTE on the first line of the file).
+define <8 x i16> @clmul_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: clmul_v8i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i16 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q2, q1, q2
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i16 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %x, <8 x i16> %y)
+  ret <8 x i16> %a
+}
+
+; Carry-less multiply of two <4 x i16> vectors via @llvm.clmul.v4i16.
+; Note that the expected code operates on 32-bit lanes (vmov.i32 / vmul.i32)
+; while still using only the sixteen single-bit masks 0x1..0x8000. One operand
+; is loaded from the stack into q0, the other is assembled in q1 from r0-r3;
+; each masked copy of q0 is multiplied by q1 and XORed into q2, and the final
+; value in q0 is moved to r0-r3.
+; The CHECK lines are autogenerated (see the NOTE on the first line of the file).
+define <4 x i16> @clmul_v4i16(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: clmul_v4i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a = call <4 x i16> @llvm.clmul.v4i16(<4 x i16> %x, <4 x i16> %y)
+  ret <4 x i16> %a
+}
+
+; Carry-less multiply of two <4 x i32> vectors via @llvm.clmul.v4i32.
+; The expected code loads one operand from the stack into q0 and assembles the
+; other in q1 from r0-r3. For each of the thirty-two single-bit masks
+; 0x1..0x80000000 it ANDs the mask with q0, multiplies the result by q1
+; (vmul.i32) and XORs the product into q2; the final XOR lands in q0, which is
+; moved to r0-r3.
+; The CHECK lines are autogenerated (see the NOTE on the first line of the file).
+define <4 x i32> @clmul_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: clmul_v4i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80000000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %x, <4 x i32> %y)
+  ret <4 x i32> %a
+}
+
+; FIXME: The <2 x i32> clmul test below is disabled (commented out); re-enable it once this case is fixed.
+;define <2 x i32> @clmul_v2i32(<2 x i32> %x, <2 x i32> %y) {
+;  %a = call <2 x i32> @llvm.clmul.v2i32(<2 x i32> %x, <2 x i32> %y)
+;  ret <2 x i32> %a
+;}
+
+define <2 x i64> @clmul_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: clmul_v2i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    adr.w r12, .LCPI13_65
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vldrw.u32 q1, [r12]
+; CHECK-NEXT:    add.w r12, sp, #40
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vand q1, q0, q1
+; CHECK-NEXT:    lsll r6, r7, #1
+; CHECK-NEXT:    vmov r12, s4
+; CHECK-NEXT:    vmov r4, s6
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    mov.w r12, #0
+; CHECK-NEXT:    csetm lr, eq
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    bfi r5, lr, #0, #8
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    lsll r4, r5, #1
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_66
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    vpsel q2, q1, q2
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #2
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    vmov d6, r0, r1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    vmov d7, r2, r3
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    adr.w r7, .LCPI13_67
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #2
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_68
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #3
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_69
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #4
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #4
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_70
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #5
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #5
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_71
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #6
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #6
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_72
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #7
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #7
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_73
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #8
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #8
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_74
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #9
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #9
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_10
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #10
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #10
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_11
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #11
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #11
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_12
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #12
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #12
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_13
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #13
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #13
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_14
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #14
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #14
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_15
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #15
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #15
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_16
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #16
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #16
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_17
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #17
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #17
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_18
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #18
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #18
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_19
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #19
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #19
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_20
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #20
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #20
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_21
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #21
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #21
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_22
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #22
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #22
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_23
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #23
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #23
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_24
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #24
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #24
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_25
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #25
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #25
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_26
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #26
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #26
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_27
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #27
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #27
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_28
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #28
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #28
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_29
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #29
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #29
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_30
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    lsll r4, r5, #30
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r7, #30
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI13_31
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    lsll r4, r3, #31
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r6, r7, #8, #8
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsll r6, r1, #31
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r1, r3
+; CHECK-NEXT:    adr.w r1, .LCPI13_32
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q3, q2, q3
+; CHECK-NEXT:    vldrw.u32 q2, [r1]
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vmov r1, s9
+; CHECK-NEXT:    vldr s8, .LCPI13_75
+; CHECK-NEXT:    vmov s9, r0
+; CHECK-NEXT:    vmov.f32 s10, s8
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s11
+; CHECK-NEXT:    vmov s11, r2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    adr.w r1, .LCPI13_34
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #1
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #1
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_35
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #2
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #2
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_36
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #3
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #3
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_37
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #4
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #4
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_38
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #5
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #5
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_39
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #6
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #6
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_40
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #7
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #7
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_41
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #8
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #8
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_42
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #9
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #9
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_43
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #10
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #10
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_44
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #11
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #11
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_45
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #12
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_46
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #13
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #13
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_47
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #14
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #14
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_48
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    b.w .LBB13_2
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI13_75:
+; CHECK-NEXT:    .long 0x00000000 @ float 0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB13_2:
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #15
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #15
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_49
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #16
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #16
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_50
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #17
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #17
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_51
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #18
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #18
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_52
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #19
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #19
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_53
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #20
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #20
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_54
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #21
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #21
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_55
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #22
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #22
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_56
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #23
+; CHECK-NEXT:    b.w .LBB13_13
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI13_65:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.4:
+; CHECK-NEXT:  .LCPI13_66:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LCPI13_67:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.6:
+; CHECK-NEXT:  .LCPI13_68:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.7:
+; CHECK-NEXT:  .LCPI13_69:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.8:
+; CHECK-NEXT:  .LCPI13_70:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.9:
+; CHECK-NEXT:  .LCPI13_71:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.10:
+; CHECK-NEXT:  .LCPI13_72:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.11:
+; CHECK-NEXT:  .LCPI13_73:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.12:
+; CHECK-NEXT:  .LCPI13_74:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB13_13:
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #23
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_57
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #24
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #24
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_58
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #25
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #25
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_59
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #26
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #26
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_60
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #27
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #27
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_61
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #28
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #28
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr.w r1, .LCPI13_62
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #29
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #29
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr r1, .LCPI13_63
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r1, s17
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #30
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    lsls r1, r0, #30
+; CHECK-NEXT:    vmov s9, r1
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    adr r1, .LCPI13_64
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r1]
+; CHECK-NEXT:    lsls r0, r0, #31
+; CHECK-NEXT:    vand q0, q0, q4
+; CHECK-NEXT:    vmov s9, r0
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r12, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r12, r1, #8, #8
+; CHECK-NEXT:    lsls r1, r2, #31
+; CHECK-NEXT:    vmsr p0, r12
+; CHECK-NEXT:    vmov s11, r1
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    veor q0, q3, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.14:
+; CHECK-NEXT:  .LCPI13_10:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_11:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_12:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_13:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI13_32:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:  .LCPI13_34:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:  .LCPI13_35:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:  .LCPI13_36:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:  .LCPI13_37:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:  .LCPI13_38:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:  .LCPI13_39:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:  .LCPI13_40:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:  .LCPI13_41:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:  .LCPI13_42:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:  .LCPI13_43:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:  .LCPI13_44:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:  .LCPI13_45:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:  .LCPI13_46:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:  .LCPI13_47:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:  .LCPI13_48:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:  .LCPI13_49:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:  .LCPI13_50:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:  .LCPI13_51:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:  .LCPI13_52:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:  .LCPI13_53:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:  .LCPI13_54:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:  .LCPI13_55:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:  .LCPI13_56:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:  .LCPI13_57:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:  .LCPI13_58:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:  .LCPI13_59:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:  .LCPI13_60:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:  .LCPI13_61:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:  .LCPI13_62:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:  .LCPI13_63:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:  .LCPI13_64:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+  %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %x, <2 x i64> %y)
+  ret <2 x i64> %a
+}
+
+define <1 x i64> @clmul_v1i64(<1 x i64> %x, <1 x i64> %y) { ; carry-less multiply of a one-element i64 vector via llvm.clmul.v1i64
+; CHECK-LABEL: clmul_v1i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #108
+; CHECK-NEXT:    sub sp, #108
+; CHECK-NEXT:    and r7, r2, #2
+; CHECK-NEXT:    and r6, r2, #1
+; CHECK-NEXT:    str r7, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    and r11, r2, #16777216
+; CHECK-NEXT:    str r6, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    and r10, r2, #33554432
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    and r9, r2, #67108864
+; CHECK-NEXT:    and r8, r2, #134217728
+; CHECK-NEXT:    mul r5, r1, r9
+; CHECK-NEXT:    and lr, r2, #1073741824
+; CHECK-NEXT:    and r12, r2, #-2147483648
+; CHECK-NEXT:    mul r4, r1, r8
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8
+; CHECK-NEXT:    str r6, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32
+; CHECK-NEXT:    str r6, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #64
+; CHECK-NEXT:    str r6, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #128
+; CHECK-NEXT:    str r6, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #256
+; CHECK-NEXT:    str r6, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #512
+; CHECK-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1024
+; CHECK-NEXT:    str r6, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2048
+; CHECK-NEXT:    str r6, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4096
+; CHECK-NEXT:    str r6, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8192
+; CHECK-NEXT:    str r6, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16384
+; CHECK-NEXT:    str r6, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32768
+; CHECK-NEXT:    str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #65536
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #131072
+; CHECK-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #262144
+; CHECK-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #524288
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1048576
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2097152
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4194304
+; CHECK-NEXT:    str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8388608
+; CHECK-NEXT:    str r6, [sp] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r1, r11
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r1, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #268435456
+; CHECK-NEXT:    mul r7, r1, r5
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r2, #536870912
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    mul r6, r1, r4
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r1, lr
+; CHECK-NEXT:    mul r1, r1, r12
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    and r6, r3, #2
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #4
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #8
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #16
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #32
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #64
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #128
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #256
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #512
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1024
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #2048
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #4096
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #8192
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #16384
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #32768
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #65536
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #131072
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #262144
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #524288
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1048576
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #2097152
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #4194304
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #8388608
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #16777216
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #33554432
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #67108864
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #134217728
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #268435456
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #536870912
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r3, #1073741824
+; CHECK-NEXT:    and r3, r3, #-2147483648
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #1
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    and r6, r2, #2
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    rbit r3, r0
+; CHECK-NEXT:    muls r6, r3, r6
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #4
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #8
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #16
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #32
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #64
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #128
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #256
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #512
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #1024
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #2048
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #4096
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #8192
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #16384
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #32768
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #65536
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #131072
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #262144
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #524288
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #1048576
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #2097152
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #4194304
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #8388608
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #16777216
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #33554432
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #67108864
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #134217728
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #268435456
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r2, #536870912
+; CHECK-NEXT:    and r2, r2, #1073741824
+; CHECK-NEXT:    muls r7, r3, r7
+; CHECK-NEXT:    muls r2, r3, r2
+; CHECK-NEXT:    ldr r3, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    eor.w r1, r1, r2, lsr #1
+; CHECK-NEXT:    ldr r2, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r11
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r10
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r9
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r8
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r5
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, r4
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    mul r3, r0, lr
+; CHECK-NEXT:    mul r0, r0, r12
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    add sp, #108
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <1 x i64> @llvm.clmul.v1i64(<1 x i64> %x, <1 x i64> %y) ; the expected output above is fully scalar: per-bit and/mul/eor chains plus rbit on core registers, no MVE vector instructions, with the single-bit AND results spilled to a 108-byte stack area and reloaded
+  ret <1 x i64> %a
+}
+
+define <1 x i128> @clmul_v1i128(<1 x i128> %x, <1 x i128> %y) {
+; CHECK-LABEL: clmul_v1i128:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #636
+; CHECK-NEXT:    sub.w sp, sp, #636
+; CHECK-NEXT:    ldr.w r10, [sp, #672]
+; CHECK-NEXT:    mov lr, r3
+; CHECK-NEXT:    mov r3, r1
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    and r0, r10, #2
+; CHECK-NEXT:    str r0, [sp, #528] @ 4-byte Spill
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    ldr r6, [sp, #680]
+; CHECK-NEXT:    muls r0, r2, r0
+; CHECK-NEXT:    and r2, r10, #1
+; CHECK-NEXT:    str r2, [sp, #524] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    and r4, r6, #1
+; CHECK-NEXT:    str r4, [sp, #252] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #4
+; CHECK-NEXT:    str r2, [sp, #520] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #8
+; CHECK-NEXT:    str r2, [sp, #508] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #16
+; CHECK-NEXT:    str r2, [sp, #552] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #32
+; CHECK-NEXT:    str r2, [sp, #548] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #64
+; CHECK-NEXT:    str r2, [sp, #556] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #128
+; CHECK-NEXT:    str r2, [sp, #516] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #256
+; CHECK-NEXT:    str r2, [sp, #512] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #512
+; CHECK-NEXT:    str r2, [sp, #544] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #1024
+; CHECK-NEXT:    str r2, [sp, #540] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #2048
+; CHECK-NEXT:    str r2, [sp, #536] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #4096
+; CHECK-NEXT:    str r2, [sp, #532] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #8192
+; CHECK-NEXT:    str r2, [sp, #632] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #16384
+; CHECK-NEXT:    str r2, [sp, #628] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #32768
+; CHECK-NEXT:    str r2, [sp, #624] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #65536
+; CHECK-NEXT:    str r2, [sp, #620] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #131072
+; CHECK-NEXT:    str r2, [sp, #616] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #262144
+; CHECK-NEXT:    str r2, [sp, #612] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #524288
+; CHECK-NEXT:    str r2, [sp, #608] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #1048576
+; CHECK-NEXT:    str r2, [sp, #604] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #2097152
+; CHECK-NEXT:    str r2, [sp, #600] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #4194304
+; CHECK-NEXT:    str r2, [sp, #596] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #8388608
+; CHECK-NEXT:    str r2, [sp, #592] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #16777216
+; CHECK-NEXT:    str r2, [sp, #588] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #33554432
+; CHECK-NEXT:    str r2, [sp, #584] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #67108864
+; CHECK-NEXT:    str r2, [sp, #580] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #134217728
+; CHECK-NEXT:    str r2, [sp, #576] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #268435456
+; CHECK-NEXT:    str r2, [sp, #572] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #536870912
+; CHECK-NEXT:    str r2, [sp, #568] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #1073741824
+; CHECK-NEXT:    str r2, [sp, #564] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r10, #-2147483648
+; CHECK-NEXT:    str r2, [sp, #560] @ 4-byte Spill
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    and r2, r6, #2
+; CHECK-NEXT:    str r2, [sp, #256] @ 4-byte Spill
+; CHECK-NEXT:    muls r2, r1, r2
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #4
+; CHECK-NEXT:    str r4, [sp, #248] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #8
+; CHECK-NEXT:    str r4, [sp, #244] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #16
+; CHECK-NEXT:    str r4, [sp, #236] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #32
+; CHECK-NEXT:    str r4, [sp, #232] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #64
+; CHECK-NEXT:    str r4, [sp, #228] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #128
+; CHECK-NEXT:    str r4, [sp, #224] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #256
+; CHECK-NEXT:    str r4, [sp, #220] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #512
+; CHECK-NEXT:    str r4, [sp, #216] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #1024
+; CHECK-NEXT:    str r4, [sp, #212] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #2048
+; CHECK-NEXT:    str r4, [sp, #208] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #4096
+; CHECK-NEXT:    str r4, [sp, #204] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #8192
+; CHECK-NEXT:    str r4, [sp, #200] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #16384
+; CHECK-NEXT:    str r4, [sp, #196] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #32768
+; CHECK-NEXT:    str r4, [sp, #192] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #65536
+; CHECK-NEXT:    str r4, [sp, #188] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #131072
+; CHECK-NEXT:    str r4, [sp, #184] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #262144
+; CHECK-NEXT:    str r4, [sp, #180] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #524288
+; CHECK-NEXT:    str r4, [sp, #176] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #1048576
+; CHECK-NEXT:    str r4, [sp, #172] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #2097152
+; CHECK-NEXT:    str r4, [sp, #168] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #4194304
+; CHECK-NEXT:    str r4, [sp, #164] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #8388608
+; CHECK-NEXT:    str r4, [sp, #160] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #16777216
+; CHECK-NEXT:    str r4, [sp, #156] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #33554432
+; CHECK-NEXT:    str r4, [sp, #152] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #67108864
+; CHECK-NEXT:    str r4, [sp, #148] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #134217728
+; CHECK-NEXT:    str r4, [sp, #144] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #268435456
+; CHECK-NEXT:    str r4, [sp, #140] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #536870912
+; CHECK-NEXT:    str r4, [sp, #136] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #1073741824
+; CHECK-NEXT:    str r4, [sp, #132] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    and r4, r6, #-2147483648
+; CHECK-NEXT:    str r4, [sp, #128] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    str r0, [sp, #264] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #676]
+; CHECK-NEXT:    rbit r2, r1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    and r5, r0, #1
+; CHECK-NEXT:    and r4, r0, #2
+; CHECK-NEXT:    str r4, [sp, #240] @ 4-byte Spill
+; CHECK-NEXT:    and r9, r0, #-2147483648
+; CHECK-NEXT:    muls r4, r2, r4
+; CHECK-NEXT:    str r5, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #4
+; CHECK-NEXT:    str r5, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #8
+; CHECK-NEXT:    str r5, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #16
+; CHECK-NEXT:    str r5, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #32
+; CHECK-NEXT:    str r5, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #64
+; CHECK-NEXT:    str r5, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #128
+; CHECK-NEXT:    str r5, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #256
+; CHECK-NEXT:    str r5, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #512
+; CHECK-NEXT:    str r5, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #1024
+; CHECK-NEXT:    str r5, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #2048
+; CHECK-NEXT:    str r5, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #4096
+; CHECK-NEXT:    str r5, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #8192
+; CHECK-NEXT:    str r5, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #16384
+; CHECK-NEXT:    str r5, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #32768
+; CHECK-NEXT:    str r5, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #65536
+; CHECK-NEXT:    str r5, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #131072
+; CHECK-NEXT:    str r5, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #262144
+; CHECK-NEXT:    str r5, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #524288
+; CHECK-NEXT:    str r5, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #1048576
+; CHECK-NEXT:    str r5, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #2097152
+; CHECK-NEXT:    str r5, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #4194304
+; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #8388608
+; CHECK-NEXT:    str r5, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #16777216
+; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #33554432
+; CHECK-NEXT:    str r5, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #67108864
+; CHECK-NEXT:    str r5, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #134217728
+; CHECK-NEXT:    str r5, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #268435456
+; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #536870912
+; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    and r5, r0, #1073741824
+; CHECK-NEXT:    mul r0, r2, r9
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r2, r5
+; CHECK-NEXT:    rbit r5, r10
+; CHECK-NEXT:    and r10, r5, #268435456
+; CHECK-NEXT:    str.w r10, [sp, #124] @ 4-byte Spill
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    and r7, r5, #1
+; CHECK-NEXT:    eor.w r8, r4, r0
+; CHECK-NEXT:    rbit r0, r3
+; CHECK-NEXT:    and r4, r5, #2
+; CHECK-NEXT:    str r4, [sp, #452] @ 4-byte Spill
+; CHECK-NEXT:    mul r11, r0, r7
+; CHECK-NEXT:    str r7, [sp, #504] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eor.w r11, r11, r4
+; CHECK-NEXT:    and r4, r5, #4
+; CHECK-NEXT:    str r4, [sp, #500] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #8
+; CHECK-NEXT:    str r4, [sp, #496] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #16
+; CHECK-NEXT:    str r4, [sp, #492] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #32
+; CHECK-NEXT:    str r4, [sp, #488] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #64
+; CHECK-NEXT:    str r4, [sp, #484] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #128
+; CHECK-NEXT:    str r4, [sp, #480] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #256
+; CHECK-NEXT:    str r4, [sp, #476] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #512
+; CHECK-NEXT:    str r4, [sp, #472] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #1024
+; CHECK-NEXT:    str r4, [sp, #468] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #2048
+; CHECK-NEXT:    str r4, [sp, #464] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #4096
+; CHECK-NEXT:    str r4, [sp, #460] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #8192
+; CHECK-NEXT:    str r4, [sp, #456] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #16384
+; CHECK-NEXT:    str r4, [sp, #396] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #32768
+; CHECK-NEXT:    str r4, [sp, #448] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #65536
+; CHECK-NEXT:    str r4, [sp, #444] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #131072
+; CHECK-NEXT:    str r4, [sp, #440] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #262144
+; CHECK-NEXT:    str r4, [sp, #436] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #524288
+; CHECK-NEXT:    str r4, [sp, #432] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #1048576
+; CHECK-NEXT:    str r4, [sp, #428] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #2097152
+; CHECK-NEXT:    str r4, [sp, #424] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #4194304
+; CHECK-NEXT:    str r4, [sp, #420] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #8388608
+; CHECK-NEXT:    str r4, [sp, #416] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #16777216
+; CHECK-NEXT:    str r4, [sp, #412] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #33554432
+; CHECK-NEXT:    str r4, [sp, #408] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #67108864
+; CHECK-NEXT:    str r4, [sp, #404] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #134217728
+; CHECK-NEXT:    str r4, [sp, #400] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #536870912
+; CHECK-NEXT:    str r4, [sp, #392] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r10
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    and r4, r5, #1073741824
+; CHECK-NEXT:    and r5, r5, #-2147483648
+; CHECK-NEXT:    str r4, [sp, #388] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r0, r4
+; CHECK-NEXT:    eor.w r4, r11, r7
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    ldr r5, [sp, #676]
+; CHECK-NEXT:    eor.w r8, r8, r4
+; CHECK-NEXT:    and r4, r5, #2
+; CHECK-NEXT:    str r4, [sp, #260] @ 4-byte Spill
+; CHECK-NEXT:    mul r11, r3, r4
+; CHECK-NEXT:    and r4, r5, #1
+; CHECK-NEXT:    str r4, [sp, #384] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #4
+; CHECK-NEXT:    str r4, [sp, #380] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #8
+; CHECK-NEXT:    str r4, [sp, #376] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #16
+; CHECK-NEXT:    str r4, [sp, #372] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #32
+; CHECK-NEXT:    str r4, [sp, #368] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #64
+; CHECK-NEXT:    str r4, [sp, #364] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #128
+; CHECK-NEXT:    str r4, [sp, #360] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #256
+; CHECK-NEXT:    str r4, [sp, #356] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #512
+; CHECK-NEXT:    str r4, [sp, #352] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #1024
+; CHECK-NEXT:    str r4, [sp, #348] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #2048
+; CHECK-NEXT:    str r4, [sp, #344] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #4096
+; CHECK-NEXT:    str r4, [sp, #340] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #8192
+; CHECK-NEXT:    str r4, [sp, #336] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #16384
+; CHECK-NEXT:    str r4, [sp, #332] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #32768
+; CHECK-NEXT:    str r4, [sp, #328] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #65536
+; CHECK-NEXT:    str r4, [sp, #324] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #131072
+; CHECK-NEXT:    str r4, [sp, #320] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #262144
+; CHECK-NEXT:    str r4, [sp, #316] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #524288
+; CHECK-NEXT:    str r4, [sp, #312] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #1048576
+; CHECK-NEXT:    str r4, [sp, #308] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #2097152
+; CHECK-NEXT:    str r4, [sp, #304] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #4194304
+; CHECK-NEXT:    str r4, [sp, #300] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #8388608
+; CHECK-NEXT:    str r4, [sp, #296] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #16777216
+; CHECK-NEXT:    str r4, [sp, #292] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #33554432
+; CHECK-NEXT:    str r4, [sp, #288] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #67108864
+; CHECK-NEXT:    str r4, [sp, #284] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #134217728
+; CHECK-NEXT:    str r4, [sp, #280] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #268435456
+; CHECK-NEXT:    str r4, [sp, #276] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #536870912
+; CHECK-NEXT:    str r4, [sp, #272] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    and r4, r5, #1073741824
+; CHECK-NEXT:    ldr r5, [sp, #240] @ 4-byte Reload
+; CHECK-NEXT:    str r4, [sp, #268] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r3, r4
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eor.w r4, r11, r7
+; CHECK-NEXT:    ldr r7, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    rbit r4, r4
+; CHECK-NEXT:    ldr.w r11, [sp, #524] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eor.w r4, r8, r4, lsr #1
+; CHECK-NEXT:    rbit r4, r4
+; CHECK-NEXT:    ldr.w r8, [sp, #520] @ 4-byte Reload
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    mul r0, r0, r9
+; CHECK-NEXT:    ldr.w r9, [sp, #516] @ 4-byte Reload
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #684]
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    rbit r5, r0
+; CHECK-NEXT:    ldr r0, [sp, #264] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r4, r5, #1
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #252] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #264] @ 4-byte Spill
+; CHECK-NEXT:    ldr r0, [sp, #256] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    str r5, [sp, #240] @ 4-byte Spill
+; CHECK-NEXT:    and r5, r7, #1
+; CHECK-NEXT:    muls r0, r3, r0
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #248] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #244] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #236] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #232] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #228] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #224] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #220] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #216] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #212] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #208] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #204] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #200] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #196] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #192] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #188] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #184] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #180] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #176] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #172] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #168] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #164] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #160] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #156] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #152] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #148] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #144] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #140] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #136] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #132] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    ldr r4, [sp, #128] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r3, r4
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    and r4, r7, #2
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #4
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #8
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #16
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #32
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #64
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #128
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #256
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #512
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #1024
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #2048
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #4096
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #8192
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #16384
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #32768
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #65536
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #131072
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #262144
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #524288
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #1048576
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #2097152
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #4194304
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #8388608
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #16777216
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #33554432
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #67108864
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #134217728
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #268435456
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #536870912
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #1073741824
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r7, #-2147483648
+; CHECK-NEXT:    ldr r7, [sp, #504] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    ldr r5, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r4
+; CHECK-NEXT:    rbit r4, r12
+; CHECK-NEXT:    muls r5, r4, r5
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #500] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #496] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #492] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #488] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #484] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #480] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    mul r7, r4, r10
+; CHECK-NEXT:    ldr.w r10, [sp, #512] @ 4-byte Reload
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    ldr r7, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r7, r4
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    rbit r7, r4
+; CHECK-NEXT:    rbit r4, r6
+; CHECK-NEXT:    and r5, r4, #2
+; CHECK-NEXT:    and r6, r4, #1
+; CHECK-NEXT:    muls r5, r2, r5
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #4
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #8
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #16
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #32
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #64
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #128
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #256
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #512
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #1024
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #2048
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #4096
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #8192
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #16384
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #32768
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #65536
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #131072
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #262144
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #524288
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #1048576
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #2097152
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #4194304
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #8388608
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #16777216
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #33554432
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #67108864
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #134217728
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #268435456
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    and r6, r4, #536870912
+; CHECK-NEXT:    and r4, r4, #1073741824
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    muls r4, r2, r4
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, lr, r11
+; CHECK-NEXT:    rbit r4, r4
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    eor.w r0, r0, r4, lsr #1
+; CHECK-NEXT:    ldr r4, [sp, #528] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #256] @ 4-byte Spill
+; CHECK-NEXT:    mul r0, lr, r4
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    mul r5, lr, r8
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #508] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #552] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #548] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #556] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    mul r5, lr, r9
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    mul r5, lr, r10
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #544] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #540] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #536] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #532] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #632] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #628] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #624] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #620] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #616] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #612] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #608] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #604] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #600] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #596] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #592] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #588] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #584] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #580] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #576] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #572] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #568] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #564] @ 4-byte Reload
+; CHECK-NEXT:    mul r5, lr, r5
+; CHECK-NEXT:    eors r0, r5
+; CHECK-NEXT:    ldr r5, [sp, #560] @ 4-byte Reload
+; CHECK-NEXT:    mul r7, lr, r5
+; CHECK-NEXT:    ldr r5, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w lr, [sp, #260] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #380] @ 4-byte Reload
+; CHECK-NEXT:    eors r7, r0
+; CHECK-NEXT:    mul r0, r12, lr
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #376] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #372] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #368] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #364] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #360] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #356] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #352] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #348] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #344] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #340] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #336] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #332] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #328] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #324] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #320] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #316] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #312] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #308] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #304] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #300] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #296] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #292] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #288] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #284] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #280] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #276] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #272] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    ldr r5, [sp, #268] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    mul r6, r12, r5
+; CHECK-NEXT:    eors r6, r0
+; CHECK-NEXT:    ldr r0, [sp, #676]
+; CHECK-NEXT:    and r0, r0, #-2147483648
+; CHECK-NEXT:    mul r5, r12, r0
+; CHECK-NEXT:    ldr.w r12, [sp, #548] @ 4-byte Reload
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r11
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #256] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w r11, [sp, #532] @ 4-byte Reload
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #240] @ 4-byte Reload
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r3, r4
+; CHECK-NEXT:    str r7, [sp, #256] @ 4-byte Spill
+; CHECK-NEXT:    ldr r7, [sp, #508] @ 4-byte Reload
+; CHECK-NEXT:    ldr r4, [sp, #556] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r8
+; CHECK-NEXT:    ldr.w r8, [sp, #552] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r7
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r8
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r12
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #540] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r9
+; CHECK-NEXT:    ldr.w r9, [sp, #536] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r10
+; CHECK-NEXT:    ldr.w r10, [sp, #544] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r10
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r4
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r9
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    mul r5, r3, r11
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #632] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #628] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #624] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #620] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #616] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #612] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #608] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #604] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #600] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #596] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #592] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #588] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #584] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #580] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #576] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #572] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #568] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #564] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r3, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #560] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r5, r3
+; CHECK-NEXT:    ldr r5, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r1, lr
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #380] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #376] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #372] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #368] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #364] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #360] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #356] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #352] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #348] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #344] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #340] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #336] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #332] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #328] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #324] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #320] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #316] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #312] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #308] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #304] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #300] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #296] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #292] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #288] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #284] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #280] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #276] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #272] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #268] @ 4-byte Reload
+; CHECK-NEXT:    muls r5, r1, r5
+; CHECK-NEXT:    eors r6, r5
+; CHECK-NEXT:    ldr r5, [sp, #504] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r6
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #500] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r2, r3
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #496] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #492] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #488] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #484] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #480] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    ldr r6, [sp, #124] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    mul r6, r2, r5
+; CHECK-NEXT:    ldr r5, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    muls r2, r5, r2
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #524] @ 4-byte Reload
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    eor.w r2, r0, r2, lsr #1
+; CHECK-NEXT:    ldr r0, [sp, #528] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #520] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r7
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r8
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r12
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #556] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #516] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #512] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r10
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r4
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r9
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    mul r3, r1, r11
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #632] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #628] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #624] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #620] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #616] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #612] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #608] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #604] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #600] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #596] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #592] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #588] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #584] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #580] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #576] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #572] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #568] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #564] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    ldr r3, [sp, #560] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r3, r1
+; CHECK-NEXT:    ldr r3, [sp, #256] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    ldr r2, [sp, #264] @ 4-byte Reload
+; CHECK-NEXT:    add.w sp, sp, #636
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %x, <1 x i128> %y)
+  ret <1 x i128> %a
+}
+
+define <8 x i16> @clmul_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: clmul_v8i16_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov.i16 q2, #0x2
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vmovlb.u8 q1, q1
+; CHECK-NEXT:    vmovlb.u8 q0, q0
+; CHECK-NEXT:    vand q2, q1, q2
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q2, q0, q2
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vmul.i16 q0, q0, q1
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %zextx = zext <8 x i8> %x to <8 x i16> ; widen each lane of x from i8 to i16; the upper 8 bits are zero
+  %zexty = zext <8 x i8> %y to <8 x i16> ; widen each lane of y the same way
+  %a = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %zextx, <8 x i16> %zexty) ; clmul of the zero-extended inputs; the expected asm above only masks y with bits 0x1 through 0x80
+  ret <8 x i16> %a
+}
+
+define <16 x i16> @clmul_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: clmul_v16i16_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    vmov d0, r2, r3
+; CHECK-NEXT:    add r2, sp, #88
+; CHECK-NEXT:    vldr d1, [sp, #80]
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vldrb.u16 q6, [r2, #8]
+; CHECK-NEXT:    vmov.i16 q2, #0x1
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    vmov.i16 q0, #0x2
+; CHECK-NEXT:    vand q1, q6, q0
+; CHECK-NEXT:    vldrb.u16 q0, [r1, #8]
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i16 q1, q0, q1
+; CHECK-NEXT:    vmul.i16 q2, q0, q2
+; CHECK-NEXT:    veor q1, q2, q1
+; CHECK-NEXT:    vmov.i16 q2, #0x4
+; CHECK-NEXT:    vand q3, q6, q2
+; CHECK-NEXT:    vmov.i16 q2, #0x8
+; CHECK-NEXT:    vand q4, q6, q2
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q1, q1, q3
+; CHECK-NEXT:    vmul.i16 q4, q0, q4
+; CHECK-NEXT:    veor q1, q1, q4
+; CHECK-NEXT:    vmov.i16 q4, #0x10
+; CHECK-NEXT:    vand q5, q6, q4
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vmul.i16 q5, q0, q5
+; CHECK-NEXT:    veor q1, q1, q5
+; CHECK-NEXT:    vmov.i16 q5, #0x20
+; CHECK-NEXT:    vand q7, q6, q5
+; CHECK-NEXT:    vmul.i16 q7, q0, q7
+; CHECK-NEXT:    veor q1, q1, q7
+; CHECK-NEXT:    vmov.i16 q7, #0x40
+; CHECK-NEXT:    vand q2, q6, q7
+; CHECK-NEXT:    vmul.i16 q2, q0, q2
+; CHECK-NEXT:    veor q2, q1, q2
+; CHECK-NEXT:    vmov.i16 q1, #0x80
+; CHECK-NEXT:    vand q6, q6, q1
+; CHECK-NEXT:    vmul.i16 q0, q0, q6
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov.i16 q2, #0x2
+; CHECK-NEXT:    vstrw.32 q0, [r0, #16]
+; CHECK-NEXT:    vldrb.u16 q0, [r2]
+; CHECK-NEXT:    vldrb.u16 q6, [r1]
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q2, q6, q2
+; CHECK-NEXT:    vmul.i16 q3, q6, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q6, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q6, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vand q3, q0, q4
+; CHECK-NEXT:    vmul.i16 q3, q6, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vand q3, q0, q5
+; CHECK-NEXT:    vmul.i16 q3, q6, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vand q3, q0, q7
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vmul.i16 q3, q6, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmul.i16 q0, q6, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vstrw.32 q0, [r0]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    bx lr
+  %zextx = zext <16 x i8> %x to <16 x i16> ; widen each lane of x from i8 to i16; the upper 8 bits are zero
+  %zexty = zext <16 x i8> %y to <16 x i16> ; widen each lane of y the same way
+  %a = call <16 x i16> @llvm.clmul.v16i16(<16 x i16> %zextx, <16 x i16> %zexty) ; clmul of the zero-extended inputs; the expected asm above handles two 8-lane halves, masks with bits 0x1 through 0x80 only, and stores to [r0, #16] and [r0]
+  ret <16 x i16> %a
+}
+
+define <4 x i32> @clmul_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: clmul_v4i32_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmovlb.u16 q1, q1
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %zextx = zext <4 x i16> %x to <4 x i32> ; widen each lane of x from i16 to i32; the upper 16 bits are zero
+  %zexty = zext <4 x i16> %y to <4 x i32> ; widen each lane of y the same way
+  %a = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %zextx, <4 x i32> %zexty) ; clmul of the zero-extended inputs; the expected asm above only masks y with bits 0x1 through 0x8000
+  ret <4 x i32> %a
+}
+
+define <8 x i32> @clmul_v8i32_zext(<8 x i16> %x, <8 x i16> %y) { ; clmul on <8 x i32> with both operands zero-extended from <8 x i16>; assertions below are script-generated (see NOTE at top of file) - regenerate, do not hand-edit
+; CHECK-LABEL: clmul_v8i32_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    vldr d1, [sp, #80]
+; CHECK-NEXT:    vmov d0, r2, r3
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    add r2, sp, #88
+; CHECK-NEXT:    vstrw.32 q0, [r1]
+; CHECK-NEXT:    vldrh.u32 q6, [r2, #8]
+; CHECK-NEXT:    vmov.i32 q2, #0x1
+; CHECK-NEXT:    vmov.i32 q5, #0x2
+; CHECK-NEXT:    vldrh.u32 q0, [r1, #8]
+; CHECK-NEXT:    vand q1, q6, q5
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmov.i32 q4, #0x1000
+; CHECK-NEXT:    vmul.i32 q1, q0, q1
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q2, q1
+; CHECK-NEXT:    vmov.i32 q2, #0x4
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x8
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x10
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x20
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x40
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x80
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x100
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x200
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x400
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x800
+; CHECK-NEXT:    vand q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vand q2, q6, q4
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vand q2, q6, q3
+; CHECK-NEXT:    vmul.i32 q2, q0, q2
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vmov.i32 q2, #0x4000
+; CHECK-NEXT:    vand q7, q6, q2
+; CHECK-NEXT:    vmul.i32 q7, q0, q7
+; CHECK-NEXT:    veor q7, q1, q7
+; CHECK-NEXT:    vmov.i32 q1, #0x8000
+; CHECK-NEXT:    vand q6, q6, q1
+; CHECK-NEXT:    vmul.i32 q0, q0, q6
+; CHECK-NEXT:    veor q0, q7, q0
+; CHECK-NEXT:    vstrw.32 q0, [r0, #16]
+; CHECK-NEXT:    vldrh.u32 q0, [r2]
+; CHECK-NEXT:    vldrh.u32 q6, [r1]
+; CHECK-NEXT:    vand q7, q0, q5
+; CHECK-NEXT:    vmov.i32 q5, #0x1
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    vmul.i32 q5, q6, q5
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x4
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    vmul.i32 q4, q6, q4
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x8
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    vmul.i32 q3, q6, q3
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x10
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q2, q6, q2
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x20
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x40
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x80
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x100
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x200
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x400
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    vmov.i32 q7, #0x800
+; CHECK-NEXT:    vand q7, q0, q7
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vmul.i32 q7, q6, q7
+; CHECK-NEXT:    vmul.i32 q0, q6, q0
+; CHECK-NEXT:    veor q5, q5, q7
+; CHECK-NEXT:    veor q4, q5, q4
+; CHECK-NEXT:    veor q3, q4, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vstrw.32 q0, [r0]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    bx lr
+  %zextx = zext <8 x i16> %x to <8 x i32> ; widen %x: each i16 lane becomes an i32 lane with a zero upper half
+  %zexty = zext <8 x i16> %y to <8 x i32> ; widen %y the same way
+  %a = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %zextx, <8 x i32> %zexty) ; operation under test; the expected code above handles it as two 4-lane halves, 16 single-bit masks (0x1..0x8000) per half, each vand/vmul.i32/veor
+  ret <8 x i32> %a
+}
+
+define <2 x i64> @clmul_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: clmul_v2i64_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    adr.w r5, .LCPI20_0
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    vldrw.u32 q1, [r5]
+; CHECK-NEXT:    add r5, sp, #40
+; CHECK-NEXT:    vldrw.u32 q0, [r5]
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov.i64 q4, #0xffffffff
+; CHECK-NEXT:    vand q1, q0, q1
+; CHECK-NEXT:    mov.w r9, #0
+; CHECK-NEXT:    vmov r5, s4
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csetm r5, eq
+; CHECK-NEXT:    bfi r7, r5, #0, #8
+; CHECK-NEXT:    vmov r5, s6
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csetm r5, eq
+; CHECK-NEXT:    bfi r7, r5, #8, #8
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vmsr p0, r7
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    lsll r12, r5, #1
+; CHECK-NEXT:    lsll r4, r7, #1
+; CHECK-NEXT:    vmov q2[2], q2[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI20_1
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vpsel q2, q1, q2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    vmov d6, r0, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r5, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    vmov d7, r2, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q3, q3, q4
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r5, r7, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #2
+; CHECK-NEXT:    lsll r4, r3, #2
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_3
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #3
+; CHECK-NEXT:    lsll r4, r3, #3
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_4
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #4
+; CHECK-NEXT:    lsll r4, r3, #4
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_5
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #5
+; CHECK-NEXT:    lsll r4, r3, #5
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_6
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #6
+; CHECK-NEXT:    lsll r4, r3, #6
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_7
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #7
+; CHECK-NEXT:    lsll r4, r3, #7
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_8
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #8
+; CHECK-NEXT:    lsll r4, r3, #8
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_9
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #9
+; CHECK-NEXT:    lsll r4, r3, #9
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_10
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #10
+; CHECK-NEXT:    lsll r4, r3, #10
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_11
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #11
+; CHECK-NEXT:    lsll r4, r3, #11
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_12
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #12
+; CHECK-NEXT:    lsll r4, r3, #12
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_13
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #13
+; CHECK-NEXT:    lsll r4, r3, #13
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_14
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #14
+; CHECK-NEXT:    lsll r4, r3, #14
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_15
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #15
+; CHECK-NEXT:    lsll r4, r3, #15
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_16
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #16
+; CHECK-NEXT:    lsll r4, r3, #16
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_17
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #17
+; CHECK-NEXT:    lsll r4, r3, #17
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_18
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #18
+; CHECK-NEXT:    lsll r4, r3, #18
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_19
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #19
+; CHECK-NEXT:    lsll r4, r3, #19
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_20
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #20
+; CHECK-NEXT:    lsll r4, r3, #20
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_21
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #21
+; CHECK-NEXT:    lsll r4, r3, #21
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_22
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #22
+; CHECK-NEXT:    lsll r4, r3, #22
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_23
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #23
+; CHECK-NEXT:    lsll r4, r3, #23
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI20_24
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #24
+; CHECK-NEXT:    lsll r4, r3, #24
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_25
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #25
+; CHECK-NEXT:    lsll r4, r3, #25
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_26
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #26
+; CHECK-NEXT:    lsll r4, r3, #26
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_27
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #27
+; CHECK-NEXT:    lsll r4, r3, #27
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_28
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #28
+; CHECK-NEXT:    lsll r4, r3, #28
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_29
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #29
+; CHECK-NEXT:    lsll r4, r3, #29
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_30
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    lsll r0, r9, #31
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #30
+; CHECK-NEXT:    lsll r4, r3, #30
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI20_31
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    lsll r2, r1, #31
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], r9, r1
+; CHECK-NEXT:    vpsel q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r9, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI20_0:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_1:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_2:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_3:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_4:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_5:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_6:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_7:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_8:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_9:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_10:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_11:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_12:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_13:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI20_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+  %zextx = zext <2 x i32> %x to <2 x i64>
+  %zexty = zext <2 x i32> %y to <2 x i64>
+  %a = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %zextx, <2 x i64> %zexty)
+  ret <2 x i64> %a
+}
+
+define <4 x i64> @clmul_v4i64_zext(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: clmul_v4i64_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    .pad #480
+; CHECK-NEXT:    sub sp, #480
+; CHECK-NEXT:    vmov d6, r2, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vmov.f32 s14, s13
+; CHECK-NEXT:    lsll r6, r5, #2
+; CHECK-NEXT:    vmov.i32 q5, #0x0
+; CHECK-NEXT:    mov.w lr, #0
+; CHECK-NEXT:    vmov r12, s14
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    lsll r4, r3, #2
+; CHECK-NEXT:    vmov q0[2], q0[0], r6, r4
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r3
+; CHECK-NEXT:    add r3, sp, #584
+; CHECK-NEXT:    vldrw.u32 q6, [r3]
+; CHECK-NEXT:    adr.w r3, .LCPI21_32
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmov.f32 s8, s24
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vmov.f32 s10, s25
+; CHECK-NEXT:    vstrw.32 q1, [sp, #464] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #1
+; CHECK-NEXT:    lsll r6, r5, #1
+; CHECK-NEXT:    vpsel q4, q5, q0
+; CHECK-NEXT:    vmov q0[2], q0[0], r6, r4
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_33
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vstrw.32 q1, [sp, #448] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    adr.w r3, .LCPI21_34
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    vpsel q0, q5, q0
+; CHECK-NEXT:    vstrw.32 q1, [sp, #432] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    vmov.i64 q1, #0xffffffff
+; CHECK-NEXT:    vand q1, q3, q1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #3
+; CHECK-NEXT:    lsll r6, r5, #3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vpsel q1, q5, q1
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_35
+; CHECK-NEXT:    veor q0, q1, q0
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #416] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    veor q0, q0, q4
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #4
+; CHECK-NEXT:    lsll r6, r5, #4
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_36
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #400] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #5
+; CHECK-NEXT:    lsll r6, r5, #5
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_37
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #384] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #6
+; CHECK-NEXT:    lsll r6, r5, #6
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_38
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #368] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #7
+; CHECK-NEXT:    lsll r6, r5, #7
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_39
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #352] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #8
+; CHECK-NEXT:    lsll r6, r5, #8
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_40
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #336] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #9
+; CHECK-NEXT:    lsll r6, r5, #9
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_41
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #320] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #10
+; CHECK-NEXT:    lsll r6, r5, #10
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_42
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #304] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #11
+; CHECK-NEXT:    lsll r6, r5, #11
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_43
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #288] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #12
+; CHECK-NEXT:    lsll r6, r5, #12
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_44
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #272] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #13
+; CHECK-NEXT:    lsll r6, r5, #13
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_45
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #256] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #14
+; CHECK-NEXT:    lsll r6, r5, #14
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_14
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #240] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #15
+; CHECK-NEXT:    lsll r6, r5, #15
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_15
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #224] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #16
+; CHECK-NEXT:    lsll r6, r5, #16
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_16
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #208] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #17
+; CHECK-NEXT:    lsll r6, r5, #17
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_17
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #192] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #18
+; CHECK-NEXT:    lsll r6, r5, #18
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_18
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #176] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #19
+; CHECK-NEXT:    lsll r6, r5, #19
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_19
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #160] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #20
+; CHECK-NEXT:    lsll r6, r5, #20
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_20
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #144] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #21
+; CHECK-NEXT:    lsll r6, r5, #21
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_21
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #128] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #22
+; CHECK-NEXT:    lsll r6, r5, #22
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_22
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #112] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #23
+; CHECK-NEXT:    lsll r6, r5, #23
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_23
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #96] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #24
+; CHECK-NEXT:    lsll r6, r5, #24
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_24
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #80] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #25
+; CHECK-NEXT:    lsll r6, r5, #25
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_25
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #64] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #26
+; CHECK-NEXT:    lsll r6, r5, #26
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_26
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #48] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #27
+; CHECK-NEXT:    lsll r6, r5, #27
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_27
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #28
+; CHECK-NEXT:    lsll r6, r5, #28
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_28
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #29
+; CHECK-NEXT:    lsll r6, r5, #29
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_29
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [r3]
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vstrw.32 q1, [sp] @ 16-byte Spill
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    lsll r4, r3, #30
+; CHECK-NEXT:    lsll r6, r5, #30
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov q3[2], q3[0], r6, r4
+; CHECK-NEXT:    vmov q3[3], q3[1], r5, r3
+; CHECK-NEXT:    adr.w r3, .LCPI21_30
+; CHECK-NEXT:    vldrw.u32 q7, [r3]
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    movs r6, #0
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vand q1, q2, q7
+; CHECK-NEXT:    lsll r2, r5, #31
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #0, #8
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r3, eq
+; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmsr p0, r6
+; CHECK-NEXT:    lsll r12, r3, #31
+; CHECK-NEXT:    vpsel q1, q5, q3
+; CHECK-NEXT:    vmov.i64 q3, #0xffffffff
+; CHECK-NEXT:    veor q1, q0, q1
+; CHECK-NEXT:    vmov q0[2], q0[0], r2, r12
+; CHECK-NEXT:    adr.w r2, .LCPI21_31
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r3
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q2, q2, q4
+; CHECK-NEXT:    vmov r2, s8
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s10
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vpsel q0, q5, q0
+; CHECK-NEXT:    veor q0, q1, q0
+; CHECK-NEXT:    vmov.f32 s4, s26
+; CHECK-NEXT:    vstrw.32 q0, [r0]
+; CHECK-NEXT:    vldr d1, [sp, #576]
+; CHECK-NEXT:    vmov.f32 s6, s27
+; CHECK-NEXT:    vmov.f32 s0, s2
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vmov.f32 s2, s3
+; CHECK-NEXT:    vand q2, q0, q3
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #464] @ 16-byte Reload
+; CHECK-NEXT:    vmov r3, s11
+; CHECK-NEXT:    vmov r1, s9
+; CHECK-NEXT:    vand q6, q1, q3
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    vmov r12, s2
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    lsll r6, r5, #2
+; CHECK-NEXT:    lsll r4, r7, #2
+; CHECK-NEXT:    vmov q0[2], q0[0], r6, r4
+; CHECK-NEXT:    vmov r4, s24
+; CHECK-NEXT:    vmov q0[3], q0[1], r5, r7
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s26
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q3, q5, q0
+; CHECK-NEXT:    vldrw.u32 q0, [sp, #448] @ 16-byte Reload
+; CHECK-NEXT:    lsll r4, r5, #1
+; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vmov q6[2], q6[0], r6, r4
+; CHECK-NEXT:    vmov r4, s0
+; CHECK-NEXT:    vmov q6[3], q6[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #3
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s2
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vpsel q0, q5, q6
+; CHECK-NEXT:    vldrw.u32 q6, [sp, #432] @ 16-byte Reload
+; CHECK-NEXT:    vand q6, q1, q6
+; CHECK-NEXT:    vmov r4, s24
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s26
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #3
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    veor q0, q0, q3
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #416] @ 16-byte Reload
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #4
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #400] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #4
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #5
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #384] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #5
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #6
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #368] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #6
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #7
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #352] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #7
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #8
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #336] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #8
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #9
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #320] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #9
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #10
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #304] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #10
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #11
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #288] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #11
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #12
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #272] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #12
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #13
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #256] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #13
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #240] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #14
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #15
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #224] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #15
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #16
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #208] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #16
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #17
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #192] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #17
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #18
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #176] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #18
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #19
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #160] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #19
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #20
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    b.w .LBB21_15
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI21_32:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.2:
+; CHECK-NEXT:  .LCPI21_33:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI21_34:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.4:
+; CHECK-NEXT:  .LCPI21_35:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LCPI21_36:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.6:
+; CHECK-NEXT:  .LCPI21_37:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.7:
+; CHECK-NEXT:  .LCPI21_38:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.8:
+; CHECK-NEXT:  .LCPI21_39:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.9:
+; CHECK-NEXT:  .LCPI21_40:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.10:
+; CHECK-NEXT:  .LCPI21_41:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.11:
+; CHECK-NEXT:  .LCPI21_42:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.12:
+; CHECK-NEXT:  .LCPI21_43:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.13:
+; CHECK-NEXT:  .LCPI21_44:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.14:
+; CHECK-NEXT:  .LCPI21_45:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB21_15:
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #144] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #20
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #21
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #128] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #21
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #22
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #112] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #22
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #23
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #96] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #23
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #24
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #80] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #24
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #25
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #64] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #25
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #26
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #48] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #26
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #27
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #27
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #28
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #28
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r6, r7, #29
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vldrw.u32 q3, [sp] @ 16-byte Reload
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #29
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    lsll r2, r1, #31
+; CHECK-NEXT:    lsll r6, r7, #30
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    vand q3, q1, q7
+; CHECK-NEXT:    vand q1, q1, q4
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    mov r4, r12
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    mov r5, r3
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    lsll r4, r5, #30
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r6, r4
+; CHECK-NEXT:    vmov r4, s12
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    lsll r12, r3, #31
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #0, #8
+; CHECK-NEXT:    vmov r4, s14
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    csetm r4, eq
+; CHECK-NEXT:    bfi r5, r4, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vpsel q2, q5, q2
+; CHECK-NEXT:    veor q0, q0, q2
+; CHECK-NEXT:    vmov q2[2], q2[0], r2, r12
+; CHECK-NEXT:    vmov q2[3], q2[1], r1, r3
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi lr, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s6
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi lr, r1, #8, #8
+; CHECK-NEXT:    vmsr p0, lr
+; CHECK-NEXT:    vpsel q1, q5, q2
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    vstrw.32 q0, [r0, #16]
+; CHECK-NEXT:    add sp, #480
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.16:
+; CHECK-NEXT:  .LCPI21_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI21_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+  %zextx = zext <4 x i32> %x to <4 x i64>
+  %zexty = zext <4 x i32> %y to <4 x i64>
+  %a = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %zextx, <4 x i64> %zexty)
+  ret <4 x i64> %a
+}
+
+define <1 x i128> @clmul_v1i128_zext(<1 x i64> %x, <1 x i64> %y) {
+; CHECK-LABEL: clmul_v1i128_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #480
+; CHECK-NEXT:    sub sp, #480
+; CHECK-NEXT:    mov r12, r0
+; CHECK-NEXT:    and r0, r2, #2
+; CHECK-NEXT:    and r7, r2, #1
+; CHECK-NEXT:    str r0, [sp, #476] @ 4-byte Spill
+; CHECK-NEXT:    str r7, [sp, #472] @ 4-byte Spill
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    and r6, r3, #1
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    str r6, [sp, #344] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #4
+; CHECK-NEXT:    str r7, [sp, #468] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #8
+; CHECK-NEXT:    str r7, [sp, #464] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #16
+; CHECK-NEXT:    str r7, [sp, #460] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #32
+; CHECK-NEXT:    str r7, [sp, #456] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #64
+; CHECK-NEXT:    str r7, [sp, #452] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #128
+; CHECK-NEXT:    str r7, [sp, #448] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #256
+; CHECK-NEXT:    str r7, [sp, #444] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #512
+; CHECK-NEXT:    str r7, [sp, #440] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #1024
+; CHECK-NEXT:    str r7, [sp, #436] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #2048
+; CHECK-NEXT:    str r7, [sp, #432] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #4096
+; CHECK-NEXT:    str r7, [sp, #428] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #8192
+; CHECK-NEXT:    str r7, [sp, #424] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #16384
+; CHECK-NEXT:    str r7, [sp, #420] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #32768
+; CHECK-NEXT:    str r7, [sp, #416] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #65536
+; CHECK-NEXT:    str r7, [sp, #412] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #131072
+; CHECK-NEXT:    str r7, [sp, #408] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #262144
+; CHECK-NEXT:    str r7, [sp, #404] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #524288
+; CHECK-NEXT:    str r7, [sp, #400] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #1048576
+; CHECK-NEXT:    str r7, [sp, #396] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #2097152
+; CHECK-NEXT:    str r7, [sp, #392] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #4194304
+; CHECK-NEXT:    str r7, [sp, #388] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #8388608
+; CHECK-NEXT:    str r7, [sp, #384] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #16777216
+; CHECK-NEXT:    str r7, [sp, #380] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #33554432
+; CHECK-NEXT:    str r7, [sp, #376] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #67108864
+; CHECK-NEXT:    str r7, [sp, #372] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #134217728
+; CHECK-NEXT:    str r7, [sp, #368] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #268435456
+; CHECK-NEXT:    str r7, [sp, #364] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #536870912
+; CHECK-NEXT:    str r7, [sp, #360] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #1073741824
+; CHECK-NEXT:    str r7, [sp, #356] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r2, #-2147483648
+; CHECK-NEXT:    str r7, [sp, #352] @ 4-byte Spill
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    and r4, r2, #2
+; CHECK-NEXT:    and r5, r2, #1
+; CHECK-NEXT:    str r4, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    and r9, r2, #67108864
+; CHECK-NEXT:    str r5, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    and r8, r2, #134217728
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    and r7, r3, #2
+; CHECK-NEXT:    str r7, [sp, #348] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r12, r7
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4
+; CHECK-NEXT:    str r6, [sp, #340] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8
+; CHECK-NEXT:    str r6, [sp, #336] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16
+; CHECK-NEXT:    str r6, [sp, #332] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32
+; CHECK-NEXT:    str r6, [sp, #328] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #64
+; CHECK-NEXT:    str r6, [sp, #324] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #128
+; CHECK-NEXT:    str r6, [sp, #320] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #256
+; CHECK-NEXT:    str r6, [sp, #316] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #512
+; CHECK-NEXT:    str r6, [sp, #312] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1024
+; CHECK-NEXT:    str r6, [sp, #308] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2048
+; CHECK-NEXT:    str r6, [sp, #304] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4096
+; CHECK-NEXT:    str r6, [sp, #300] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8192
+; CHECK-NEXT:    str r6, [sp, #296] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16384
+; CHECK-NEXT:    str r6, [sp, #292] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32768
+; CHECK-NEXT:    str r6, [sp, #288] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #65536
+; CHECK-NEXT:    str r6, [sp, #284] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #131072
+; CHECK-NEXT:    str r6, [sp, #280] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #262144
+; CHECK-NEXT:    str r6, [sp, #276] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #524288
+; CHECK-NEXT:    str r6, [sp, #272] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1048576
+; CHECK-NEXT:    str r6, [sp, #268] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2097152
+; CHECK-NEXT:    str r6, [sp, #264] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4194304
+; CHECK-NEXT:    str r6, [sp, #260] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8388608
+; CHECK-NEXT:    str r6, [sp, #256] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16777216
+; CHECK-NEXT:    str r6, [sp, #252] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #33554432
+; CHECK-NEXT:    str r6, [sp, #248] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #67108864
+; CHECK-NEXT:    str r6, [sp, #244] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #134217728
+; CHECK-NEXT:    str r6, [sp, #240] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #268435456
+; CHECK-NEXT:    str r6, [sp, #236] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #536870912
+; CHECK-NEXT:    str r6, [sp, #232] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1073741824
+; CHECK-NEXT:    str r6, [sp, #228] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #-2147483648
+; CHECK-NEXT:    rbit r3, r3
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #268435456
+; CHECK-NEXT:    eor.w lr, r7, r0
+; CHECK-NEXT:    rbit r0, r12
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #4
+; CHECK-NEXT:    str r5, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #8
+; CHECK-NEXT:    str r5, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #16
+; CHECK-NEXT:    str r5, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #32
+; CHECK-NEXT:    str r5, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #64
+; CHECK-NEXT:    str r5, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #128
+; CHECK-NEXT:    str r5, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #256
+; CHECK-NEXT:    str r5, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #512
+; CHECK-NEXT:    str r5, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #1024
+; CHECK-NEXT:    str r5, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #2048
+; CHECK-NEXT:    str r5, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #4096
+; CHECK-NEXT:    str r5, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #8192
+; CHECK-NEXT:    str r5, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #16384
+; CHECK-NEXT:    str r5, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #32768
+; CHECK-NEXT:    str r5, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #65536
+; CHECK-NEXT:    str r5, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #131072
+; CHECK-NEXT:    str r5, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #262144
+; CHECK-NEXT:    str r5, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #524288
+; CHECK-NEXT:    str r5, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #1048576
+; CHECK-NEXT:    str r5, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #2097152
+; CHECK-NEXT:    str r5, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #4194304
+; CHECK-NEXT:    str r5, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #8388608
+; CHECK-NEXT:    str r5, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #16777216
+; CHECK-NEXT:    str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #33554432
+; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r9
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r8
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #536870912
+; CHECK-NEXT:    mul r11, r0, r5
+; CHECK-NEXT:    eor.w r4, r4, r11
+; CHECK-NEXT:    and r11, r2, #1073741824
+; CHECK-NEXT:    and r2, r2, #-2147483648
+; CHECK-NEXT:    mul r10, r0, r11
+; CHECK-NEXT:    eor.w r4, r4, r10
+; CHECK-NEXT:    and r10, r3, #1073741824
+; CHECK-NEXT:    rbit r4, r4
+; CHECK-NEXT:    eor.w r7, lr, r4, lsr #1
+; CHECK-NEXT:    str r7, [sp, #224] @ 4-byte Spill
+; CHECK-NEXT:    and r7, r3, #2
+; CHECK-NEXT:    str r7, [sp, #220] @ 4-byte Spill
+; CHECK-NEXT:    and lr, r3, #-2147483648
+; CHECK-NEXT:    mul r4, r0, r7
+; CHECK-NEXT:    and r7, r3, #1
+; CHECK-NEXT:    str r7, [sp, #216] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #4
+; CHECK-NEXT:    str r4, [sp, #212] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #8
+; CHECK-NEXT:    str r4, [sp, #208] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #16
+; CHECK-NEXT:    str r4, [sp, #204] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #32
+; CHECK-NEXT:    str r4, [sp, #200] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #64
+; CHECK-NEXT:    str r4, [sp, #196] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #128
+; CHECK-NEXT:    str r4, [sp, #192] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #256
+; CHECK-NEXT:    str r4, [sp, #188] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #512
+; CHECK-NEXT:    str r4, [sp, #184] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #1024
+; CHECK-NEXT:    str r4, [sp, #180] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #2048
+; CHECK-NEXT:    str r4, [sp, #176] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #4096
+; CHECK-NEXT:    str r4, [sp, #172] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #8192
+; CHECK-NEXT:    str r4, [sp, #168] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #16384
+; CHECK-NEXT:    str r4, [sp, #164] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #32768
+; CHECK-NEXT:    str r4, [sp, #160] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #65536
+; CHECK-NEXT:    str r4, [sp, #156] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #131072
+; CHECK-NEXT:    str r4, [sp, #152] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #262144
+; CHECK-NEXT:    str r4, [sp, #148] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #524288
+; CHECK-NEXT:    str r4, [sp, #144] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #1048576
+; CHECK-NEXT:    str r4, [sp, #140] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #2097152
+; CHECK-NEXT:    str r4, [sp, #136] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #4194304
+; CHECK-NEXT:    str r4, [sp, #132] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #8388608
+; CHECK-NEXT:    str r4, [sp, #128] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #16777216
+; CHECK-NEXT:    str r4, [sp, #124] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #33554432
+; CHECK-NEXT:    str r4, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #67108864
+; CHECK-NEXT:    str r4, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #134217728
+; CHECK-NEXT:    str r4, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #268435456
+; CHECK-NEXT:    str r4, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r3, #536870912
+; CHECK-NEXT:    str r4, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    mul r4, r0, r10
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    eor.w r3, r4, r0
+; CHECK-NEXT:    ldr r4, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    rbit r0, r1
+; CHECK-NEXT:    muls r4, r0, r4
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r0, r9
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    mul r7, r0, r8
+; CHECK-NEXT:    eors r4, r7
+; CHECK-NEXT:    eors r4, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    mul r5, r0, r11
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    eors r2, r4
+; CHECK-NEXT:    ldr r4, [sp, #344] @ 4-byte Reload
+; CHECK-NEXT:    eors r2, r3
+; CHECK-NEXT:    ldr r3, [sp, #348] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #340] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #336] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #332] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #328] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #324] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #320] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #316] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #312] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #308] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #304] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #300] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #296] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #292] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #288] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #284] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #280] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #276] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #272] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #268] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #264] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #260] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #256] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #252] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #248] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #244] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #240] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #236] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #232] @ 4-byte Reload
+; CHECK-NEXT:    muls r4, r1, r4
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    ldr r4, [sp, #228] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r4, r1
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #216] @ 4-byte Reload
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eor.w r1, r2, r1, lsr #1
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    rbit r2, r1
+; CHECK-NEXT:    ldr r1, [sp, #220] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r0, r1
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #212] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #208] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #204] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #200] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #196] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #192] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #188] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #184] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #180] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #176] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #172] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #168] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #164] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #160] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #156] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #152] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #148] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #144] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #140] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #136] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #132] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #128] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #124] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r10
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    rbit r3, r0
+; CHECK-NEXT:    ldr r0, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r2, r3, #1
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    mul r0, r12, r0
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #380] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #376] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #372] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #368] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #364] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #360] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #356] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #352] @ 4-byte Reload
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    ldr r1, [sp, #224] @ 4-byte Reload
+; CHECK-NEXT:    add sp, #480
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %zextx = zext <1 x i64> %x to <1 x i128>
+  %zexty = zext <1 x i64> %y to <1 x i128>
+  %a = call <1 x i128> @llvm.clmul.v2i128(<1 x i128> %zextx, <1 x i128> %zexty)
+  ret <1 x i128> %a
+}
+
+define <2 x i128> @clmul_v2i128_zext(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: clmul_v2i128_zext:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #612
+; CHECK-NEXT:    sub.w sp, sp, #612
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    add r0, sp, #656
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    ldrd r0, r4, [sp, #648]
+; CHECK-NEXT:    mov r12, r3
+; CHECK-NEXT:    vmov r1, lr, d1
+; CHECK-NEXT:    and r3, r1, #2
+; CHECK-NEXT:    and r5, r1, #1
+; CHECK-NEXT:    str r3, [sp, #480] @ 4-byte Spill
+; CHECK-NEXT:    and r10, r1, #1073741824
+; CHECK-NEXT:    str r5, [sp, #476] @ 4-byte Spill
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    and r9, r1, #-2147483648
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    mul r7, r0, r9
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #4
+; CHECK-NEXT:    str r5, [sp, #472] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #8
+; CHECK-NEXT:    str r5, [sp, #468] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #16
+; CHECK-NEXT:    str r5, [sp, #464] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #32
+; CHECK-NEXT:    str r5, [sp, #460] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #64
+; CHECK-NEXT:    str r5, [sp, #456] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #128
+; CHECK-NEXT:    str r5, [sp, #452] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #256
+; CHECK-NEXT:    str r5, [sp, #448] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #512
+; CHECK-NEXT:    str r5, [sp, #444] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #1024
+; CHECK-NEXT:    str r5, [sp, #440] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #2048
+; CHECK-NEXT:    str r5, [sp, #436] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #4096
+; CHECK-NEXT:    str r5, [sp, #432] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #8192
+; CHECK-NEXT:    str r5, [sp, #428] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #16384
+; CHECK-NEXT:    str r5, [sp, #424] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #32768
+; CHECK-NEXT:    str r5, [sp, #420] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #65536
+; CHECK-NEXT:    str r5, [sp, #416] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #131072
+; CHECK-NEXT:    str r5, [sp, #412] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #262144
+; CHECK-NEXT:    str r5, [sp, #408] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #524288
+; CHECK-NEXT:    str r5, [sp, #404] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #1048576
+; CHECK-NEXT:    str r5, [sp, #400] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #2097152
+; CHECK-NEXT:    str r5, [sp, #396] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #4194304
+; CHECK-NEXT:    str r5, [sp, #392] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #8388608
+; CHECK-NEXT:    str r5, [sp, #388] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #16777216
+; CHECK-NEXT:    str r5, [sp, #384] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #33554432
+; CHECK-NEXT:    str r5, [sp, #380] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #67108864
+; CHECK-NEXT:    str r5, [sp, #376] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #134217728
+; CHECK-NEXT:    str r5, [sp, #372] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #268435456
+; CHECK-NEXT:    str r5, [sp, #368] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r1, #536870912
+; CHECK-NEXT:    str r5, [sp, #364] @ 4-byte Spill
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    mul r5, r0, r10
+; CHECK-NEXT:    eors r5, r3
+; CHECK-NEXT:    mov r3, r6
+; CHECK-NEXT:    eors r5, r7
+; CHECK-NEXT:    str r5, [r6, #16]
+; CHECK-NEXT:    vmov r8, r5, d0
+; CHECK-NEXT:    and r6, r8, #2
+; CHECK-NEXT:    str r6, [sp, #604] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #1
+; CHECK-NEXT:    str r6, [sp, #600] @ 4-byte Spill
+; CHECK-NEXT:    mul r11, r2, r6
+; CHECK-NEXT:    and r6, r8, #4
+; CHECK-NEXT:    str r6, [sp, #596] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r2, r6
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    eor.w r11, r11, r6
+; CHECK-NEXT:    and r6, r8, #8
+; CHECK-NEXT:    str r6, [sp, #592] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #16
+; CHECK-NEXT:    str r6, [sp, #588] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #32
+; CHECK-NEXT:    str r6, [sp, #584] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #64
+; CHECK-NEXT:    str r6, [sp, #580] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #128
+; CHECK-NEXT:    str r6, [sp, #576] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #256
+; CHECK-NEXT:    str r6, [sp, #572] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #512
+; CHECK-NEXT:    str r6, [sp, #568] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #1024
+; CHECK-NEXT:    str r6, [sp, #564] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #2048
+; CHECK-NEXT:    str r6, [sp, #560] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #4096
+; CHECK-NEXT:    str r6, [sp, #556] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #8192
+; CHECK-NEXT:    str r6, [sp, #552] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #16384
+; CHECK-NEXT:    str r6, [sp, #548] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #32768
+; CHECK-NEXT:    str r6, [sp, #544] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #65536
+; CHECK-NEXT:    str r6, [sp, #540] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #131072
+; CHECK-NEXT:    str r6, [sp, #536] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #262144
+; CHECK-NEXT:    str r6, [sp, #532] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #524288
+; CHECK-NEXT:    str r6, [sp, #528] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #1048576
+; CHECK-NEXT:    str r6, [sp, #524] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #2097152
+; CHECK-NEXT:    str r6, [sp, #520] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #4194304
+; CHECK-NEXT:    str r6, [sp, #516] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #8388608
+; CHECK-NEXT:    str r6, [sp, #512] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #16777216
+; CHECK-NEXT:    str r6, [sp, #508] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #33554432
+; CHECK-NEXT:    str r6, [sp, #504] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #67108864
+; CHECK-NEXT:    str r6, [sp, #500] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #134217728
+; CHECK-NEXT:    str r6, [sp, #496] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #268435456
+; CHECK-NEXT:    str r6, [sp, #492] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #536870912
+; CHECK-NEXT:    str r6, [sp, #488] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #1073741824
+; CHECK-NEXT:    str r6, [sp, #484] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    and r6, r8, #-2147483648
+; CHECK-NEXT:    str r6, [sp, #356] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r11, r11, r7
+; CHECK-NEXT:    mul r7, r2, r6
+; CHECK-NEXT:    eor.w r6, r11, r7
+; CHECK-NEXT:    str r6, [r3]
+; CHECK-NEXT:    mov r11, r3
+; CHECK-NEXT:    ldr r3, [sp, #480] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #380] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #376] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #372] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #368] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #364] @ 4-byte Reload
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r3
+; CHECK-NEXT:    mul r3, r4, r9
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r4, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eor.w r9, r6, r3
+; CHECK-NEXT:    and r3, lr, #2
+; CHECK-NEXT:    str r3, [sp, #480] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r0, r3
+; CHECK-NEXT:    and r3, lr, #1
+; CHECK-NEXT:    str r3, [sp, #476] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #4
+; CHECK-NEXT:    str r3, [sp, #472] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #8
+; CHECK-NEXT:    str r3, [sp, #468] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #16
+; CHECK-NEXT:    str r3, [sp, #464] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #32
+; CHECK-NEXT:    str r3, [sp, #460] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #64
+; CHECK-NEXT:    str r3, [sp, #456] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #128
+; CHECK-NEXT:    str r3, [sp, #452] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #256
+; CHECK-NEXT:    str r3, [sp, #448] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #512
+; CHECK-NEXT:    str r3, [sp, #444] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #1024
+; CHECK-NEXT:    str r3, [sp, #440] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #2048
+; CHECK-NEXT:    str r3, [sp, #436] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #4096
+; CHECK-NEXT:    str r3, [sp, #432] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #8192
+; CHECK-NEXT:    str r3, [sp, #428] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #16384
+; CHECK-NEXT:    str r3, [sp, #424] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #32768
+; CHECK-NEXT:    str r3, [sp, #420] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #65536
+; CHECK-NEXT:    str r3, [sp, #416] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #131072
+; CHECK-NEXT:    str r3, [sp, #412] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #262144
+; CHECK-NEXT:    str r3, [sp, #408] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #524288
+; CHECK-NEXT:    str r3, [sp, #404] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #1048576
+; CHECK-NEXT:    str r3, [sp, #400] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #2097152
+; CHECK-NEXT:    str r3, [sp, #396] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #4194304
+; CHECK-NEXT:    str r3, [sp, #392] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #8388608
+; CHECK-NEXT:    str r3, [sp, #388] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #16777216
+; CHECK-NEXT:    str r3, [sp, #384] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #33554432
+; CHECK-NEXT:    str r3, [sp, #380] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #67108864
+; CHECK-NEXT:    str r3, [sp, #376] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #134217728
+; CHECK-NEXT:    str r3, [sp, #372] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #268435456
+; CHECK-NEXT:    str r3, [sp, #368] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #536870912
+; CHECK-NEXT:    str r3, [sp, #364] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    and r3, lr, #1073741824
+; CHECK-NEXT:    str r3, [sp, #360] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r0, r3
+; CHECK-NEXT:    rbit r3, r1
+; CHECK-NEXT:    and r10, r3, #2
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, lr, #-2147483648
+; CHECK-NEXT:    rbit lr, lr
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    mul r1, r0, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eor.w r9, r9, r6
+; CHECK-NEXT:    and r6, r3, #1
+; CHECK-NEXT:    str r6, [sp, #232] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #4
+; CHECK-NEXT:    str r6, [sp, #228] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #8
+; CHECK-NEXT:    str r6, [sp, #224] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #16
+; CHECK-NEXT:    str r6, [sp, #220] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #32
+; CHECK-NEXT:    str r6, [sp, #216] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #64
+; CHECK-NEXT:    str r6, [sp, #212] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #128
+; CHECK-NEXT:    str r6, [sp, #208] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #256
+; CHECK-NEXT:    str r6, [sp, #204] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #512
+; CHECK-NEXT:    str r6, [sp, #200] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #1024
+; CHECK-NEXT:    str r6, [sp, #196] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #2048
+; CHECK-NEXT:    str r6, [sp, #192] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #4096
+; CHECK-NEXT:    str r6, [sp, #188] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #8192
+; CHECK-NEXT:    str r6, [sp, #184] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #16384
+; CHECK-NEXT:    str r6, [sp, #180] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #32768
+; CHECK-NEXT:    str r6, [sp, #176] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #65536
+; CHECK-NEXT:    str r6, [sp, #172] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #131072
+; CHECK-NEXT:    str r6, [sp, #168] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #262144
+; CHECK-NEXT:    str r6, [sp, #164] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #524288
+; CHECK-NEXT:    str r6, [sp, #160] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #1048576
+; CHECK-NEXT:    str r6, [sp, #156] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #2097152
+; CHECK-NEXT:    str r6, [sp, #152] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #4194304
+; CHECK-NEXT:    str r6, [sp, #148] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #8388608
+; CHECK-NEXT:    str r6, [sp, #144] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #16777216
+; CHECK-NEXT:    str r6, [sp, #140] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #33554432
+; CHECK-NEXT:    str r6, [sp, #136] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #67108864
+; CHECK-NEXT:    str r6, [sp, #132] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #134217728
+; CHECK-NEXT:    str r6, [sp, #128] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #268435456
+; CHECK-NEXT:    str r6, [sp, #124] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #536870912
+; CHECK-NEXT:    str r6, [sp, #120] @ 4-byte Spill
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, r3, #1073741824
+; CHECK-NEXT:    str r6, [sp, #116] @ 4-byte Spill
+; CHECK-NEXT:    and r3, r3, #-2147483648
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eor.w r1, r9, r1, lsr #1
+; CHECK-NEXT:    str.w r1, [r11, #20]
+; CHECK-NEXT:    ldr r1, [sp, #604] @ 4-byte Reload
+; CHECK-NEXT:    ldr r6, [sp, #600] @ 4-byte Reload
+; CHECK-NEXT:    str.w r11, [sp, #608] @ 4-byte Spill
+; CHECK-NEXT:    mul r1, r12, r1
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #596] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #592] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #588] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #584] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #580] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #576] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #572] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #568] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #564] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #560] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #556] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #552] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #548] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #544] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #540] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #536] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #532] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #528] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #524] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #520] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #516] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #512] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #508] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #504] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #500] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #496] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #492] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #488] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #484] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #356] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eor.w r9, r1, r6
+; CHECK-NEXT:    and r1, r5, #2
+; CHECK-NEXT:    str r1, [sp, #604] @ 4-byte Spill
+; CHECK-NEXT:    mul r6, r2, r1
+; CHECK-NEXT:    and r1, r5, #1
+; CHECK-NEXT:    str r1, [sp, #600] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #4
+; CHECK-NEXT:    str r1, [sp, #596] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #8
+; CHECK-NEXT:    str r1, [sp, #592] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #16
+; CHECK-NEXT:    str r1, [sp, #588] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #32
+; CHECK-NEXT:    str r1, [sp, #584] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #64
+; CHECK-NEXT:    str r1, [sp, #580] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #128
+; CHECK-NEXT:    str r1, [sp, #576] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #256
+; CHECK-NEXT:    str r1, [sp, #572] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #512
+; CHECK-NEXT:    str r1, [sp, #568] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #1024
+; CHECK-NEXT:    str r1, [sp, #564] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #2048
+; CHECK-NEXT:    str r1, [sp, #560] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #4096
+; CHECK-NEXT:    str r1, [sp, #556] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #8192
+; CHECK-NEXT:    str r1, [sp, #552] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #16384
+; CHECK-NEXT:    str r1, [sp, #548] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #32768
+; CHECK-NEXT:    str r1, [sp, #544] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #65536
+; CHECK-NEXT:    str r1, [sp, #540] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #131072
+; CHECK-NEXT:    str r1, [sp, #536] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #262144
+; CHECK-NEXT:    str r1, [sp, #532] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #524288
+; CHECK-NEXT:    str r1, [sp, #528] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #1048576
+; CHECK-NEXT:    str r1, [sp, #524] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #2097152
+; CHECK-NEXT:    str r1, [sp, #520] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #4194304
+; CHECK-NEXT:    str r1, [sp, #516] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #8388608
+; CHECK-NEXT:    str r1, [sp, #512] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #16777216
+; CHECK-NEXT:    str r1, [sp, #508] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #33554432
+; CHECK-NEXT:    str r1, [sp, #504] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #67108864
+; CHECK-NEXT:    str r1, [sp, #500] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #134217728
+; CHECK-NEXT:    str r1, [sp, #496] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #268435456
+; CHECK-NEXT:    str r1, [sp, #492] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #536870912
+; CHECK-NEXT:    str r1, [sp, #488] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    and r1, r5, #1073741824
+; CHECK-NEXT:    str r1, [sp, #484] @ 4-byte Spill
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    mul r7, r2, r1
+; CHECK-NEXT:    rbit r1, r2
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r5, #-2147483648
+; CHECK-NEXT:    muls r7, r2, r7
+; CHECK-NEXT:    rbit r2, r8
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eor.w r9, r9, r6
+; CHECK-NEXT:    and r6, r2, #2
+; CHECK-NEXT:    str r6, [sp, #356] @ 4-byte Spill
+; CHECK-NEXT:    mul r8, r1, r6
+; CHECK-NEXT:    and r6, r2, #1
+; CHECK-NEXT:    str r6, [sp, #352] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eor.w r8, r8, r6
+; CHECK-NEXT:    and r6, r2, #4
+; CHECK-NEXT:    str r6, [sp, #348] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #8
+; CHECK-NEXT:    str r6, [sp, #344] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #16
+; CHECK-NEXT:    str r6, [sp, #340] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #32
+; CHECK-NEXT:    str r6, [sp, #336] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #64
+; CHECK-NEXT:    str r6, [sp, #332] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #128
+; CHECK-NEXT:    str r6, [sp, #328] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #256
+; CHECK-NEXT:    str r6, [sp, #324] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #512
+; CHECK-NEXT:    str r6, [sp, #320] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #1024
+; CHECK-NEXT:    str r6, [sp, #316] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #2048
+; CHECK-NEXT:    str r6, [sp, #312] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #4096
+; CHECK-NEXT:    str r6, [sp, #308] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #8192
+; CHECK-NEXT:    str r6, [sp, #304] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #16384
+; CHECK-NEXT:    str r6, [sp, #300] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #32768
+; CHECK-NEXT:    str r6, [sp, #296] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #65536
+; CHECK-NEXT:    str r6, [sp, #292] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #131072
+; CHECK-NEXT:    str r6, [sp, #288] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #262144
+; CHECK-NEXT:    str r6, [sp, #284] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #524288
+; CHECK-NEXT:    str r6, [sp, #280] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #1048576
+; CHECK-NEXT:    str r6, [sp, #276] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #2097152
+; CHECK-NEXT:    str r6, [sp, #272] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #4194304
+; CHECK-NEXT:    str r6, [sp, #268] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #8388608
+; CHECK-NEXT:    str r6, [sp, #264] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #16777216
+; CHECK-NEXT:    str r6, [sp, #260] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #33554432
+; CHECK-NEXT:    str r6, [sp, #256] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #67108864
+; CHECK-NEXT:    str r6, [sp, #252] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #134217728
+; CHECK-NEXT:    str r6, [sp, #248] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #268435456
+; CHECK-NEXT:    str r6, [sp, #244] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #536870912
+; CHECK-NEXT:    str r6, [sp, #240] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    and r6, r2, #1073741824
+; CHECK-NEXT:    str r6, [sp, #236] @ 4-byte Spill
+; CHECK-NEXT:    and r2, r2, #-2147483648
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    eor.w r6, r8, r7
+; CHECK-NEXT:    rbit r6, r6
+; CHECK-NEXT:    eor.w r6, r9, r6, lsr #1
+; CHECK-NEXT:    str.w r6, [r11, #4]
+; CHECK-NEXT:    and r6, lr, #2
+; CHECK-NEXT:    str r6, [sp, #112] @ 4-byte Spill
+; CHECK-NEXT:    and r9, lr, #536870912
+; CHECK-NEXT:    mul r8, r0, r6
+; CHECK-NEXT:    and r6, lr, #1
+; CHECK-NEXT:    str r6, [sp, #108] @ 4-byte Spill
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #4
+; CHECK-NEXT:    str r6, [sp, #104] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #8
+; CHECK-NEXT:    str r6, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #16
+; CHECK-NEXT:    str r6, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #32
+; CHECK-NEXT:    str r6, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #64
+; CHECK-NEXT:    str r6, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #128
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #256
+; CHECK-NEXT:    str r6, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #512
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #1024
+; CHECK-NEXT:    str r6, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #2048
+; CHECK-NEXT:    str r6, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #4096
+; CHECK-NEXT:    str r6, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #8192
+; CHECK-NEXT:    str r6, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #16384
+; CHECK-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #32768
+; CHECK-NEXT:    str r6, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #65536
+; CHECK-NEXT:    str r6, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #131072
+; CHECK-NEXT:    str r6, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #262144
+; CHECK-NEXT:    str r6, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #524288
+; CHECK-NEXT:    str r6, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #1048576
+; CHECK-NEXT:    str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #2097152
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #4194304
+; CHECK-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #8388608
+; CHECK-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #16777216
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #33554432
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #67108864
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #134217728
+; CHECK-NEXT:    str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    and r6, lr, #268435456
+; CHECK-NEXT:    str r6, [sp] @ 4-byte Spill
+; CHECK-NEXT:    eor.w r8, r8, r7
+; CHECK-NEXT:    mul r7, r0, r6
+; CHECK-NEXT:    eor.w r6, r8, r7
+; CHECK-NEXT:    and r8, lr, #1073741824
+; CHECK-NEXT:    mul r7, r0, r9
+; CHECK-NEXT:    mul r11, r0, r8
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #232] @ 4-byte Reload
+; CHECK-NEXT:    eor.w r6, r6, r11
+; CHECK-NEXT:    and r11, lr, #-2147483648
+; CHECK-NEXT:    mul r0, r0, r11
+; CHECK-NEXT:    eor.w lr, r6, r0
+; CHECK-NEXT:    rbit r0, r4
+; CHECK-NEXT:    mul r6, r0, r10
+; CHECK-NEXT:    mul r10, r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #228] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eor.w r6, r6, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #224] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #220] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #216] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #212] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #208] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #204] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #200] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #196] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #192] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #188] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #184] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #180] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #176] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #172] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #168] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #164] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #160] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #156] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #152] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #148] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #144] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #140] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #136] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #132] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #128] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #124] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #120] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #116] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #480] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    ldr r6, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    eor.w r3, r3, lr
+; CHECK-NEXT:    muls r7, r4, r7
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #380] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #376] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #372] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #368] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #364] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    ldr r6, [sp, #360] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    rbit r7, r7
+; CHECK-NEXT:    eor.w r3, r3, r7, lsr #1
+; CHECK-NEXT:    ldr r7, [sp, #108] @ 4-byte Reload
+; CHECK-NEXT:    rbit r4, r3
+; CHECK-NEXT:    ldr r3, [sp, #112] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #104] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    mul r7, r0, r9
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    mul r7, r0, r8
+; CHECK-NEXT:    mul r0, r0, r11
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    eors r0, r3
+; CHECK-NEXT:    rbit r3, r0
+; CHECK-NEXT:    ldr r0, [sp, #608] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r4, r3, #1
+; CHECK-NEXT:    strd r4, r3, [r0, #24]
+; CHECK-NEXT:    rbit r0, r5
+; CHECK-NEXT:    and r3, r0, #2
+; CHECK-NEXT:    and r7, r0, #1
+; CHECK-NEXT:    str r3, [sp, #480] @ 4-byte Spill
+; CHECK-NEXT:    and r10, r0, #33554432
+; CHECK-NEXT:    str r7, [sp, #476] @ 4-byte Spill
+; CHECK-NEXT:    muls r3, r1, r3
+; CHECK-NEXT:    and r9, r0, #67108864
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    and r8, r0, #134217728
+; CHECK-NEXT:    and lr, r0, #1073741824
+; CHECK-NEXT:    mul r6, r1, r9
+; CHECK-NEXT:    mul r5, r1, r8
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #4
+; CHECK-NEXT:    str r7, [sp, #472] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #8
+; CHECK-NEXT:    str r7, [sp, #468] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #16
+; CHECK-NEXT:    str r7, [sp, #464] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #32
+; CHECK-NEXT:    str r7, [sp, #460] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #64
+; CHECK-NEXT:    str r7, [sp, #456] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #128
+; CHECK-NEXT:    str r7, [sp, #452] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #256
+; CHECK-NEXT:    str r7, [sp, #448] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #512
+; CHECK-NEXT:    str r7, [sp, #444] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #1024
+; CHECK-NEXT:    str r7, [sp, #440] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #2048
+; CHECK-NEXT:    str r7, [sp, #436] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #4096
+; CHECK-NEXT:    str r7, [sp, #432] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #8192
+; CHECK-NEXT:    str r7, [sp, #428] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #16384
+; CHECK-NEXT:    str r7, [sp, #424] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #32768
+; CHECK-NEXT:    str r7, [sp, #420] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #65536
+; CHECK-NEXT:    str r7, [sp, #416] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #131072
+; CHECK-NEXT:    str r7, [sp, #412] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #262144
+; CHECK-NEXT:    str r7, [sp, #408] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #524288
+; CHECK-NEXT:    str r7, [sp, #404] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #1048576
+; CHECK-NEXT:    str r7, [sp, #400] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #2097152
+; CHECK-NEXT:    str r7, [sp, #396] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #4194304
+; CHECK-NEXT:    str r7, [sp, #392] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #8388608
+; CHECK-NEXT:    str r7, [sp, #388] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    and r7, r0, #16777216
+; CHECK-NEXT:    str r7, [sp, #384] @ 4-byte Spill
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    mul r7, r1, r10
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    eors r3, r6
+; CHECK-NEXT:    ldr r6, [sp, #356] @ 4-byte Reload
+; CHECK-NEXT:    eors r3, r5
+; CHECK-NEXT:    and r5, r0, #268435456
+; CHECK-NEXT:    mul r4, r1, r5
+; CHECK-NEXT:    eors r3, r4
+; CHECK-NEXT:    and r4, r0, #536870912
+; CHECK-NEXT:    mul r7, r1, r4
+; CHECK-NEXT:    eors r3, r7
+; CHECK-NEXT:    mul r7, r1, lr
+; CHECK-NEXT:    eors r7, r3
+; CHECK-NEXT:    and r3, r0, #-2147483648
+; CHECK-NEXT:    mul r0, r1, r3
+; CHECK-NEXT:    rbit r1, r12
+; CHECK-NEXT:    muls r2, r1, r2
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #352] @ 4-byte Reload
+; CHECK-NEXT:    mul r11, r1, r6
+; CHECK-NEXT:    ldr r6, [sp, #348] @ 4-byte Reload
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eor.w r7, r7, r11
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #344] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #340] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #336] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #332] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #328] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #324] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #320] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #316] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #312] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #308] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #304] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #300] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #296] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #292] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #288] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #284] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #280] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #276] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #272] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #268] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #264] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #260] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #256] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #252] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #248] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #244] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #240] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    ldr r7, [sp, #236] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #600] @ 4-byte Reload
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    ldr r2, [sp, #604] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    mul r2, r12, r2
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #596] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #592] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #588] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #584] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #580] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #576] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #572] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #568] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #564] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #560] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #556] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #552] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #548] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #544] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #540] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #536] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #532] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #528] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #524] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #520] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #516] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #512] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #508] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #504] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #500] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #496] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #492] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #488] @ 4-byte Reload
+; CHECK-NEXT:    mul r6, r12, r6
+; CHECK-NEXT:    eors r2, r6
+; CHECK-NEXT:    ldr r6, [sp, #484] @ 4-byte Reload
+; CHECK-NEXT:    mul r7, r12, r6
+; CHECK-NEXT:    eors r2, r7
+; CHECK-NEXT:    ldr r7, [sp, #476] @ 4-byte Reload
+; CHECK-NEXT:    rbit r2, r2
+; CHECK-NEXT:    eor.w r0, r0, r2, lsr #1
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    rbit r2, r0
+; CHECK-NEXT:    ldr r0, [sp, #480] @ 4-byte Reload
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #472] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #468] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #464] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #460] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #456] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #452] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #448] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #444] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #440] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #436] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #432] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #428] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #424] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #420] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #416] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #412] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #408] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #404] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #400] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #396] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #392] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #388] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    ldr r7, [sp, #384] @ 4-byte Reload
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, r10
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, r9
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, r8
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, r5
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, r4
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    mul r7, r1, lr
+; CHECK-NEXT:    muls r1, r3, r1
+; CHECK-NEXT:    eors r0, r7
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    rbit r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #608] @ 4-byte Reload
+; CHECK-NEXT:    lsrl r2, r1, #1
+; CHECK-NEXT:    strd r2, r1, [r0, #8]
+; CHECK-NEXT:    add.w sp, sp, #612
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %zextx = zext <2 x i64> %x to <2 x i128>
+  %zexty = zext <2 x i64> %y to <2 x i128>
+  %a = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %zextx, <2 x i128> %zexty)
+  ret <2 x i128> %a
+}
+
+define <16 x i8> @clmulr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; Test input: per-lane i8 carry-less multiply done at i16, returning bits [14:7] of each product.
+; CHECK-LABEL: clmulr_v16i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i8 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov.w r12, #8
+; CHECK-NEXT:    vmov.i8 q3, #0x1
+; CHECK-NEXT:    vbrsr.8 q0, q0, r12
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vbrsr.8 q1, q1, r12
+; CHECK-NEXT:    vmul.i8 q2, q1, q2
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i8 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x80
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i8 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vbrsr.8 q0, q0, r12
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <16 x i8> %a to <16 x i16> ; zero-extend each lane of the first operand to twice its width
+  %b.ext = zext <16 x i8> %b to <16 x i16> ; same widening for the second operand
+  %clmul = call <16 x i16> @llvm.clmul.v16i16(<16 x i16> %a.ext, <16 x i16> %b.ext) ; carry-less multiply on the widened lanes
+  %res.ext = lshr <16 x i16> %clmul, splat (i16 7) ; shift by (8 - 1) so product bits [14:7] land in [7:0]
+  %res = trunc <16 x i16> %res.ext to <16 x i8> ; narrow back to the original lane width
+  ret <16 x i8> %res
+}
+
+define <8 x i8> @clmulr_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; Test input: per-lane i8 carry-less multiply done at i16 on an 8-lane vector, returning bits [14:7] of each product.
+; CHECK-LABEL: clmulr_v8i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov.i16 q2, #0x2
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vmovlb.u8 q1, q1
+; CHECK-NEXT:    vmovlb.u8 q0, q0
+; CHECK-NEXT:    vand q2, q1, q2
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q2, q0, q2
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vmul.i16 q0, q0, q1
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vshr.u16 q0, q0, #7
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <8 x i8> %a to <8 x i16> ; zero-extend each lane of the first operand to twice its width
+  %b.ext = zext <8 x i8> %b to <8 x i16> ; same widening for the second operand
+  %clmul = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a.ext, <8 x i16> %b.ext) ; carry-less multiply; the v8i16 suffix matches the 8-lane i16 operand type
+  %res.ext = lshr <8 x i16> %clmul, splat (i16 7) ; shift by (8 - 1) so product bits [14:7] land in [7:0]
+  %res = trunc <8 x i16> %res.ext to <8 x i8> ; narrow back to the original lane width
+  ret <8 x i8> %res
+}
+
+define <8 x i16> @clmulr_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; Test input: per-lane i16 carry-less multiply done at i32, returning bits [30:15] of each product.
+; CHECK-LABEL: clmulr_v8i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i16 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov.w r12, #16
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vbrsr.16 q0, q0, r12
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vbrsr.16 q1, q1, r12
+; CHECK-NEXT:    vmul.i16 q2, q1, q2
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i16 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vbrsr.16 q0, q0, r12
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <8 x i16> %a to <8 x i32> ; zero-extend each lane of the first operand to twice its width
+  %b.ext = zext <8 x i16> %b to <8 x i32> ; same widening for the second operand
+  %clmul = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %a.ext, <8 x i32> %b.ext) ; carry-less multiply on the widened lanes
+  %res.ext = lshr <8 x i32> %clmul, splat (i32 15) ; shift by (16 - 1) so product bits [30:15] land in [15:0]
+  %res = trunc <8 x i32> %res.ext to <8 x i16> ; narrow back to the original lane width
+  ret <8 x i16> %res
+}
+
+define <4 x i16> @clmulr_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind { ; Test input: per-lane i16 carry-less multiply done at i32 on a 4-lane vector, returning bits [30:15] of each product.
+; CHECK-LABEL: clmulr_v4i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmovlb.u16 q1, q1
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vshr.u32 q0, q0, #15
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <4 x i16> %a to <4 x i32> ; zero-extend each lane of the first operand to twice its width
+  %b.ext = zext <4 x i16> %b to <4 x i32> ; same widening for the second operand
+  %clmul = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a.ext, <4 x i32> %b.ext) ; carry-less multiply on the widened lanes
+  %res.ext = lshr <4 x i32> %clmul, splat (i32 15) ; shift by (16 - 1) so product bits [30:15] land in [15:0]
+  %res = trunc <4 x i32> %res.ext to <4 x i16> ; narrow back to the original lane width
+  ret <4 x i16> %res
+}
+
+define <4 x i32> @clmulr_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { ; Test input: per-lane i32 carry-less multiply done at i64, returning bits [62:31] of each product.
+; CHECK-LABEL: clmulr_v4i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov.w r12, #32
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vbrsr.32 q0, q0, r12
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vbrsr.32 q1, q1, r12
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80000000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vbrsr.32 q0, q0, r12
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <4 x i32> %a to <4 x i64> ; zero-extend each lane of the first operand to twice its width
+  %b.ext = zext <4 x i32> %b to <4 x i64> ; same widening for the second operand
+  %clmul = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %a.ext, <4 x i64> %b.ext) ; carry-less multiply on the widened lanes
+  %res.ext = lshr <4 x i64> %clmul, splat (i64 31) ; shift by (32 - 1) so product bits [62:31] land in [31:0]
+  %res = trunc <4 x i64> %res.ext to <4 x i32> ; narrow back to the original lane width
+  ret <4 x i32> %res
+}
+
+define <2 x i32> @clmulr_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
+; CHECK-LABEL: clmulr_v2i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    adr.w r5, .LCPI29_0
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    vldrw.u32 q1, [r5]
+; CHECK-NEXT:    add r5, sp, #40
+; CHECK-NEXT:    vldrw.u32 q0, [r5]
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov.i64 q4, #0xffffffff
+; CHECK-NEXT:    vand q1, q0, q1
+; CHECK-NEXT:    mov.w r9, #0
+; CHECK-NEXT:    vmov r5, s4
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csetm r5, eq
+; CHECK-NEXT:    bfi r7, r5, #0, #8
+; CHECK-NEXT:    vmov r5, s6
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csetm r5, eq
+; CHECK-NEXT:    bfi r7, r5, #8, #8
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vmsr p0, r7
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    lsll r12, r5, #1
+; CHECK-NEXT:    lsll r4, r7, #1
+; CHECK-NEXT:    vmov q2[2], q2[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI29_1
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vpsel q2, q1, q2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    vmov d6, r0, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r5, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    vmov d7, r2, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q3, q3, q4
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r5, r7, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #2
+; CHECK-NEXT:    lsll r4, r3, #2
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_3
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #3
+; CHECK-NEXT:    lsll r4, r3, #3
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_4
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #4
+; CHECK-NEXT:    lsll r4, r3, #4
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_5
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #5
+; CHECK-NEXT:    lsll r4, r3, #5
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_6
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #6
+; CHECK-NEXT:    lsll r4, r3, #6
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_7
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #7
+; CHECK-NEXT:    lsll r4, r3, #7
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_8
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #8
+; CHECK-NEXT:    lsll r4, r3, #8
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_9
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #9
+; CHECK-NEXT:    lsll r4, r3, #9
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_10
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #10
+; CHECK-NEXT:    lsll r4, r3, #10
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_11
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #11
+; CHECK-NEXT:    lsll r4, r3, #11
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_12
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #12
+; CHECK-NEXT:    lsll r4, r3, #12
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_13
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #13
+; CHECK-NEXT:    lsll r4, r3, #13
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_14
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #14
+; CHECK-NEXT:    lsll r4, r3, #14
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_15
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #15
+; CHECK-NEXT:    lsll r4, r3, #15
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_16
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #16
+; CHECK-NEXT:    lsll r4, r3, #16
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_17
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #17
+; CHECK-NEXT:    lsll r4, r3, #17
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_18
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #18
+; CHECK-NEXT:    lsll r4, r3, #18
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_19
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #19
+; CHECK-NEXT:    lsll r4, r3, #19
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_20
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #20
+; CHECK-NEXT:    lsll r4, r3, #20
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_21
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #21
+; CHECK-NEXT:    lsll r4, r3, #21
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_22
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #22
+; CHECK-NEXT:    lsll r4, r3, #22
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_23
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #23
+; CHECK-NEXT:    lsll r4, r3, #23
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI29_24
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #24
+; CHECK-NEXT:    lsll r4, r3, #24
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_25
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #25
+; CHECK-NEXT:    lsll r4, r3, #25
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_26
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #26
+; CHECK-NEXT:    lsll r4, r3, #26
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_27
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #27
+; CHECK-NEXT:    lsll r4, r3, #27
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_28
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #28
+; CHECK-NEXT:    lsll r4, r3, #28
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_29
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #29
+; CHECK-NEXT:    lsll r4, r3, #29
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_30
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    lsll r0, r9, #31
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #30
+; CHECK-NEXT:    lsll r4, r3, #30
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI29_31
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    lsll r2, r1, #31
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], r9, r1
+; CHECK-NEXT:    vpsel q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    lsrl r0, r1, #31
+; CHECK-NEXT:    lsrl r2, r3, #31
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r9, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI29_0:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_1:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_2:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_3:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_4:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_5:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_6:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_7:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_8:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_9:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_10:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_11:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_12:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_13:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI29_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+  %a.ext = zext <2 x i32> %a to <2 x i64>
+  %b.ext = zext <2 x i32> %b to <2 x i64>
+  %clmul = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a.ext, <2 x i64> %b.ext)
+  %res.ext = lshr <2 x i64> %clmul, splat (i64 31)
+  %res = trunc <2 x i64> %res.ext to <2 x i32>
+  ret <2 x i32> %res
+}
+
+define <2 x i64> @clmulr_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; CHECK-LABEL: clmulr_v2i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    mov.w lr, #8
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vrev64.8 q1, q0
+; CHECK-NEXT:    vbrsr.8 q2, q1, lr
+; CHECK-NEXT:    vmov r0, r5, d4
+; CHECK-NEXT:    vmov r12, r9, d5
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    lsll r2, r1, #1
+; CHECK-NEXT:    lsll r4, r7, #1
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r2
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r1
+; CHECK-NEXT:    add r1, sp, #56
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    adr.w r1, .LCPI30_65
+; CHECK-NEXT:    mov r7, r9
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    vrev64.8 q1, q0
+; CHECK-NEXT:    vbrsr.8 q0, q1, lr
+; CHECK-NEXT:    vldrw.u32 q1, [r1]
+; CHECK-NEXT:    vand q1, q0, q1
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s6
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #8, #8
+; CHECK-NEXT:    adr.w r2, .LCPI30_66
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vmsr p0, r4
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s18
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r4
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    lsll r2, r7, #2
+; CHECK-NEXT:    lsll r4, r3, #2
+; CHECK-NEXT:    veor q3, q4, q3
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_67
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    vmov q4[3], q4[1], r3, r7
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    lsll r4, r7, #3
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #3
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_68
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #4
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #4
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_69
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #5
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #5
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_70
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #6
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #6
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_71
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #7
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #7
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_72
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #8
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #8
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_73
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #9
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #9
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_74
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #10
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #10
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_10
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #11
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #11
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_11
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #12
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #12
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_12
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #13
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #13
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_13
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #14
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #14
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_14
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #15
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #15
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_15
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #16
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #16
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_16
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #17
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #17
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_17
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #18
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #18
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_18
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #19
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #19
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_19
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #20
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #20
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_20
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #21
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #21
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_21
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #22
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #22
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_22
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #23
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #23
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_23
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #24
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #24
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_24
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #25
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #25
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_25
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #26
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #26
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_26
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #27
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #27
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_27
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #28
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #28
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_28
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #29
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #29
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_29
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #30
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #30
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_30
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r5, #31
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    lsll r2, r9, #31
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_31
+; CHECK-NEXT:    vmov q4[3], q4[1], r5, r9
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    adr.w r2, .LCPI30_33
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    veor q4, q3, q4
+; CHECK-NEXT:    vmov.f32 s13, s8
+; CHECK-NEXT:    vmov.f32 s15, s10
+; CHECK-NEXT:    vldrw.u32 q2, [r2]
+; CHECK-NEXT:    vldr s12, .LCPI30_75
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vmov r2, s9
+; CHECK-NEXT:    vmov.f32 s14, s12
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s11
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #1
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q2, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #1
+; CHECK-NEXT:    veor q2, q4, q2
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_34
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #2
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #2
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_35
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #3
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #3
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_36
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #4
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #4
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_37
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #5
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #5
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_38
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #6
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #6
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_39
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #7
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #7
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_40
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #8
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #8
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_41
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #9
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #9
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_42
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #10
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #10
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_43
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #11
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #11
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_44
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #12
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_45
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #13
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #13
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_46
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #14
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #14
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_47
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #15
+; CHECK-NEXT:    b.w .LBB30_2
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI30_75:
+; CHECK-NEXT:    .long 0x00000000 @ float 0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB30_2:
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #15
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_48
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #16
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #16
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_49
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #17
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #17
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_50
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #18
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #18
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_51
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #19
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #19
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_52
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #20
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #20
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_53
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #21
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #21
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_54
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #22
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #22
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_55
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #23
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #23
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_56
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    b.w .LBB30_13
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI30_65:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.4:
+; CHECK-NEXT:  .LCPI30_66:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LCPI30_67:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.6:
+; CHECK-NEXT:  .LCPI30_68:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.7:
+; CHECK-NEXT:  .LCPI30_69:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.8:
+; CHECK-NEXT:  .LCPI30_70:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.9:
+; CHECK-NEXT:  .LCPI30_71:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.10:
+; CHECK-NEXT:  .LCPI30_72:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.11:
+; CHECK-NEXT:  .LCPI30_73:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.12:
+; CHECK-NEXT:  .LCPI30_74:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB30_13:
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #24
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #24
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_57
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #25
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #25
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_58
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #26
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #26
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_59
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #27
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #27
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_60
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #28
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #28
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_61
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #29
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #29
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI30_62
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #30
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #30
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr r2, .LCPI30_63
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    lsls r0, r0, #31
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #31
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s13, r0
+; CHECK-NEXT:    adr r0, .LCPI30_64
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r0]
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    vand q0, q0, q4
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #8, #8
+; CHECK-NEXT:    vmsr p0, r1
+; CHECK-NEXT:    vpsel q0, q1, q3
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vrev64.8 q1, q0
+; CHECK-NEXT:    vbrsr.8 q0, q1, lr
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r9, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.14:
+; CHECK-NEXT:  .LCPI30_10:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_11:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_12:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_13:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI30_33:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:  .LCPI30_34:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:  .LCPI30_35:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:  .LCPI30_36:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:  .LCPI30_37:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:  .LCPI30_38:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:  .LCPI30_39:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:  .LCPI30_40:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:  .LCPI30_41:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:  .LCPI30_42:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:  .LCPI30_43:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:  .LCPI30_44:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:  .LCPI30_45:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:  .LCPI30_46:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:  .LCPI30_47:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:  .LCPI30_48:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:  .LCPI30_49:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:  .LCPI30_50:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:  .LCPI30_51:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:  .LCPI30_52:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:  .LCPI30_53:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:  .LCPI30_54:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:  .LCPI30_55:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:  .LCPI30_56:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:  .LCPI30_57:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:  .LCPI30_58:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:  .LCPI30_59:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:  .LCPI30_60:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:  .LCPI30_61:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:  .LCPI30_62:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:  .LCPI30_63:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:  .LCPI30_64:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+  %a.ext = zext <2 x i64> %a to <2 x i128>
+  %b.ext = zext <2 x i64> %b to <2 x i128>
+  %clmul = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a.ext, <2 x i128> %b.ext)
+  %res.ext = lshr <2 x i128> %clmul, splat (i128 63)
+  %res = trunc <2 x i128> %res.ext to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <1 x i64> @clmulr_v1i64(<1 x i64> %a, <1 x i64> %b) nounwind { ; returns bits 63..126 of the carry-less product of %a and %b
+; CHECK-LABEL: clmulr_v1i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #116
+; CHECK-NEXT:    sub sp, #116
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    rbit r2, r3
+; CHECK-NEXT:    and r7, r2, #2
+; CHECK-NEXT:    and r6, r2, #1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    str r7, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    str r6, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    and r11, r2, #33554432
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    and r10, r2, #67108864
+; CHECK-NEXT:    and r9, r2, #134217728
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    and r8, r2, #268435456
+; CHECK-NEXT:    and lr, r2, #-2147483648
+; CHECK-NEXT:    mul r5, r0, r9
+; CHECK-NEXT:    rbit r12, r12
+; CHECK-NEXT:    mul r4, r0, r8
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4
+; CHECK-NEXT:    str r6, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8
+; CHECK-NEXT:    str r6, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32
+; CHECK-NEXT:    str r6, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #64
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #128
+; CHECK-NEXT:    str r6, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #256
+; CHECK-NEXT:    str r6, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #512
+; CHECK-NEXT:    str r6, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1024
+; CHECK-NEXT:    str r6, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2048
+; CHECK-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4096
+; CHECK-NEXT:    str r6, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8192
+; CHECK-NEXT:    str r6, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16384
+; CHECK-NEXT:    str r6, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32768
+; CHECK-NEXT:    str r6, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #65536
+; CHECK-NEXT:    str r6, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #131072
+; CHECK-NEXT:    str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #262144
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #524288
+; CHECK-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1048576
+; CHECK-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2097152
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4194304
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8388608
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16777216
+; CHECK-NEXT:    str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r0, r11
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r0, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #536870912
+; CHECK-NEXT:    mul r7, r0, r5
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r2, #1073741824
+; CHECK-NEXT:    mul r6, r0, r4
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1
+; CHECK-NEXT:    eor.w r2, r6, r0
+; CHECK-NEXT:    rbit r0, r1
+; CHECK-NEXT:    and r6, r12, #2
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #4
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #8
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #16
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #32
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #64
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #128
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #256
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #512
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1024
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #2048
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #4096
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #8192
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #16384
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #32768
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #65536
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #131072
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #262144
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #524288
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1048576
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #2097152
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #4194304
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #8388608
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #16777216
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #33554432
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #67108864
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #134217728
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #268435456
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #536870912
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1073741824
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #-2147483648
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1
+; CHECK-NEXT:    eors r2, r7
+; CHECK-NEXT:    and r7, r3, #2
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #64
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #128
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #256
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #512
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1024
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2048
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4096
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8192
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16384
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32768
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #65536
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #131072
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #262144
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #524288
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1048576
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2097152
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4194304
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8388608
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16777216
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #33554432
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #67108864
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #134217728
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #268435456
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #536870912
+; CHECK-NEXT:    and r3, r3, #1073741824
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    muls r1, r3, r1
+; CHECK-NEXT:    ldr r3, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eor.w r1, r2, r1, lsr #1
+; CHECK-NEXT:    rbit r2, r1
+; CHECK-NEXT:    ldr r1, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r0, r1
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r11
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r10
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r9
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r8
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r5
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r4
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    rbit r1, r0
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    add sp, #116
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a.ext = zext <1 x i64> %a to <1 x i128> ; widen both operands so the whole product fits in one i128 element
+  %b.ext = zext <1 x i64> %b to <1 x i128>
+  %clmul = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a.ext, <1 x i128> %b.ext) ; overload suffix matches the <1 x i128> operand type
+  %res.ext = lshr <1 x i128> %clmul, splat (i128 63) ; discard the low 63 product bits
+  %res = trunc <1 x i128> %res.ext to <1 x i64> ; keep the next 64 bits (product bits 63..126)
+  ret <1 x i64> %res
+}
+
+define <16 x i8> @clmulh_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; per lane: upper 8 bits of the 16-bit carry-less product of %a and %b
+; CHECK-LABEL: clmulh_v16i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i8 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov.w r12, #8
+; CHECK-NEXT:    vmov.i8 q3, #0x1
+; CHECK-NEXT:    vbrsr.8 q0, q0, r12
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vbrsr.8 q1, q1, r12
+; CHECK-NEXT:    vmul.i8 q2, q1, q2
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i8 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i8 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i8 q3, #0x80
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i8 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vbrsr.8 q0, q0, r12
+; CHECK-NEXT:    vshr.u8 q0, q0, #1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <16 x i8> %a to <16 x i16> ; widen each lane so the full product is representable
+  %b.ext = zext <16 x i8> %b to <16 x i16>
+  %clmul = call <16 x i16> @llvm.clmul.v16i16(<16 x i16> %a.ext, <16 x i16> %b.ext) ; carry-less multiply at 16 bits per lane
+  %res.ext = lshr <16 x i16> %clmul, splat (i16 8) ; move product bits 8..15 down to bits 0..7
+  %res = trunc <16 x i16> %res.ext to <16 x i8> ; narrow back to the input element width
+  ret <16 x i8> %res
+}
+
+define <8 x i8> @clmulh_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; per lane: upper 8 bits of the 16-bit carry-less product of %a and %b
+; CHECK-LABEL: clmulh_v8i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov.i16 q2, #0x2
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vmovlb.u8 q1, q1
+; CHECK-NEXT:    vmovlb.u8 q0, q0
+; CHECK-NEXT:    vand q2, q1, q2
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q2, q0, q2
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q1, q3
+; CHECK-NEXT:    vmul.i16 q3, q0, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vmul.i16 q0, q0, q1
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vshr.u16 q0, q0, #8
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <8 x i8> %a to <8 x i16> ; widen each lane so the full product is representable
+  %b.ext = zext <8 x i8> %b to <8 x i16>
+  %clmul = call <8 x i16> @llvm.clmul.v8i16(<8 x i16> %a.ext, <8 x i16> %b.ext) ; overload suffix matches the <8 x i16> operand type
+  %res.ext = lshr <8 x i16> %clmul, splat (i16 8) ; move product bits 8..15 down to bits 0..7
+  %res = trunc <8 x i16> %res.ext to <8 x i8> ; narrow back to the input element width
+  ret <8 x i8> %res
+}
+
+define <8 x i16> @clmulh_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; per lane: upper 16 bits of the 32-bit carry-less product of %a and %b
+; CHECK-LABEL: clmulh_v8i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i16 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov.w r12, #16
+; CHECK-NEXT:    vmov.i16 q3, #0x1
+; CHECK-NEXT:    vbrsr.16 q0, q0, r12
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vbrsr.16 q1, q1, r12
+; CHECK-NEXT:    vmul.i16 q2, q1, q2
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i16 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i16 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i16 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i16 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vbrsr.16 q0, q0, r12
+; CHECK-NEXT:    vshr.u16 q0, q0, #1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <8 x i16> %a to <8 x i32> ; widen each lane so the full product is representable
+  %b.ext = zext <8 x i16> %b to <8 x i32>
+  %clmul = call <8 x i32> @llvm.clmul.v8i32(<8 x i32> %a.ext, <8 x i32> %b.ext) ; carry-less multiply at 32 bits per lane
+  %res.ext = lshr <8 x i32> %clmul, splat (i32 16) ; move product bits 16..31 down to bits 0..15
+  %res = trunc <8 x i32> %res.ext to <8 x i16> ; narrow back to the input element width
+  ret <8 x i16> %res
+}
+
+define <4 x i16> @clmulh_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
+; CHECK-LABEL: clmulh_v4i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmovlb.u16 q1, q1
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vshr.u32 q0, q0, #16
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <4 x i16> %a to <4 x i32> ; widen both inputs to 32-bit lanes so the carry-less product is not truncated
+  %b.ext = zext <4 x i16> %b to <4 x i32>
+  %clmul = call <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a.ext, <4 x i32> %b.ext)
+  %res.ext = lshr <4 x i32> %clmul, splat (i32 16) ; move the upper 16 bits of each product down to bits 15:0
+  %res = trunc <4 x i32> %res.ext to <4 x i16> ; result = high half of the 16x16 carry-less multiply, per lane
+  ret <4 x i16> %res
+}
+
+define <4 x i32> @clmulh_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; CHECK-LABEL: clmulh_v4i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vmov.i32 q1, #0x2
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    mov.w r12, #32
+; CHECK-NEXT:    vmov.i32 q3, #0x1
+; CHECK-NEXT:    vbrsr.32 q0, q0, r12
+; CHECK-NEXT:    vand q2, q0, q1
+; CHECK-NEXT:    vmov d3, r2, r3
+; CHECK-NEXT:    vmov d2, r0, r1
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vbrsr.32 q1, q1, r12
+; CHECK-NEXT:    vmul.i32 q2, q1, q2
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vmov.i32 q3, #0x4
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x100000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x200000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x400000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x800000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x1000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x2000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x4000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x8000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x10000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x20000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x40000000
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmul.i32 q3, q1, q3
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vmov.i32 q3, #0x80000000
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmul.i32 q0, q1, q0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vbrsr.32 q0, q0, r12
+; CHECK-NEXT:    vshr.u32 q0, q0, #1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %a.ext = zext <4 x i32> %a to <4 x i64> ; widen both inputs to 64-bit lanes so the carry-less product is not truncated
+  %b.ext = zext <4 x i32> %b to <4 x i64>
+  %clmul = call <4 x i64> @llvm.clmul.v4i64(<4 x i64> %a.ext, <4 x i64> %b.ext)
+  %res.ext = lshr <4 x i64> %clmul, splat (i64 32) ; move the upper 32 bits of each product down to bits 31:0
+  %res = trunc <4 x i64> %res.ext to <4 x i32> ; result = high half of the 32x32 carry-less multiply, per lane
+  ret <4 x i32> %res
+}
+
+define <2 x i32> @clmulh_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
+; CHECK-LABEL: clmulh_v2i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    adr.w r12, .LCPI37_0
+; CHECK-NEXT:    mov.w lr, #0
+; CHECK-NEXT:    vldrw.u32 q1, [r12]
+; CHECK-NEXT:    add.w r12, sp, #32
+; CHECK-NEXT:    vldrw.u32 q0, [r12]
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    vand q1, q0, q1
+; CHECK-NEXT:    lsll r4, r7, #1
+; CHECK-NEXT:    vmov r12, s4
+; CHECK-NEXT:    vmov.i64 q4, #0xffffffff
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csetm r12, eq
+; CHECK-NEXT:    bfi lr, r12, #0, #8
+; CHECK-NEXT:    vmov r12, s6
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    csetm r12, eq
+; CHECK-NEXT:    bfi lr, r12, #8, #8
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    lsll r12, r5, #1
+; CHECK-NEXT:    vmsr p0, lr
+; CHECK-NEXT:    vmov q2[2], q2[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q2[3], q2[1], r7, r5
+; CHECK-NEXT:    adr.w r7, .LCPI37_1
+; CHECK-NEXT:    vldrw.u32 q3, [r7]
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    vpsel q2, q1, q2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r7, s12
+; CHECK-NEXT:    vmov d6, r0, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_2
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r5, r7, #0, #8
+; CHECK-NEXT:    vmov r7, s14
+; CHECK-NEXT:    vmov d7, r2, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vand q3, q3, q4
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r7, eq
+; CHECK-NEXT:    bfi r5, r7, #8, #8
+; CHECK-NEXT:    vmsr p0, r5
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    veor q2, q3, q2
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #2
+; CHECK-NEXT:    lsll r4, r3, #2
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_3
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #3
+; CHECK-NEXT:    lsll r4, r3, #3
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_4
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #4
+; CHECK-NEXT:    lsll r4, r3, #4
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_5
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #5
+; CHECK-NEXT:    lsll r4, r3, #5
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_6
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #6
+; CHECK-NEXT:    lsll r4, r3, #6
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_7
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #7
+; CHECK-NEXT:    lsll r4, r3, #7
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_8
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #8
+; CHECK-NEXT:    lsll r4, r3, #8
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_9
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #9
+; CHECK-NEXT:    lsll r4, r3, #9
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_10
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #10
+; CHECK-NEXT:    lsll r4, r3, #10
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_11
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #11
+; CHECK-NEXT:    lsll r4, r3, #11
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_12
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #12
+; CHECK-NEXT:    lsll r4, r3, #12
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_13
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #13
+; CHECK-NEXT:    lsll r4, r3, #13
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_14
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #14
+; CHECK-NEXT:    lsll r4, r3, #14
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_15
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #15
+; CHECK-NEXT:    lsll r4, r3, #15
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_16
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #16
+; CHECK-NEXT:    lsll r4, r3, #16
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_17
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #17
+; CHECK-NEXT:    lsll r4, r3, #17
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_18
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #18
+; CHECK-NEXT:    lsll r4, r3, #18
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_19
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #19
+; CHECK-NEXT:    lsll r4, r3, #19
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_20
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #20
+; CHECK-NEXT:    lsll r4, r3, #20
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_21
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #21
+; CHECK-NEXT:    lsll r4, r3, #21
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_22
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #22
+; CHECK-NEXT:    lsll r4, r3, #22
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_23
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #23
+; CHECK-NEXT:    lsll r4, r3, #23
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr.w r1, .LCPI37_24
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #24
+; CHECK-NEXT:    lsll r4, r3, #24
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_25
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #25
+; CHECK-NEXT:    lsll r4, r3, #25
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_26
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #26
+; CHECK-NEXT:    lsll r4, r3, #26
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_27
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #27
+; CHECK-NEXT:    lsll r4, r3, #27
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_28
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #28
+; CHECK-NEXT:    lsll r4, r3, #28
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_29
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #29
+; CHECK-NEXT:    lsll r4, r3, #29
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_30
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q3, q0, q3
+; CHECK-NEXT:    vmov r1, s12
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s14
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r12, r1, #30
+; CHECK-NEXT:    lsll r4, r3, #30
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r12
+; CHECK-NEXT:    vmov q3[3], q3[1], r3, r1
+; CHECK-NEXT:    adr r1, .LCPI37_31
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q2, q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [r1]
+; CHECK-NEXT:    vand q0, q0, q3
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #0, #8
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, eq
+; CHECK-NEXT:    bfi r3, r1, #8, #8
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    lsll r2, r1, #31
+; CHECK-NEXT:    lsll r0, r3, #31
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vpsel q0, q1, q0
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    vmov r2, s3
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI37_0:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_1:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_2:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_3:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_4:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_5:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_6:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_7:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_8:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_9:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_10:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_11:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_12:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_13:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI37_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+  %a.ext = zext <2 x i32> %a to <2 x i64>
+  %b.ext = zext <2 x i32> %b to <2 x i64>
+  %clmul = call <2 x i64> @llvm.clmul.v2i64(<2 x i64> %a.ext, <2 x i64> %b.ext)
+  %res.ext = lshr <2 x i64> %clmul, splat (i64 32)
+  %res = trunc <2 x i64> %res.ext to <2 x i32>
+  ret <2 x i32> %res
+}
+
+define <2 x i64> @clmulh_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; CHECK-LABEL: clmulh_v2i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r9, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    mov.w lr, #8
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vrev64.8 q1, q0
+; CHECK-NEXT:    vbrsr.8 q2, q1, lr
+; CHECK-NEXT:    vmov r0, r5, d4
+; CHECK-NEXT:    vmov r12, r9, d5
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    mov r1, r9
+; CHECK-NEXT:    lsll r2, r1, #1
+; CHECK-NEXT:    lsll r4, r7, #1
+; CHECK-NEXT:    vmov q3[2], q3[0], r4, r2
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vmov q3[3], q3[1], r7, r1
+; CHECK-NEXT:    add r1, sp, #56
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    adr.w r1, .LCPI38_65
+; CHECK-NEXT:    mov r7, r9
+; CHECK-NEXT:    mov r3, r5
+; CHECK-NEXT:    vrev64.8 q1, q0
+; CHECK-NEXT:    vbrsr.8 q0, q1, lr
+; CHECK-NEXT:    vldrw.u32 q1, [r1]
+; CHECK-NEXT:    vand q1, q0, q1
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s6
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #8, #8
+; CHECK-NEXT:    adr.w r2, .LCPI38_66
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vmsr p0, r4
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vpsel q3, q1, q3
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s18
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r4, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r4
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vpsel q4, q1, q2
+; CHECK-NEXT:    lsll r2, r7, #2
+; CHECK-NEXT:    lsll r4, r3, #2
+; CHECK-NEXT:    veor q3, q4, q3
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_67
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    vmov q4[3], q4[1], r3, r7
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    lsll r4, r7, #3
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #3
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_68
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #4
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #4
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_69
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #5
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #5
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_70
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #6
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #6
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_71
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #7
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #7
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_72
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #8
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #8
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_73
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #9
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #9
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_74
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #10
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #10
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_76
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #11
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #11
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_11
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #12
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #12
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_12
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #13
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #13
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_13
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #14
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #14
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_14
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #15
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #15
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_15
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #16
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #16
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_16
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #17
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #17
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_17
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #18
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #18
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_18
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #19
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #19
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_19
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #20
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #20
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_20
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #21
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #21
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_21
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #22
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #22
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_22
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #23
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #23
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_23
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #24
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #24
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_24
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #25
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #25
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_25
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #26
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #26
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_26
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #27
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #27
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_27
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #28
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #28
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_28
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #29
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #29
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_29
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r7, #30
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    mov r3, r9
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    lsll r2, r3, #30
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_30
+; CHECK-NEXT:    vmov q4[3], q4[1], r7, r3
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    lsll r4, r5, #31
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    lsll r2, r9, #31
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    veor q3, q3, q4
+; CHECK-NEXT:    vmov q4[2], q4[0], r4, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_31
+; CHECK-NEXT:    vmov q4[3], q4[1], r5, r9
+; CHECK-NEXT:    vldrw.u32 q5, [r2]
+; CHECK-NEXT:    vand q5, q0, q5
+; CHECK-NEXT:    vmov r2, s20
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s22
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    adr.w r2, .LCPI38_33
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q4
+; CHECK-NEXT:    veor q4, q3, q4
+; CHECK-NEXT:    vmov.f32 s13, s8
+; CHECK-NEXT:    vmov.f32 s15, s10
+; CHECK-NEXT:    vldrw.u32 q2, [r2]
+; CHECK-NEXT:    vldr s12, .LCPI38_75
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vmov r2, s9
+; CHECK-NEXT:    vmov.f32 s14, s12
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s11
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #1
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q2, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #1
+; CHECK-NEXT:    veor q2, q4, q2
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_34
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #2
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #2
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_35
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #3
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #3
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_36
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #4
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #4
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_37
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #5
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #5
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_38
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #6
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #6
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_39
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #7
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #7
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_40
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #8
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #8
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_41
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #9
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #9
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_42
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #10
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #10
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_43
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #11
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #11
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_44
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #12
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #12
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_45
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #13
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #13
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_46
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #14
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #14
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_47
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #15
+; CHECK-NEXT:    b.w .LBB38_2
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI38_75:
+; CHECK-NEXT:    .long 0x00000000 @ float 0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB38_2:
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #15
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_48
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #16
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #16
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_49
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #17
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #17
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_50
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #18
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #18
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_51
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #19
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #19
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_52
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #20
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #20
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_53
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #21
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #21
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_54
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #22
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #22
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_55
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #23
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #23
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_56
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    b.w .LBB38_14
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI38_65:
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.4:
+; CHECK-NEXT:  .LCPI38_66:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.5:
+; CHECK-NEXT:  .LCPI38_67:
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.6:
+; CHECK-NEXT:  .LCPI38_68:
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.7:
+; CHECK-NEXT:  .LCPI38_69:
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.8:
+; CHECK-NEXT:  .LCPI38_70:
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.9:
+; CHECK-NEXT:  .LCPI38_71:
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.10:
+; CHECK-NEXT:  .LCPI38_72:
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.11:
+; CHECK-NEXT:  .LCPI38_73:
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.12:
+; CHECK-NEXT:  .LCPI38_74:
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.13:
+; CHECK-NEXT:  .LCPI38_76:
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .p2align 1
+; CHECK-NEXT:  .LBB38_14:
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #24
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #24
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_57
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #25
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #25
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_58
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #26
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #26
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_59
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #27
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #27
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_60
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #28
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #28
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_61
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #29
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #29
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr.w r2, .LCPI38_62
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #30
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    lsls r2, r0, #30
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vmov s13, r2
+; CHECK-NEXT:    adr r2, .LCPI38_63
+; CHECK-NEXT:    vldrw.u32 q4, [r2]
+; CHECK-NEXT:    lsls r0, r0, #31
+; CHECK-NEXT:    vand q4, q0, q4
+; CHECK-NEXT:    vmov r2, s17
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #0, #8
+; CHECK-NEXT:    vmov r2, s19
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, eq
+; CHECK-NEXT:    bfi r3, r2, #8, #8
+; CHECK-NEXT:    lsl.w r2, r12, #31
+; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    vpsel q4, q1, q3
+; CHECK-NEXT:    vmov s13, r0
+; CHECK-NEXT:    adr r0, .LCPI38_64
+; CHECK-NEXT:    veor q2, q2, q4
+; CHECK-NEXT:    vldrw.u32 q4, [r0]
+; CHECK-NEXT:    vmov s15, r2
+; CHECK-NEXT:    vand q0, q0, q4
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #8, #8
+; CHECK-NEXT:    vmsr p0, r1
+; CHECK-NEXT:    vpsel q0, q1, q3
+; CHECK-NEXT:    veor q0, q2, q0
+; CHECK-NEXT:    vrev64.8 q1, q0
+; CHECK-NEXT:    vbrsr.8 q0, q1, lr
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    lsrl r0, r1, #1
+; CHECK-NEXT:    lsrl r2, r3, #1
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r9, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.15:
+; CHECK-NEXT:  .LCPI38_11:
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_12:
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_13:
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_14:
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_15:
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_16:
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_17:
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_18:
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_19:
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_20:
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_21:
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_22:
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_23:
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_24:
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_25:
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_26:
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_27:
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_28:
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_29:
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_30:
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_31:
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI38_33:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:  .LCPI38_34:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:  .LCPI38_35:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4 @ 0x4
+; CHECK-NEXT:  .LCPI38_36:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8 @ 0x8
+; CHECK-NEXT:  .LCPI38_37:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16 @ 0x10
+; CHECK-NEXT:  .LCPI38_38:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32 @ 0x20
+; CHECK-NEXT:  .LCPI38_39:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 64 @ 0x40
+; CHECK-NEXT:  .LCPI38_40:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 128 @ 0x80
+; CHECK-NEXT:  .LCPI38_41:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 256 @ 0x100
+; CHECK-NEXT:  .LCPI38_42:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 512 @ 0x200
+; CHECK-NEXT:  .LCPI38_43:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1024 @ 0x400
+; CHECK-NEXT:  .LCPI38_44:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2048 @ 0x800
+; CHECK-NEXT:  .LCPI38_45:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4096 @ 0x1000
+; CHECK-NEXT:  .LCPI38_46:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8192 @ 0x2000
+; CHECK-NEXT:  .LCPI38_47:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16384 @ 0x4000
+; CHECK-NEXT:  .LCPI38_48:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 32768 @ 0x8000
+; CHECK-NEXT:  .LCPI38_49:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 65536 @ 0x10000
+; CHECK-NEXT:  .LCPI38_50:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 131072 @ 0x20000
+; CHECK-NEXT:  .LCPI38_51:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 262144 @ 0x40000
+; CHECK-NEXT:  .LCPI38_52:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 524288 @ 0x80000
+; CHECK-NEXT:  .LCPI38_53:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1048576 @ 0x100000
+; CHECK-NEXT:  .LCPI38_54:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2097152 @ 0x200000
+; CHECK-NEXT:  .LCPI38_55:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4194304 @ 0x400000
+; CHECK-NEXT:  .LCPI38_56:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 8388608 @ 0x800000
+; CHECK-NEXT:  .LCPI38_57:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 16777216 @ 0x1000000
+; CHECK-NEXT:  .LCPI38_58:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 33554432 @ 0x2000000
+; CHECK-NEXT:  .LCPI38_59:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 67108864 @ 0x4000000
+; CHECK-NEXT:  .LCPI38_60:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 134217728 @ 0x8000000
+; CHECK-NEXT:  .LCPI38_61:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 268435456 @ 0x10000000
+; CHECK-NEXT:  .LCPI38_62:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 536870912 @ 0x20000000
+; CHECK-NEXT:  .LCPI38_63:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1073741824 @ 0x40000000
+; CHECK-NEXT:  .LCPI38_64:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+  %a.ext = zext <2 x i64> %a to <2 x i128>
+  %b.ext = zext <2 x i64> %b to <2 x i128>
+  %clmul = call <2 x i128> @llvm.clmul.v2i128(<2 x i128> %a.ext, <2 x i128> %b.ext)
+  %res.ext = lshr <2 x i128> %clmul, splat (i128 64)
+  %res = trunc <2 x i128> %res.ext to <2 x i64>
+  ret <2 x i64> %res
+}
+
+define <1 x i64> @clmulh_v1i64(<1 x i64> %a, <1 x i64> %b) nounwind { ; "clmulh": returns bits 64..127 of the 128-bit llvm.clmul of %a and %b
+; CHECK-LABEL: clmulh_v1i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #116
+; CHECK-NEXT:    sub sp, #116
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    rbit r2, r3
+; CHECK-NEXT:    and r7, r2, #2
+; CHECK-NEXT:    and r6, r2, #1
+; CHECK-NEXT:    rbit r0, r0
+; CHECK-NEXT:    str r7, [sp, #100] @ 4-byte Spill
+; CHECK-NEXT:    str r6, [sp, #96] @ 4-byte Spill
+; CHECK-NEXT:    and r11, r2, #33554432
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    and r10, r2, #67108864
+; CHECK-NEXT:    and r9, r2, #134217728
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    and r8, r2, #268435456
+; CHECK-NEXT:    and lr, r2, #-2147483648
+; CHECK-NEXT:    mul r5, r0, r9
+; CHECK-NEXT:    rbit r12, r12
+; CHECK-NEXT:    mul r4, r0, r8
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4
+; CHECK-NEXT:    str r6, [sp, #92] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8
+; CHECK-NEXT:    str r6, [sp, #88] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16
+; CHECK-NEXT:    str r6, [sp, #84] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32
+; CHECK-NEXT:    str r6, [sp, #80] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #64
+; CHECK-NEXT:    str r6, [sp, #76] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #128
+; CHECK-NEXT:    str r6, [sp, #72] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #256
+; CHECK-NEXT:    str r6, [sp, #68] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #512
+; CHECK-NEXT:    str r6, [sp, #64] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1024
+; CHECK-NEXT:    str r6, [sp, #60] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2048
+; CHECK-NEXT:    str r6, [sp, #56] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4096
+; CHECK-NEXT:    str r6, [sp, #52] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8192
+; CHECK-NEXT:    str r6, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16384
+; CHECK-NEXT:    str r6, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #32768
+; CHECK-NEXT:    str r6, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #65536
+; CHECK-NEXT:    str r6, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #131072
+; CHECK-NEXT:    str r6, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #262144
+; CHECK-NEXT:    str r6, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #524288
+; CHECK-NEXT:    str r6, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #1048576
+; CHECK-NEXT:    str r6, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #2097152
+; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #4194304
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #8388608
+; CHECK-NEXT:    str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r2, #16777216
+; CHECK-NEXT:    str r6, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r0, r11
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    mul r6, r0, r10
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    eors r5, r6
+; CHECK-NEXT:    eors r4, r5
+; CHECK-NEXT:    and r5, r2, #536870912
+; CHECK-NEXT:    mul r7, r0, r5
+; CHECK-NEXT:    eors r7, r4
+; CHECK-NEXT:    and r4, r2, #1073741824
+; CHECK-NEXT:    mul r6, r0, r4
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1
+; CHECK-NEXT:    eor.w r2, r6, r0
+; CHECK-NEXT:    rbit r0, r1
+; CHECK-NEXT:    and r6, r12, #2
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    muls r6, r0, r6
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #4
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #8
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #16
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #32
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #64
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #128
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #256
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #512
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1024
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #2048
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #4096
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #8192
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #16384
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #32768
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #65536
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #131072
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #262144
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #524288
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1048576
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #2097152
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #4194304
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #8388608
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #16777216
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #33554432
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #67108864
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #134217728
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #268435456
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #536870912
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #1073741824
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r6, r7
+; CHECK-NEXT:    and r7, r12, #-2147483648
+; CHECK-NEXT:    muls r7, r0, r7
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1
+; CHECK-NEXT:    eors r2, r7
+; CHECK-NEXT:    and r7, r3, #2
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    muls r7, r1, r7
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #64
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #128
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #256
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #512
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1024
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2048
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4096
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8192
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16384
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #32768
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #65536
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #131072
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #262144
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #524288
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #1048576
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #2097152
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #4194304
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #8388608
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #16777216
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #33554432
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #67108864
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #134217728
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #268435456
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    and r6, r3, #536870912
+; CHECK-NEXT:    and r3, r3, #1073741824
+; CHECK-NEXT:    muls r6, r1, r6
+; CHECK-NEXT:    muls r1, r3, r1
+; CHECK-NEXT:    ldr r3, [sp, #96] @ 4-byte Reload
+; CHECK-NEXT:    eors r7, r6
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r7
+; CHECK-NEXT:    rbit r1, r1
+; CHECK-NEXT:    eor.w r1, r2, r1, lsr #1
+; CHECK-NEXT:    rbit r2, r1
+; CHECK-NEXT:    ldr r1, [sp, #100] @ 4-byte Reload
+; CHECK-NEXT:    muls r1, r0, r1
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #92] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #88] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #84] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #80] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #76] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #72] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #68] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #64] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #60] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #52] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    muls r3, r0, r3
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r11
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r10
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r9
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r8
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r5
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    mul r3, r0, r4
+; CHECK-NEXT:    mul r0, r0, lr
+; CHECK-NEXT:    eors r1, r3
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    rbit r1, r0
+; CHECK-NEXT:    lsrl r2, r1, #1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    add sp, #116
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a.ext = zext <1 x i64> %a to <1 x i128> ; zero-extend both 64-bit operands to 128 bits ...
+  %b.ext = zext <1 x i64> %b to <1 x i128> ; ... so the multiply below is performed at i128 width
+  %clmul = call <1 x i128> @llvm.clmul.v1i128(<1 x i128> %a.ext, <1 x i128> %b.ext) ; 128-bit llvm.clmul of the widened operands
+  %res.ext = lshr <1 x i128> %clmul, splat (i128 64) ; move the upper 64 bits of the product into the low half
+  %res = trunc <1 x i128> %res.ext to <1 x i64> ; discard everything except those 64 bits
+  ret <1 x i64> %res
+}