; blob: ee182100fdfd89417108cb59cc839070be5d482a [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
; RUN: llc -mtriple=aarch64-linux-unknown-gnu %s -o - -mattr=+aes | FileCheck %s --check-prefixes=CHECK,CHECK-AES
define i8 @clmul_i8(i8 %x, i8 %y) {
; CHECK-LABEL: clmul_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: pmul v0.8b, v1.8b, v0.8b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%a = call i8 @llvm.clmul.i8(i8 %x, i8 %y)
ret i8 %a
}
define i16 @clmul_i16(i16 %x, i16 %y) {
; CHECK-NEON-LABEL: clmul_i16:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: and w8, w1, #0x2
; CHECK-NEON-NEXT: and w9, w1, #0x1
; CHECK-NEON-NEXT: and w10, w1, #0x4
; CHECK-NEON-NEXT: mul w8, w0, w8
; CHECK-NEON-NEXT: and w11, w1, #0x8
; CHECK-NEON-NEXT: and w12, w1, #0x10
; CHECK-NEON-NEXT: mul w9, w0, w9
; CHECK-NEON-NEXT: and w13, w1, #0x20
; CHECK-NEON-NEXT: and w15, w1, #0x80
; CHECK-NEON-NEXT: mul w10, w0, w10
; CHECK-NEON-NEXT: and w16, w1, #0x100
; CHECK-NEON-NEXT: and w2, w1, #0x800
; CHECK-NEON-NEXT: mul w11, w0, w11
; CHECK-NEON-NEXT: and w14, w1, #0x40
; CHECK-NEON-NEXT: and w17, w1, #0x200
; CHECK-NEON-NEXT: mul w12, w0, w12
; CHECK-NEON-NEXT: eor w8, w9, w8
; CHECK-NEON-NEXT: and w9, w1, #0x1000
; CHECK-NEON-NEXT: mul w13, w0, w13
; CHECK-NEON-NEXT: and w18, w1, #0x400
; CHECK-NEON-NEXT: mul w15, w0, w15
; CHECK-NEON-NEXT: eor w10, w10, w11
; CHECK-NEON-NEXT: and w11, w1, #0x2000
; CHECK-NEON-NEXT: mul w16, w0, w16
; CHECK-NEON-NEXT: eor w8, w8, w10
; CHECK-NEON-NEXT: and w10, w1, #0x4000
; CHECK-NEON-NEXT: mul w2, w0, w2
; CHECK-NEON-NEXT: eor w12, w12, w13
; CHECK-NEON-NEXT: and w13, w1, #0xffff8000
; CHECK-NEON-NEXT: mul w9, w0, w9
; CHECK-NEON-NEXT: mul w14, w0, w14
; CHECK-NEON-NEXT: eor w15, w15, w16
; CHECK-NEON-NEXT: mul w17, w0, w17
; CHECK-NEON-NEXT: mul w11, w0, w11
; CHECK-NEON-NEXT: eor w9, w2, w9
; CHECK-NEON-NEXT: mul w18, w0, w18
; CHECK-NEON-NEXT: eor w12, w12, w14
; CHECK-NEON-NEXT: mul w10, w0, w10
; CHECK-NEON-NEXT: eor w14, w15, w17
; CHECK-NEON-NEXT: eor w8, w8, w12
; CHECK-NEON-NEXT: mul w13, w0, w13
; CHECK-NEON-NEXT: eor w9, w9, w11
; CHECK-NEON-NEXT: eor w11, w14, w18
; CHECK-NEON-NEXT: eor w9, w9, w10
; CHECK-NEON-NEXT: eor w8, w8, w11
; CHECK-NEON-NEXT: eor w9, w9, w13
; CHECK-NEON-NEXT: eor w0, w8, w9
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i16:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: fmov s0, w1
; CHECK-AES-NEXT: fmov s1, w0
; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
; CHECK-AES-NEXT: fmov w0, s0
; CHECK-AES-NEXT: ret
%a = call i16 @llvm.clmul.i16(i16 %x, i16 %y)
ret i16 %a
}
define i32 @clmul_i32(i32 %x, i32 %y) {
; CHECK-NEON-LABEL: clmul_i32:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: and w8, w1, #0x2
; CHECK-NEON-NEXT: and w9, w1, #0x1
; CHECK-NEON-NEXT: and w10, w1, #0x4
; CHECK-NEON-NEXT: mul w8, w0, w8
; CHECK-NEON-NEXT: and w11, w1, #0x8
; CHECK-NEON-NEXT: and w12, w1, #0x10
; CHECK-NEON-NEXT: mul w9, w0, w9
; CHECK-NEON-NEXT: and w13, w1, #0x20
; CHECK-NEON-NEXT: and w14, w1, #0x40
; CHECK-NEON-NEXT: mul w10, w0, w10
; CHECK-NEON-NEXT: and w2, w1, #0x800
; CHECK-NEON-NEXT: and w15, w1, #0x80
; CHECK-NEON-NEXT: mul w11, w0, w11
; CHECK-NEON-NEXT: and w16, w1, #0x100
; CHECK-NEON-NEXT: and w17, w1, #0x200
; CHECK-NEON-NEXT: mul w12, w0, w12
; CHECK-NEON-NEXT: eor w8, w9, w8
; CHECK-NEON-NEXT: and w9, w1, #0x1000
; CHECK-NEON-NEXT: mul w13, w0, w13
; CHECK-NEON-NEXT: and w18, w1, #0x400
; CHECK-NEON-NEXT: mul w14, w0, w14
; CHECK-NEON-NEXT: eor w10, w10, w11
; CHECK-NEON-NEXT: and w11, w1, #0x2000
; CHECK-NEON-NEXT: mul w2, w0, w2
; CHECK-NEON-NEXT: eor w8, w8, w10
; CHECK-NEON-NEXT: and w10, w1, #0x4000
; CHECK-NEON-NEXT: mul w9, w0, w9
; CHECK-NEON-NEXT: eor w12, w12, w13
; CHECK-NEON-NEXT: and w13, w1, #0x8000
; CHECK-NEON-NEXT: mul w15, w0, w15
; CHECK-NEON-NEXT: eor w12, w12, w14
; CHECK-NEON-NEXT: and w14, w1, #0x10000
; CHECK-NEON-NEXT: mul w16, w0, w16
; CHECK-NEON-NEXT: eor w8, w8, w12
; CHECK-NEON-NEXT: and w12, w1, #0x20000
; CHECK-NEON-NEXT: mul w11, w0, w11
; CHECK-NEON-NEXT: eor w9, w2, w9
; CHECK-NEON-NEXT: and w2, w1, #0x400000
; CHECK-NEON-NEXT: mul w17, w0, w17
; CHECK-NEON-NEXT: mul w10, w0, w10
; CHECK-NEON-NEXT: eor w15, w15, w16
; CHECK-NEON-NEXT: and w16, w1, #0x40000
; CHECK-NEON-NEXT: mul w13, w0, w13
; CHECK-NEON-NEXT: eor w9, w9, w11
; CHECK-NEON-NEXT: and w11, w1, #0x800000
; CHECK-NEON-NEXT: mul w18, w0, w18
; CHECK-NEON-NEXT: eor w15, w15, w17
; CHECK-NEON-NEXT: and w17, w1, #0x80000
; CHECK-NEON-NEXT: mul w14, w0, w14
; CHECK-NEON-NEXT: eor w9, w9, w10
; CHECK-NEON-NEXT: and w10, w1, #0x1000000
; CHECK-NEON-NEXT: mul w12, w0, w12
; CHECK-NEON-NEXT: eor w9, w9, w13
; CHECK-NEON-NEXT: and w13, w1, #0x2000000
; CHECK-NEON-NEXT: mul w16, w0, w16
; CHECK-NEON-NEXT: eor w15, w15, w18
; CHECK-NEON-NEXT: and w18, w1, #0x100000
; CHECK-NEON-NEXT: mul w2, w0, w2
; CHECK-NEON-NEXT: eor w8, w8, w15
; CHECK-NEON-NEXT: and w15, w1, #0x200000
; CHECK-NEON-NEXT: mul w11, w0, w11
; CHECK-NEON-NEXT: eor w12, w14, w12
; CHECK-NEON-NEXT: and w14, w1, #0x4000000
; CHECK-NEON-NEXT: mul w17, w0, w17
; CHECK-NEON-NEXT: eor w12, w12, w16
; CHECK-NEON-NEXT: and w16, w1, #0x8000000
; CHECK-NEON-NEXT: mul w10, w0, w10
; CHECK-NEON-NEXT: eor w8, w8, w9
; CHECK-NEON-NEXT: mul w13, w0, w13
; CHECK-NEON-NEXT: eor w11, w2, w11
; CHECK-NEON-NEXT: and w2, w1, #0x20000000
; CHECK-NEON-NEXT: mul w18, w0, w18
; CHECK-NEON-NEXT: eor w12, w12, w17
; CHECK-NEON-NEXT: and w17, w1, #0x10000000
; CHECK-NEON-NEXT: mul w14, w0, w14
; CHECK-NEON-NEXT: eor w10, w11, w10
; CHECK-NEON-NEXT: and w11, w1, #0x40000000
; CHECK-NEON-NEXT: mul w15, w0, w15
; CHECK-NEON-NEXT: eor w10, w10, w13
; CHECK-NEON-NEXT: and w13, w1, #0x80000000
; CHECK-NEON-NEXT: mul w16, w0, w16
; CHECK-NEON-NEXT: eor w12, w12, w18
; CHECK-NEON-NEXT: mul w17, w0, w17
; CHECK-NEON-NEXT: eor w10, w10, w14
; CHECK-NEON-NEXT: mul w2, w0, w2
; CHECK-NEON-NEXT: eor w9, w12, w15
; CHECK-NEON-NEXT: mul w11, w0, w11
; CHECK-NEON-NEXT: eor w10, w10, w16
; CHECK-NEON-NEXT: eor w8, w8, w9
; CHECK-NEON-NEXT: mul w13, w0, w13
; CHECK-NEON-NEXT: eor w9, w10, w17
; CHECK-NEON-NEXT: eor w8, w8, w9
; CHECK-NEON-NEXT: eor w10, w2, w11
; CHECK-NEON-NEXT: eor w9, w10, w13
; CHECK-NEON-NEXT: eor w0, w8, w9
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i32:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: fmov s0, w0
; CHECK-AES-NEXT: fmov s1, w1
; CHECK-AES-NEXT: pmull v0.1q, v0.1d, v1.1d
; CHECK-AES-NEXT: fmov w0, s0
; CHECK-AES-NEXT: ret
%a = call i32 @llvm.clmul.i32(i32 %x, i32 %y)
ret i32 %a
}
define i64 @clmul_i64(i64 %x, i64 %y) {
; CHECK-NEON-LABEL: clmul_i64:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: sub sp, sp, #304
; CHECK-NEON-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
; CHECK-NEON-NEXT: .cfi_def_cfa_offset 304
; CHECK-NEON-NEXT: .cfi_offset w19, -8
; CHECK-NEON-NEXT: .cfi_offset w20, -16
; CHECK-NEON-NEXT: .cfi_offset w21, -24
; CHECK-NEON-NEXT: .cfi_offset w22, -32
; CHECK-NEON-NEXT: .cfi_offset w23, -40
; CHECK-NEON-NEXT: .cfi_offset w24, -48
; CHECK-NEON-NEXT: .cfi_offset w25, -56
; CHECK-NEON-NEXT: .cfi_offset w26, -64
; CHECK-NEON-NEXT: .cfi_offset w27, -72
; CHECK-NEON-NEXT: .cfi_offset w28, -80
; CHECK-NEON-NEXT: .cfi_offset w30, -88
; CHECK-NEON-NEXT: .cfi_offset w29, -96
; CHECK-NEON-NEXT: and x8, x1, #0x2
; CHECK-NEON-NEXT: mul x9, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x1
; CHECK-NEON-NEXT: mul x10, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x4
; CHECK-NEON-NEXT: mul x11, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x8
; CHECK-NEON-NEXT: mul x13, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x10
; CHECK-NEON-NEXT: eor x9, x10, x9
; CHECK-NEON-NEXT: mul x12, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x20
; CHECK-NEON-NEXT: mul x14, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x40
; CHECK-NEON-NEXT: eor x10, x11, x13
; CHECK-NEON-NEXT: and x11, x1, #0x10000000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #200] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x80
; CHECK-NEON-NEXT: mul x15, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x100
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #160] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x200
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #152] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x400
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #184] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x800
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #192] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x1000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #144] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x2000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #136] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x4000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #176] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x8000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #168] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x10000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #120] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x20000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #80] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x40000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #72] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x80000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #104] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x100000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #96] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x200000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #128] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x400000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #112] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x800000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #64] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x1000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #40] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x2000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: ldr x30, [sp, #40] // 8-byte Reload
; CHECK-NEON-NEXT: str x8, [sp, #32] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x4000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #56] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x8000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #48] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x10000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #88] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x20000000
; CHECK-NEON-NEXT: mul x26, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x40000000
; CHECK-NEON-NEXT: mul x22, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x80000000
; CHECK-NEON-NEXT: mul x23, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x100000000
; CHECK-NEON-NEXT: mul x24, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x200000000
; CHECK-NEON-NEXT: eor x22, x26, x22
; CHECK-NEON-NEXT: ldr x26, [sp, #32] // 8-byte Reload
; CHECK-NEON-NEXT: mul x25, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x400000000
; CHECK-NEON-NEXT: eor x22, x22, x23
; CHECK-NEON-NEXT: and x23, x1, #0x400000000000000
; CHECK-NEON-NEXT: mul x27, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x800000000
; CHECK-NEON-NEXT: eor x22, x22, x24
; CHECK-NEON-NEXT: ldr x24, [sp, #48] // 8-byte Reload
; CHECK-NEON-NEXT: mul x28, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x1000000000
; CHECK-NEON-NEXT: eor x22, x22, x25
; CHECK-NEON-NEXT: ldr x25, [sp, #88] // 8-byte Reload
; CHECK-NEON-NEXT: mul x29, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x2000000000
; CHECK-NEON-NEXT: eor x22, x22, x27
; CHECK-NEON-NEXT: mul x21, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x4000000000
; CHECK-NEON-NEXT: mul x7, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x8000000000
; CHECK-NEON-NEXT: mul x19, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x10000000000
; CHECK-NEON-NEXT: mul x5, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x20000000000
; CHECK-NEON-NEXT: eor x7, x21, x7
; CHECK-NEON-NEXT: mul x6, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x40000000000
; CHECK-NEON-NEXT: mul x20, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x80000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: mul x23, x0, x23
; CHECK-NEON-NEXT: str x8, [sp, #24] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x100000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #16] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x200000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #8] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x400000000000
; CHECK-NEON-NEXT: mul x4, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x800000000000
; CHECK-NEON-NEXT: mul x17, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x1000000000000
; CHECK-NEON-NEXT: mul x18, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x2000000000000
; CHECK-NEON-NEXT: mul x3, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x4000000000000
; CHECK-NEON-NEXT: eor x17, x4, x17
; CHECK-NEON-NEXT: mul x2, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x8000000000000
; CHECK-NEON-NEXT: eor x17, x17, x18
; CHECK-NEON-NEXT: and x18, x1, #0x4000000000000000
; CHECK-NEON-NEXT: mul x16, x0, x8
; CHECK-NEON-NEXT: eor x8, x9, x10
; CHECK-NEON-NEXT: ldr x9, [sp, #160] // 8-byte Reload
; CHECK-NEON-NEXT: eor x10, x12, x14
; CHECK-NEON-NEXT: ldr x12, [sp, #80] // 8-byte Reload
; CHECK-NEON-NEXT: eor x17, x17, x3
; CHECK-NEON-NEXT: eor x9, x15, x9
; CHECK-NEON-NEXT: mul x15, x0, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #200] // 8-byte Reload
; CHECK-NEON-NEXT: eor x17, x17, x2
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #152] // 8-byte Reload
; CHECK-NEON-NEXT: mul x18, x0, x18
; CHECK-NEON-NEXT: eor x8, x8, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #184] // 8-byte Reload
; CHECK-NEON-NEXT: eor x16, x17, x16
; CHECK-NEON-NEXT: eor x9, x9, x11
; CHECK-NEON-NEXT: and x11, x1, #0x20000000000000
; CHECK-NEON-NEXT: ldr x17, [sp, #24] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: mul x14, x0, x11
; CHECK-NEON-NEXT: and x10, x1, #0x40000000000000
; CHECK-NEON-NEXT: eor x11, x8, x9
; CHECK-NEON-NEXT: ldr x8, [sp, #192] // 8-byte Reload
; CHECK-NEON-NEXT: ldr x9, [sp, #144] // 8-byte Reload
; CHECK-NEON-NEXT: mul x13, x0, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #136] // 8-byte Reload
; CHECK-NEON-NEXT: eor x15, x16, x15
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: ldr x9, [sp, #120] // 8-byte Reload
; CHECK-NEON-NEXT: ldr x16, [sp, #16] // 8-byte Reload
; CHECK-NEON-NEXT: eor x8, x8, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #72] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #176] // 8-byte Reload
; CHECK-NEON-NEXT: eor x14, x15, x14
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: and x10, x1, #0x80000000000000
; CHECK-NEON-NEXT: ldr x15, [sp, #8] // 8-byte Reload
; CHECK-NEON-NEXT: eor x8, x8, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #104] // 8-byte Reload
; CHECK-NEON-NEXT: eor x13, x14, x13
; CHECK-NEON-NEXT: eor x9, x9, x12
; CHECK-NEON-NEXT: mul x12, x0, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #168] // 8-byte Reload
; CHECK-NEON-NEXT: eor x8, x8, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #96] // 8-byte Reload
; CHECK-NEON-NEXT: eor x8, x11, x8
; CHECK-NEON-NEXT: ldr x11, [sp, #128] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: and x10, x1, #0x100000000000000
; CHECK-NEON-NEXT: eor x9, x9, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #64] // 8-byte Reload
; CHECK-NEON-NEXT: mul x10, x0, x10
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: ldr x9, [sp, #112] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x11
; CHECK-NEON-NEXT: and x11, x1, #0x200000000000000
; CHECK-NEON-NEXT: eor x9, x9, x30
; CHECK-NEON-NEXT: mul x11, x0, x11
; CHECK-NEON-NEXT: eor x9, x9, x26
; CHECK-NEON-NEXT: ldr x26, [sp, #56] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x26
; CHECK-NEON-NEXT: eor x9, x9, x24
; CHECK-NEON-NEXT: and x24, x1, #0x800000000000000
; CHECK-NEON-NEXT: eor x9, x9, x25
; CHECK-NEON-NEXT: mul x24, x0, x24
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: eor x9, x22, x28
; CHECK-NEON-NEXT: and x22, x1, #0x1000000000000000
; CHECK-NEON-NEXT: eor x9, x9, x29
; CHECK-NEON-NEXT: mul x21, x0, x22
; CHECK-NEON-NEXT: and x11, x1, #0x8000000000000000
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: eor x9, x7, x19
; CHECK-NEON-NEXT: and x7, x1, #0x2000000000000000
; CHECK-NEON-NEXT: eor x9, x9, x5
; CHECK-NEON-NEXT: mul x4, x0, x7
; CHECK-NEON-NEXT: eor x10, x10, x23
; CHECK-NEON-NEXT: eor x9, x9, x6
; CHECK-NEON-NEXT: eor x10, x10, x24
; CHECK-NEON-NEXT: eor x9, x9, x20
; CHECK-NEON-NEXT: mul x11, x0, x11
; CHECK-NEON-NEXT: eor x9, x9, x17
; CHECK-NEON-NEXT: eor x10, x10, x21
; CHECK-NEON-NEXT: eor x9, x9, x16
; CHECK-NEON-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x9, x9, x15
; CHECK-NEON-NEXT: eor x10, x10, x4
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: eor x9, x13, x12
; CHECK-NEON-NEXT: eor x10, x10, x18
; CHECK-NEON-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x9, x10, x11
; CHECK-NEON-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x0, x8, x9
; CHECK-NEON-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
; CHECK-NEON-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; CHECK-NEON-NEXT: add sp, sp, #304
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i64:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: fmov d0, x1
; CHECK-AES-NEXT: fmov d1, x0
; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
; CHECK-AES-NEXT: fmov x0, d0
; CHECK-AES-NEXT: ret
%a = call i64 @llvm.clmul.i64(i64 %x, i64 %y)
ret i64 %a
}
define i16 @clmul_i16_zext(i8 %x, i8 %y) {
; CHECK-NEON-LABEL: clmul_i16_zext:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: and w8, w0, #0xff
; CHECK-NEON-NEXT: and w9, w1, #0x2
; CHECK-NEON-NEXT: and w10, w1, #0x1
; CHECK-NEON-NEXT: mul w9, w8, w9
; CHECK-NEON-NEXT: and w11, w1, #0x4
; CHECK-NEON-NEXT: and w12, w1, #0x8
; CHECK-NEON-NEXT: mul w10, w8, w10
; CHECK-NEON-NEXT: and w13, w1, #0x10
; CHECK-NEON-NEXT: and w14, w1, #0x20
; CHECK-NEON-NEXT: mul w11, w8, w11
; CHECK-NEON-NEXT: and w15, w1, #0x40
; CHECK-NEON-NEXT: mul w12, w8, w12
; CHECK-NEON-NEXT: mul w13, w8, w13
; CHECK-NEON-NEXT: eor w9, w10, w9
; CHECK-NEON-NEXT: and w10, w1, #0x80
; CHECK-NEON-NEXT: mul w14, w8, w14
; CHECK-NEON-NEXT: mul w15, w8, w15
; CHECK-NEON-NEXT: eor w11, w11, w12
; CHECK-NEON-NEXT: mul w8, w8, w10
; CHECK-NEON-NEXT: eor w9, w9, w11
; CHECK-NEON-NEXT: eor w12, w13, w14
; CHECK-NEON-NEXT: eor w10, w12, w15
; CHECK-NEON-NEXT: eor w9, w9, w10
; CHECK-NEON-NEXT: eor w0, w9, w8
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i16_zext:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: and w8, w0, #0xff
; CHECK-AES-NEXT: and w9, w1, #0xff
; CHECK-AES-NEXT: fmov s0, w9
; CHECK-AES-NEXT: fmov s1, w8
; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
; CHECK-AES-NEXT: fmov w0, s0
; CHECK-AES-NEXT: ret
%zextx = zext i8 %x to i16
%zexty = zext i8 %y to i16
%a = call i16 @llvm.clmul.i16(i16 %zextx, i16 %zexty)
ret i16 %a
}
define i32 @clmul_i32_zext(i16 %x, i16 %y) {
; CHECK-NEON-LABEL: clmul_i32_zext:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: and w8, w0, #0xffff
; CHECK-NEON-NEXT: and w9, w1, #0x2
; CHECK-NEON-NEXT: and w10, w1, #0x1
; CHECK-NEON-NEXT: mul w9, w8, w9
; CHECK-NEON-NEXT: and w11, w1, #0x4
; CHECK-NEON-NEXT: and w12, w1, #0x8
; CHECK-NEON-NEXT: mul w10, w8, w10
; CHECK-NEON-NEXT: and w13, w1, #0x10
; CHECK-NEON-NEXT: and w14, w1, #0x20
; CHECK-NEON-NEXT: mul w11, w8, w11
; CHECK-NEON-NEXT: and w16, w1, #0x80
; CHECK-NEON-NEXT: and w17, w1, #0x100
; CHECK-NEON-NEXT: mul w12, w8, w12
; CHECK-NEON-NEXT: and w2, w1, #0x800
; CHECK-NEON-NEXT: and w15, w1, #0x40
; CHECK-NEON-NEXT: mul w13, w8, w13
; CHECK-NEON-NEXT: eor w9, w10, w9
; CHECK-NEON-NEXT: and w10, w1, #0x1000
; CHECK-NEON-NEXT: mul w14, w8, w14
; CHECK-NEON-NEXT: and w18, w1, #0x200
; CHECK-NEON-NEXT: and w0, w1, #0x400
; CHECK-NEON-NEXT: mul w16, w8, w16
; CHECK-NEON-NEXT: eor w11, w11, w12
; CHECK-NEON-NEXT: and w12, w1, #0x2000
; CHECK-NEON-NEXT: mul w17, w8, w17
; CHECK-NEON-NEXT: eor w9, w9, w11
; CHECK-NEON-NEXT: and w11, w1, #0x4000
; CHECK-NEON-NEXT: mul w2, w8, w2
; CHECK-NEON-NEXT: eor w13, w13, w14
; CHECK-NEON-NEXT: and w14, w1, #0x8000
; CHECK-NEON-NEXT: mul w10, w8, w10
; CHECK-NEON-NEXT: mul w15, w8, w15
; CHECK-NEON-NEXT: eor w16, w16, w17
; CHECK-NEON-NEXT: mul w18, w8, w18
; CHECK-NEON-NEXT: mul w12, w8, w12
; CHECK-NEON-NEXT: eor w10, w2, w10
; CHECK-NEON-NEXT: mul w0, w8, w0
; CHECK-NEON-NEXT: eor w13, w13, w15
; CHECK-NEON-NEXT: mul w11, w8, w11
; CHECK-NEON-NEXT: eor w9, w9, w13
; CHECK-NEON-NEXT: mul w8, w8, w14
; CHECK-NEON-NEXT: eor w14, w16, w18
; CHECK-NEON-NEXT: eor w10, w10, w12
; CHECK-NEON-NEXT: eor w12, w14, w0
; CHECK-NEON-NEXT: eor w10, w10, w11
; CHECK-NEON-NEXT: eor w9, w9, w12
; CHECK-NEON-NEXT: eor w8, w10, w8
; CHECK-NEON-NEXT: eor w0, w9, w8
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i32_zext:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: and w8, w0, #0xffff
; CHECK-AES-NEXT: and w9, w1, #0xffff
; CHECK-AES-NEXT: fmov s0, w8
; CHECK-AES-NEXT: fmov s1, w9
; CHECK-AES-NEXT: pmull v0.1q, v0.1d, v1.1d
; CHECK-AES-NEXT: fmov w0, s0
; CHECK-AES-NEXT: ret
%zextx = zext i16 %x to i32
%zexty = zext i16 %y to i32
%a = call i32 @llvm.clmul.i32(i32 %zextx, i32 %zexty)
ret i32 %a
}
define i64 @clmul_i64_zext(i32 %x, i32 %y) {
; CHECK-NEON-LABEL: clmul_i64_zext:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: mov w8, w1
; CHECK-NEON-NEXT: and x9, x8, #0x2
; CHECK-NEON-NEXT: and x10, x8, #0x1
; CHECK-NEON-NEXT: and x11, x8, #0x4
; CHECK-NEON-NEXT: umull x9, w9, w0
; CHECK-NEON-NEXT: and x12, x8, #0x8
; CHECK-NEON-NEXT: and x13, x8, #0x10
; CHECK-NEON-NEXT: umull x10, w10, w0
; CHECK-NEON-NEXT: and x14, x8, #0x20
; CHECK-NEON-NEXT: and x15, x8, #0x40
; CHECK-NEON-NEXT: umull x11, w11, w0
; CHECK-NEON-NEXT: and x2, x8, #0x800
; CHECK-NEON-NEXT: and x16, x8, #0x80
; CHECK-NEON-NEXT: umull x12, w12, w0
; CHECK-NEON-NEXT: and x17, x8, #0x100
; CHECK-NEON-NEXT: and x18, x8, #0x200
; CHECK-NEON-NEXT: umull x13, w13, w0
; CHECK-NEON-NEXT: eor x9, x10, x9
; CHECK-NEON-NEXT: and x10, x8, #0x1000
; CHECK-NEON-NEXT: umull x14, w14, w0
; CHECK-NEON-NEXT: and x1, x8, #0x400
; CHECK-NEON-NEXT: umull x15, w15, w0
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: and x12, x8, #0x2000
; CHECK-NEON-NEXT: umull x2, w2, w0
; CHECK-NEON-NEXT: eor x9, x9, x11
; CHECK-NEON-NEXT: and x11, x8, #0x4000
; CHECK-NEON-NEXT: umull x10, w10, w0
; CHECK-NEON-NEXT: eor x13, x13, x14
; CHECK-NEON-NEXT: and x14, x8, #0x8000
; CHECK-NEON-NEXT: umull x16, w16, w0
; CHECK-NEON-NEXT: eor x13, x13, x15
; CHECK-NEON-NEXT: and x15, x8, #0x10000
; CHECK-NEON-NEXT: umull x17, w17, w0
; CHECK-NEON-NEXT: eor x9, x9, x13
; CHECK-NEON-NEXT: and x13, x8, #0x20000
; CHECK-NEON-NEXT: umull x12, w12, w0
; CHECK-NEON-NEXT: eor x10, x2, x10
; CHECK-NEON-NEXT: and x2, x8, #0x400000
; CHECK-NEON-NEXT: umull x18, w18, w0
; CHECK-NEON-NEXT: umull x11, w11, w0
; CHECK-NEON-NEXT: eor x16, x16, x17
; CHECK-NEON-NEXT: and x17, x8, #0x40000
; CHECK-NEON-NEXT: umull x14, w14, w0
; CHECK-NEON-NEXT: eor x10, x10, x12
; CHECK-NEON-NEXT: and x12, x8, #0x800000
; CHECK-NEON-NEXT: umull x1, w1, w0
; CHECK-NEON-NEXT: eor x16, x16, x18
; CHECK-NEON-NEXT: and x18, x8, #0x80000
; CHECK-NEON-NEXT: umull x15, w15, w0
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: and x11, x8, #0x1000000
; CHECK-NEON-NEXT: umull x13, w13, w0
; CHECK-NEON-NEXT: eor x10, x10, x14
; CHECK-NEON-NEXT: and x14, x8, #0x2000000
; CHECK-NEON-NEXT: umull x17, w17, w0
; CHECK-NEON-NEXT: eor x16, x16, x1
; CHECK-NEON-NEXT: and x1, x8, #0x100000
; CHECK-NEON-NEXT: umull x2, w2, w0
; CHECK-NEON-NEXT: eor x9, x9, x16
; CHECK-NEON-NEXT: and x16, x8, #0x200000
; CHECK-NEON-NEXT: umull x12, w12, w0
; CHECK-NEON-NEXT: eor x13, x15, x13
; CHECK-NEON-NEXT: and x15, x8, #0x4000000
; CHECK-NEON-NEXT: umull x18, w18, w0
; CHECK-NEON-NEXT: eor x13, x13, x17
; CHECK-NEON-NEXT: and x17, x8, #0x8000000
; CHECK-NEON-NEXT: umull x11, w11, w0
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: umull x14, w14, w0
; CHECK-NEON-NEXT: eor x12, x2, x12
; CHECK-NEON-NEXT: and x2, x8, #0x20000000
; CHECK-NEON-NEXT: umull x1, w1, w0
; CHECK-NEON-NEXT: eor x13, x13, x18
; CHECK-NEON-NEXT: and x18, x8, #0x10000000
; CHECK-NEON-NEXT: umull x15, w15, w0
; CHECK-NEON-NEXT: eor x11, x12, x11
; CHECK-NEON-NEXT: and x12, x8, #0x40000000
; CHECK-NEON-NEXT: umull x16, w16, w0
; CHECK-NEON-NEXT: eor x11, x11, x14
; CHECK-NEON-NEXT: and x8, x8, #0x80000000
; CHECK-NEON-NEXT: umull x17, w17, w0
; CHECK-NEON-NEXT: eor x13, x13, x1
; CHECK-NEON-NEXT: umull x18, w18, w0
; CHECK-NEON-NEXT: eor x11, x11, x15
; CHECK-NEON-NEXT: umull x2, w2, w0
; CHECK-NEON-NEXT: eor x10, x13, x16
; CHECK-NEON-NEXT: umull x12, w12, w0
; CHECK-NEON-NEXT: eor x11, x11, x17
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: umull x8, w8, w0
; CHECK-NEON-NEXT: eor x10, x11, x18
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: eor x11, x2, x12
; CHECK-NEON-NEXT: eor x8, x11, x8
; CHECK-NEON-NEXT: eor x0, x9, x8
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i64_zext:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: mov w8, w0
; CHECK-AES-NEXT: mov w9, w1
; CHECK-AES-NEXT: fmov d0, x8
; CHECK-AES-NEXT: fmov d1, x9
; CHECK-AES-NEXT: pmull v0.1q, v0.1d, v1.1d
; CHECK-AES-NEXT: fmov x0, d0
; CHECK-AES-NEXT: ret
%zextx = zext i32 %x to i64
%zexty = zext i32 %y to i64
%a = call i64 @llvm.clmul.i64(i64 %zextx, i64 %zexty)
ret i64 %a
}
define i128 @clmul_i128_zext(i64 %x, i64 %y) {
; CHECK-NEON-LABEL: clmul_i128_zext:
; CHECK-NEON: // %bb.0:
; CHECK-NEON-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-NEON-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEON-NEXT: sub sp, sp, #624
; CHECK-NEON-NEXT: .cfi_def_cfa_offset 720
; CHECK-NEON-NEXT: .cfi_offset w19, -8
; CHECK-NEON-NEXT: .cfi_offset w20, -16
; CHECK-NEON-NEXT: .cfi_offset w21, -24
; CHECK-NEON-NEXT: .cfi_offset w22, -32
; CHECK-NEON-NEXT: .cfi_offset w23, -40
; CHECK-NEON-NEXT: .cfi_offset w24, -48
; CHECK-NEON-NEXT: .cfi_offset w25, -56
; CHECK-NEON-NEXT: .cfi_offset w26, -64
; CHECK-NEON-NEXT: .cfi_offset w27, -72
; CHECK-NEON-NEXT: .cfi_offset w28, -80
; CHECK-NEON-NEXT: .cfi_offset w30, -88
; CHECK-NEON-NEXT: .cfi_offset w29, -96
; CHECK-NEON-NEXT: and x8, x1, #0x2
; CHECK-NEON-NEXT: mul x11, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x1
; CHECK-NEON-NEXT: mul x12, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x4
; CHECK-NEON-NEXT: mul x13, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x8
; CHECK-NEON-NEXT: mul x14, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x10
; CHECK-NEON-NEXT: mul x15, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x20
; CHECK-NEON-NEXT: mul x16, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x40
; CHECK-NEON-NEXT: mul x17, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x80
; CHECK-NEON-NEXT: mul x18, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x100
; CHECK-NEON-NEXT: mul x3, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x200
; CHECK-NEON-NEXT: mul x2, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x400
; CHECK-NEON-NEXT: mul x4, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x800
; CHECK-NEON-NEXT: mul x5, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x1000
; CHECK-NEON-NEXT: mul x20, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x2000
; CHECK-NEON-NEXT: mul x6, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x4000
; CHECK-NEON-NEXT: mul x7, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x8000
; CHECK-NEON-NEXT: mul x19, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x10000
; CHECK-NEON-NEXT: mul x21, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x20000
; CHECK-NEON-NEXT: mul x22, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x40000
; CHECK-NEON-NEXT: mul x23, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x80000
; CHECK-NEON-NEXT: mul x24, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x100000
; CHECK-NEON-NEXT: mul x25, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x200000
; CHECK-NEON-NEXT: mul x26, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x400000
; CHECK-NEON-NEXT: mul x27, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x800000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #592] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x1000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #584] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x2000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #616] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x4000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #576] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x8000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #608] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x10000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #600] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x20000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #568] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x40000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #512] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x80000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #536] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x100000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #528] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x200000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #560] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x400000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #520] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x800000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #552] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x1000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #544] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x2000000000
; CHECK-NEON-NEXT: mul x9, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x4000000000
; CHECK-NEON-NEXT: mul x10, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x8000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #464] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x10000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #456] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x20000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: stp x8, x9, [sp, #488] // 16-byte Folded Spill
; CHECK-NEON-NEXT: and x8, x1, #0x40000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: stp x10, x8, [sp, #440] // 16-byte Folded Spill
; CHECK-NEON-NEXT: and x8, x1, #0x80000000000
; CHECK-NEON-NEXT: mul x9, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x100000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: stp x8, x9, [sp, #472] // 16-byte Folded Spill
; CHECK-NEON-NEXT: and x8, x1, #0x200000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #504] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x400000000000
; CHECK-NEON-NEXT: mul x9, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x800000000000
; CHECK-NEON-NEXT: mul x10, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x1000000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #416] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x2000000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: str x8, [sp, #408] // 8-byte Spill
; CHECK-NEON-NEXT: and x8, x1, #0x4000000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: stp x8, x9, [sp, #424] // 16-byte Folded Spill
; CHECK-NEON-NEXT: and x8, x1, #0x8000000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: stp x10, x8, [sp, #392] // 16-byte Folded Spill
; CHECK-NEON-NEXT: and x8, x1, #0x100000000000000
; CHECK-NEON-NEXT: mul x9, x0, x8
; CHECK-NEON-NEXT: and x8, x1, #0x200000000000000
; CHECK-NEON-NEXT: mul x8, x0, x8
; CHECK-NEON-NEXT: stp x8, x9, [sp, #376] // 16-byte Folded Spill
; CHECK-NEON-NEXT: and x9, x1, #0x400000000000000
; CHECK-NEON-NEXT: rbit x8, x1
; CHECK-NEON-NEXT: mul x9, x0, x9
; CHECK-NEON-NEXT: and x10, x8, #0x2
; CHECK-NEON-NEXT: str x9, [sp, #368] // 8-byte Spill
; CHECK-NEON-NEXT: rbit x9, x0
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #360] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x1
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #352] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x4
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #344] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x8
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #336] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x10
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #328] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x20
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #320] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x40
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #312] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x80
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #304] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x100
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #296] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x200
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #288] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x400
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #280] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x800
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #272] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x1000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #256] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x2000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #248] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x4000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #264] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x8000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #240] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x10000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #232] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x20000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #200] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x40000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #224] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x80000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #192] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x100000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #216] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x200000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #208] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x400000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #184] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x800000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #136] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x1000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #168] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x2000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #160] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x4000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #176] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x8000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #152] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x10000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #144] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x20000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #128] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x40000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #120] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x80000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #112] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x100000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #104] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x200000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #96] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x400000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #88] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x800000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #80] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x1000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #72] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x2000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #64] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x4000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #56] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x8000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #48] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x10000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #40] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x20000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #32] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x40000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #24] // 8-byte Spill
; CHECK-NEON-NEXT: and x10, x8, #0x80000000000
; CHECK-NEON-NEXT: mul x10, x9, x10
; CHECK-NEON-NEXT: str x10, [sp, #16] // 8-byte Spill
; CHECK-NEON-NEXT: eor x10, x12, x11
; CHECK-NEON-NEXT: and x12, x8, #0x100000000000
; CHECK-NEON-NEXT: mul x12, x9, x12
; CHECK-NEON-NEXT: eor x11, x13, x14
; CHECK-NEON-NEXT: and x13, x8, #0x200000000000
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: eor x11, x15, x16
; CHECK-NEON-NEXT: ldr x14, [sp, #608] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x17
; CHECK-NEON-NEXT: mul x30, x9, x13
; CHECK-NEON-NEXT: and x13, x8, #0x400000000000
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: mul x29, x9, x13
; CHECK-NEON-NEXT: and x13, x8, #0x800000000000
; CHECK-NEON-NEXT: str x12, [sp, #8] // 8-byte Spill
; CHECK-NEON-NEXT: eor x12, x18, x3
; CHECK-NEON-NEXT: eor x11, x12, x2
; CHECK-NEON-NEXT: eor x12, x5, x20
; CHECK-NEON-NEXT: mul x28, x9, x13
; CHECK-NEON-NEXT: eor x11, x11, x4
; CHECK-NEON-NEXT: ldr x13, [sp, #592] // 8-byte Reload
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: eor x11, x12, x6
; CHECK-NEON-NEXT: eor x12, x21, x22
; CHECK-NEON-NEXT: eor x11, x11, x7
; CHECK-NEON-NEXT: eor x12, x12, x23
; CHECK-NEON-NEXT: eor x13, x27, x13
; CHECK-NEON-NEXT: eor x11, x11, x19
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: eor x11, x12, x24
; CHECK-NEON-NEXT: and x12, x8, #0x1000000000000
; CHECK-NEON-NEXT: eor x11, x11, x25
; CHECK-NEON-NEXT: mul x27, x9, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #584] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x26
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #616] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x13, x12
; CHECK-NEON-NEXT: and x13, x8, #0x2000000000000
; CHECK-NEON-NEXT: eor x11, x12, x11
; CHECK-NEON-NEXT: ldr x12, [sp, #576] // 8-byte Reload
; CHECK-NEON-NEXT: mul x25, x9, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #512] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #568] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x14
; CHECK-NEON-NEXT: ldr x14, [sp, #536] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x13
; CHECK-NEON-NEXT: and x13, x8, #0x4000000000000
; CHECK-NEON-NEXT: mul x24, x9, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #600] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x14
; CHECK-NEON-NEXT: ldr x14, [sp, #552] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #528] // 8-byte Reload
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #560] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x13
; CHECK-NEON-NEXT: and x13, x8, #0x8000000000000
; CHECK-NEON-NEXT: eor x11, x12, x11
; CHECK-NEON-NEXT: ldr x12, [sp, #520] // 8-byte Reload
; CHECK-NEON-NEXT: mul x23, x9, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #440] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #496] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x14
; CHECK-NEON-NEXT: ldr x14, [sp, #464] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x13
; CHECK-NEON-NEXT: and x13, x8, #0x10000000000000
; CHECK-NEON-NEXT: mul x21, x9, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #544] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x14
; CHECK-NEON-NEXT: ldr x14, [sp, #256] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #456] // 8-byte Reload
; CHECK-NEON-NEXT: eor x26, x10, x11
; CHECK-NEON-NEXT: ldr x10, [sp, #488] // 8-byte Reload
; CHECK-NEON-NEXT: ldr x11, [sp, #448] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x13
; CHECK-NEON-NEXT: and x13, x8, #0x20000000000000
; CHECK-NEON-NEXT: eor x10, x12, x10
; CHECK-NEON-NEXT: mul x20, x9, x13
; CHECK-NEON-NEXT: ldr x12, [sp, #392] // 8-byte Reload
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #432] // 8-byte Reload
; CHECK-NEON-NEXT: ldr x13, [sp, #480] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: and x12, x8, #0x40000000000000
; CHECK-NEON-NEXT: eor x10, x10, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #416] // 8-byte Reload
; CHECK-NEON-NEXT: mul x7, x9, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #472] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #504] // 8-byte Reload
; CHECK-NEON-NEXT: eor x10, x10, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #408] // 8-byte Reload
; CHECK-NEON-NEXT: eor x22, x10, x13
; CHECK-NEON-NEXT: ldr x10, [sp, #424] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: and x12, x8, #0x80000000000000
; CHECK-NEON-NEXT: eor x10, x11, x10
; CHECK-NEON-NEXT: ldr x11, [sp, #400] // 8-byte Reload
; CHECK-NEON-NEXT: mul x5, x9, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #368] // 8-byte Reload
; CHECK-NEON-NEXT: eor x19, x10, x11
; CHECK-NEON-NEXT: ldp x11, x10, [sp, #376] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: and x11, x8, #0x100000000000000
; CHECK-NEON-NEXT: eor x6, x10, x12
; CHECK-NEON-NEXT: ldp x12, x10, [sp, #352] // 16-byte Folded Reload
; CHECK-NEON-NEXT: mul x4, x9, x11
; CHECK-NEON-NEXT: eor x10, x12, x10
; CHECK-NEON-NEXT: ldp x12, x11, [sp, #336] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: ldp x13, x12, [sp, #320] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x10, x10, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #312] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x13
; CHECK-NEON-NEXT: and x13, x8, #0x200000000000000
; CHECK-NEON-NEXT: eor x11, x12, x11
; CHECK-NEON-NEXT: mul x3, x9, x13
; CHECK-NEON-NEXT: and x13, x8, #0x400000000000000
; CHECK-NEON-NEXT: eor x12, x10, x11
; CHECK-NEON-NEXT: ldp x11, x10, [sp, #296] // 16-byte Folded Reload
; CHECK-NEON-NEXT: mul x2, x9, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #280] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x10, x11
; CHECK-NEON-NEXT: ldr x10, [sp, #288] // 8-byte Reload
; CHECK-NEON-NEXT: eor x3, x4, x3
; CHECK-NEON-NEXT: and x4, x1, #0x2000000000000000
; CHECK-NEON-NEXT: eor x11, x11, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #272] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #248] // 8-byte Reload
; CHECK-NEON-NEXT: eor x2, x3, x2
; CHECK-NEON-NEXT: eor x10, x10, x14
; CHECK-NEON-NEXT: eor x11, x12, x11
; CHECK-NEON-NEXT: ldr x12, [sp, #264] // 8-byte Reload
; CHECK-NEON-NEXT: eor x10, x10, x13
; CHECK-NEON-NEXT: and x13, x8, #0x800000000000000
; CHECK-NEON-NEXT: mul x3, x0, x4
; CHECK-NEON-NEXT: eor x12, x10, x12
; CHECK-NEON-NEXT: ldr x10, [sp, #240] // 8-byte Reload
; CHECK-NEON-NEXT: mul x18, x9, x13
; CHECK-NEON-NEXT: ldr x13, [sp, #200] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x12, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #232] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: ldp x14, x12, [sp, #216] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x13, x10, x13
; CHECK-NEON-NEXT: and x10, x8, #0x1000000000000000
; CHECK-NEON-NEXT: mul x17, x9, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #192] // 8-byte Reload
; CHECK-NEON-NEXT: eor x12, x13, x12
; CHECK-NEON-NEXT: ldr x13, [sp, #136] // 8-byte Reload
; CHECK-NEON-NEXT: eor x18, x2, x18
; CHECK-NEON-NEXT: eor x12, x12, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #184] // 8-byte Reload
; CHECK-NEON-NEXT: and x2, x1, #0x4000000000000000
; CHECK-NEON-NEXT: eor x12, x12, x14
; CHECK-NEON-NEXT: ldr x14, [sp, #168] // 8-byte Reload
; CHECK-NEON-NEXT: eor x13, x10, x13
; CHECK-NEON-NEXT: and x10, x8, #0x2000000000000000
; CHECK-NEON-NEXT: and x8, x8, #0x4000000000000000
; CHECK-NEON-NEXT: mul x16, x9, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #208] // 8-byte Reload
; CHECK-NEON-NEXT: eor x13, x13, x14
; CHECK-NEON-NEXT: eor x17, x18, x17
; CHECK-NEON-NEXT: eor x12, x12, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #160] // 8-byte Reload
; CHECK-NEON-NEXT: mul x15, x9, x8
; CHECK-NEON-NEXT: ldr x8, [sp, #152] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: eor x13, x13, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #176] // 8-byte Reload
; CHECK-NEON-NEXT: mul x18, x0, x2
; CHECK-NEON-NEXT: eor x16, x17, x16
; CHECK-NEON-NEXT: and x17, x1, #0x8000000000000000
; CHECK-NEON-NEXT: eor x12, x13, x10
; CHECK-NEON-NEXT: and x13, x1, #0x10000000000000
; CHECK-NEON-NEXT: eor x9, x12, x8
; CHECK-NEON-NEXT: ldp x10, x8, [sp, #120] // 16-byte Folded Reload
; CHECK-NEON-NEXT: mul x14, x0, x13
; CHECK-NEON-NEXT: eor x12, x8, x10
; CHECK-NEON-NEXT: ldr x8, [sp, #144] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x8
; CHECK-NEON-NEXT: ldp x8, x10, [sp, #104] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x9, x11, x9
; CHECK-NEON-NEXT: eor x14, x19, x14
; CHECK-NEON-NEXT: eor x12, x12, x10
; CHECK-NEON-NEXT: eor x11, x12, x8
; CHECK-NEON-NEXT: ldr x8, [sp, #96] // 8-byte Reload
; CHECK-NEON-NEXT: and x12, x1, #0x20000000000000
; CHECK-NEON-NEXT: mul x13, x0, x12
; CHECK-NEON-NEXT: eor x11, x11, x8
; CHECK-NEON-NEXT: ldp x10, x8, [sp, #56] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x8, x8, x10
; CHECK-NEON-NEXT: ldp x12, x10, [sp, #80] // 16-byte Folded Reload
; CHECK-NEON-NEXT: eor x11, x11, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #48] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x12
; CHECK-NEON-NEXT: ldr x12, [sp, #40] // 8-byte Reload
; CHECK-NEON-NEXT: eor x8, x8, x10
; CHECK-NEON-NEXT: and x10, x1, #0x40000000000000
; CHECK-NEON-NEXT: eor x8, x8, x12
; CHECK-NEON-NEXT: mul x12, x0, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #72] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x10
; CHECK-NEON-NEXT: ldr x10, [sp, #32] // 8-byte Reload
; CHECK-NEON-NEXT: eor x9, x9, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #24] // 8-byte Reload
; CHECK-NEON-NEXT: eor x8, x8, x10
; CHECK-NEON-NEXT: and x10, x1, #0x80000000000000
; CHECK-NEON-NEXT: eor x8, x8, x11
; CHECK-NEON-NEXT: ldr x11, [sp, #16] // 8-byte Reload
; CHECK-NEON-NEXT: mul x10, x0, x10
; CHECK-NEON-NEXT: eor x8, x8, x11
; CHECK-NEON-NEXT: eor x11, x29, x28
; CHECK-NEON-NEXT: ldr x29, [sp, #8] // 8-byte Reload
; CHECK-NEON-NEXT: eor x11, x11, x27
; CHECK-NEON-NEXT: and x28, x1, #0x800000000000000
; CHECK-NEON-NEXT: eor x8, x8, x29
; CHECK-NEON-NEXT: eor x11, x11, x25
; CHECK-NEON-NEXT: mul x27, x0, x28
; CHECK-NEON-NEXT: eor x8, x8, x30
; CHECK-NEON-NEXT: and x25, x1, #0x1000000000000000
; CHECK-NEON-NEXT: eor x8, x9, x8
; CHECK-NEON-NEXT: eor x9, x11, x24
; CHECK-NEON-NEXT: mul x11, x0, x25
; CHECK-NEON-NEXT: eor x9, x9, x23
; CHECK-NEON-NEXT: eor x9, x9, x21
; CHECK-NEON-NEXT: eor x9, x9, x20
; CHECK-NEON-NEXT: eor x9, x9, x7
; CHECK-NEON-NEXT: eor x9, x9, x5
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: eor x9, x16, x15
; CHECK-NEON-NEXT: mul x15, x0, x17
; CHECK-NEON-NEXT: eor x16, x6, x27
; CHECK-NEON-NEXT: eor x8, x8, x9
; CHECK-NEON-NEXT: eor x9, x14, x13
; CHECK-NEON-NEXT: eor x11, x16, x11
; CHECK-NEON-NEXT: rbit x8, x8
; CHECK-NEON-NEXT: eor x9, x9, x12
; CHECK-NEON-NEXT: eor x11, x11, x3
; CHECK-NEON-NEXT: eor x13, x26, x22
; CHECK-NEON-NEXT: eor x9, x9, x10
; CHECK-NEON-NEXT: eor x10, x11, x18
; CHECK-NEON-NEXT: lsr x1, x8, #1
; CHECK-NEON-NEXT: eor x8, x13, x9
; CHECK-NEON-NEXT: eor x9, x10, x15
; CHECK-NEON-NEXT: eor x0, x8, x9
; CHECK-NEON-NEXT: add sp, sp, #624
; CHECK-NEON-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEON-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEON-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-NEON-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEON-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-NEON-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; CHECK-NEON-NEXT: ret
;
; CHECK-AES-LABEL: clmul_i128_zext:
; CHECK-AES: // %bb.0:
; CHECK-AES-NEXT: rbit x8, x1
; CHECK-AES-NEXT: rbit x9, x0
; CHECK-AES-NEXT: fmov d2, x0
; CHECK-AES-NEXT: fmov d0, x8
; CHECK-AES-NEXT: fmov d1, x9
; CHECK-AES-NEXT: pmull v0.1q, v1.1d, v0.1d
; CHECK-AES-NEXT: fmov d1, x1
; CHECK-AES-NEXT: pmull v1.1q, v2.1d, v1.1d
; CHECK-AES-NEXT: fmov x8, d0
; CHECK-AES-NEXT: rbit x8, x8
; CHECK-AES-NEXT: fmov x0, d1
; CHECK-AES-NEXT: lsr x1, x8, #1
; CHECK-AES-NEXT: ret
%zextx = zext i64 %x to i128
%zexty = zext i64 %y to i128
%a = call i128 @llvm.clmul.i128(i128 %zextx, i128 %zexty)
ret i128 %a
}